summaryrefslogtreecommitdiffstats
path: root/extensions/spellcheck/hunspell/src
diff options
context:
space:
mode:
Diffstat (limited to 'extensions/spellcheck/hunspell/src')
-rw-r--r--extensions/spellcheck/hunspell/src/COPYING.MPL470
-rw-r--r--extensions/spellcheck/hunspell/src/README.md325
-rw-r--r--extensions/spellcheck/hunspell/src/affentry.cxx983
-rw-r--r--extensions/spellcheck/hunspell/src/affentry.hxx223
-rw-r--r--extensions/spellcheck/hunspell/src/affixmgr.cxx4875
-rw-r--r--extensions/spellcheck/hunspell/src/affixmgr.hxx368
-rw-r--r--extensions/spellcheck/hunspell/src/atypes.hxx129
-rw-r--r--extensions/spellcheck/hunspell/src/baseaffix.hxx74
-rw-r--r--extensions/spellcheck/hunspell/src/csutil.cxx2549
-rw-r--r--extensions/spellcheck/hunspell/src/csutil.hxx323
-rw-r--r--extensions/spellcheck/hunspell/src/filemgr.hxx77
-rw-r--r--extensions/spellcheck/hunspell/src/hashmgr.cxx1415
-rw-r--r--extensions/spellcheck/hunspell/src/hashmgr.hxx182
-rw-r--r--extensions/spellcheck/hunspell/src/htypes.hxx75
-rw-r--r--extensions/spellcheck/hunspell/src/hunspell.cxx2249
-rw-r--r--extensions/spellcheck/hunspell/src/hunspell.h162
-rw-r--r--extensions/spellcheck/hunspell/src/hunspell.hxx232
-rw-r--r--extensions/spellcheck/hunspell/src/hunvisapi.h18
-rw-r--r--extensions/spellcheck/hunspell/src/langnum.hxx76
-rw-r--r--extensions/spellcheck/hunspell/src/license.hunspell54
-rw-r--r--extensions/spellcheck/hunspell/src/license.myspell61
-rw-r--r--extensions/spellcheck/hunspell/src/moz.build30
-rw-r--r--extensions/spellcheck/hunspell/src/moz.yaml16
-rw-r--r--extensions/spellcheck/hunspell/src/phonet.cxx270
-rw-r--r--extensions/spellcheck/hunspell/src/phonet.hxx50
-rw-r--r--extensions/spellcheck/hunspell/src/replist.cxx196
-rw-r--r--extensions/spellcheck/hunspell/src/replist.hxx100
-rw-r--r--extensions/spellcheck/hunspell/src/sources.mozbuild17
-rw-r--r--extensions/spellcheck/hunspell/src/suggestmgr.cxx2263
-rw-r--r--extensions/spellcheck/hunspell/src/suggestmgr.hxx183
-rw-r--r--extensions/spellcheck/hunspell/src/w_char.hxx72
31 files changed, 18117 insertions, 0 deletions
diff --git a/extensions/spellcheck/hunspell/src/COPYING.MPL b/extensions/spellcheck/hunspell/src/COPYING.MPL
new file mode 100644
index 0000000000..7714141d15
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/COPYING.MPL
@@ -0,0 +1,470 @@
+ MOZILLA PUBLIC LICENSE
+ Version 1.1
+
+ ---------------
+
+1. Definitions.
+
+ 1.0.1. "Commercial Use" means distribution or otherwise making the
+ Covered Code available to a third party.
+
+ 1.1. "Contributor" means each entity that creates or contributes to
+ the creation of Modifications.
+
+ 1.2. "Contributor Version" means the combination of the Original
+ Code, prior Modifications used by a Contributor, and the Modifications
+ made by that particular Contributor.
+
+ 1.3. "Covered Code" means the Original Code or Modifications or the
+ combination of the Original Code and Modifications, in each case
+ including portions thereof.
+
+ 1.4. "Electronic Distribution Mechanism" means a mechanism generally
+ accepted in the software development community for the electronic
+ transfer of data.
+
+ 1.5. "Executable" means Covered Code in any form other than Source
+ Code.
+
+ 1.6. "Initial Developer" means the individual or entity identified
+ as the Initial Developer in the Source Code notice required by Exhibit
+ A.
+
+ 1.7. "Larger Work" means a work which combines Covered Code or
+ portions thereof with code not governed by the terms of this License.
+
+ 1.8. "License" means this document.
+
+ 1.8.1. "Licensable" means having the right to grant, to the maximum
+ extent possible, whether at the time of the initial grant or
+ subsequently acquired, any and all of the rights conveyed herein.
+
+ 1.9. "Modifications" means any addition to or deletion from the
+ substance or structure of either the Original Code or any previous
+ Modifications. When Covered Code is released as a series of files, a
+ Modification is:
+ A. Any addition to or deletion from the contents of a file
+ containing Original Code or previous Modifications.
+
+ B. Any new file that contains any part of the Original Code or
+ previous Modifications.
+
+ 1.10. "Original Code" means Source Code of computer software code
+ which is described in the Source Code notice required by Exhibit A as
+ Original Code, and which, at the time of its release under this
+ License is not already Covered Code governed by this License.
+
+ 1.10.1. "Patent Claims" means any patent claim(s), now owned or
+ hereafter acquired, including without limitation, method, process,
+ and apparatus claims, in any patent Licensable by grantor.
+
+ 1.11. "Source Code" means the preferred form of the Covered Code for
+ making modifications to it, including all modules it contains, plus
+ any associated interface definition files, scripts used to control
+ compilation and installation of an Executable, or source code
+ differential comparisons against either the Original Code or another
+ well known, available Covered Code of the Contributor's choice. The
+ Source Code can be in a compressed or archival form, provided the
+ appropriate decompression or de-archiving software is widely available
+ for no charge.
+
+ 1.12. "You" (or "Your") means an individual or a legal entity
+ exercising rights under, and complying with all of the terms of, this
+ License or a future version of this License issued under Section 6.1.
+ For legal entities, "You" includes any entity which controls, is
+ controlled by, or is under common control with You. For purposes of
+ this definition, "control" means (a) the power, direct or indirect,
+ to cause the direction or management of such entity, whether by
+ contract or otherwise, or (b) ownership of more than fifty percent
+ (50%) of the outstanding shares or beneficial ownership of such
+ entity.
+
+2. Source Code License.
+
+ 2.1. The Initial Developer Grant.
+ The Initial Developer hereby grants You a world-wide, royalty-free,
+ non-exclusive license, subject to third party intellectual property
+ claims:
+ (a) under intellectual property rights (other than patent or
+ trademark) Licensable by Initial Developer to use, reproduce,
+ modify, display, perform, sublicense and distribute the Original
+ Code (or portions thereof) with or without Modifications, and/or
+ as part of a Larger Work; and
+
+ (b) under Patents Claims infringed by the making, using or
+ selling of Original Code, to make, have made, use, practice,
+ sell, and offer for sale, and/or otherwise dispose of the
+ Original Code (or portions thereof).
+
+ (c) the licenses granted in this Section 2.1(a) and (b) are
+ effective on the date Initial Developer first distributes
+ Original Code under the terms of this License.
+
+ (d) Notwithstanding Section 2.1(b) above, no patent license is
+ granted: 1) for code that You delete from the Original Code; 2)
+ separate from the Original Code; or 3) for infringements caused
+ by: i) the modification of the Original Code or ii) the
+ combination of the Original Code with other software or devices.
+
+ 2.2. Contributor Grant.
+ Subject to third party intellectual property claims, each Contributor
+ hereby grants You a world-wide, royalty-free, non-exclusive license
+
+ (a) under intellectual property rights (other than patent or
+ trademark) Licensable by Contributor, to use, reproduce, modify,
+ display, perform, sublicense and distribute the Modifications
+ created by such Contributor (or portions thereof) either on an
+ unmodified basis, with other Modifications, as Covered Code
+ and/or as part of a Larger Work; and
+
+ (b) under Patent Claims infringed by the making, using, or
+ selling of Modifications made by that Contributor either alone
+ and/or in combination with its Contributor Version (or portions
+ of such combination), to make, use, sell, offer for sale, have
+ made, and/or otherwise dispose of: 1) Modifications made by that
+ Contributor (or portions thereof); and 2) the combination of
+ Modifications made by that Contributor with its Contributor
+ Version (or portions of such combination).
+
+ (c) the licenses granted in Sections 2.2(a) and 2.2(b) are
+ effective on the date Contributor first makes Commercial Use of
+ the Covered Code.
+
+ (d) Notwithstanding Section 2.2(b) above, no patent license is
+ granted: 1) for any code that Contributor has deleted from the
+ Contributor Version; 2) separate from the Contributor Version;
+ 3) for infringements caused by: i) third party modifications of
+ Contributor Version or ii) the combination of Modifications made
+ by that Contributor with other software (except as part of the
+ Contributor Version) or other devices; or 4) under Patent Claims
+ infringed by Covered Code in the absence of Modifications made by
+ that Contributor.
+
+3. Distribution Obligations.
+
+ 3.1. Application of License.
+ The Modifications which You create or to which You contribute are
+ governed by the terms of this License, including without limitation
+ Section 2.2. The Source Code version of Covered Code may be
+ distributed only under the terms of this License or a future version
+ of this License released under Section 6.1, and You must include a
+ copy of this License with every copy of the Source Code You
+ distribute. You may not offer or impose any terms on any Source Code
+ version that alters or restricts the applicable version of this
+ License or the recipients' rights hereunder. However, You may include
+ an additional document offering the additional rights described in
+ Section 3.5.
+
+ 3.2. Availability of Source Code.
+ Any Modification which You create or to which You contribute must be
+ made available in Source Code form under the terms of this License
+ either on the same media as an Executable version or via an accepted
+ Electronic Distribution Mechanism to anyone to whom you made an
+ Executable version available; and if made available via Electronic
+ Distribution Mechanism, must remain available for at least twelve (12)
+ months after the date it initially became available, or at least six
+ (6) months after a subsequent version of that particular Modification
+ has been made available to such recipients. You are responsible for
+ ensuring that the Source Code version remains available even if the
+ Electronic Distribution Mechanism is maintained by a third party.
+
+ 3.3. Description of Modifications.
+ You must cause all Covered Code to which You contribute to contain a
+ file documenting the changes You made to create that Covered Code and
+ the date of any change. You must include a prominent statement that
+ the Modification is derived, directly or indirectly, from Original
+ Code provided by the Initial Developer and including the name of the
+ Initial Developer in (a) the Source Code, and (b) in any notice in an
+ Executable version or related documentation in which You describe the
+ origin or ownership of the Covered Code.
+
+ 3.4. Intellectual Property Matters
+ (a) Third Party Claims.
+ If Contributor has knowledge that a license under a third party's
+ intellectual property rights is required to exercise the rights
+ granted by such Contributor under Sections 2.1 or 2.2,
+ Contributor must include a text file with the Source Code
+ distribution titled "LEGAL" which describes the claim and the
+ party making the claim in sufficient detail that a recipient will
+ know whom to contact. If Contributor obtains such knowledge after
+ the Modification is made available as described in Section 3.2,
+ Contributor shall promptly modify the LEGAL file in all copies
+ Contributor makes available thereafter and shall take other steps
+ (such as notifying appropriate mailing lists or newsgroups)
+ reasonably calculated to inform those who received the Covered
+ Code that new knowledge has been obtained.
+
+ (b) Contributor APIs.
+ If Contributor's Modifications include an application programming
+ interface and Contributor has knowledge of patent licenses which
+ are reasonably necessary to implement that API, Contributor must
+ also include this information in the LEGAL file.
+
+ (c) Representations.
+ Contributor represents that, except as disclosed pursuant to
+ Section 3.4(a) above, Contributor believes that Contributor's
+ Modifications are Contributor's original creation(s) and/or
+ Contributor has sufficient rights to grant the rights conveyed by
+ this License.
+
+ 3.5. Required Notices.
+ You must duplicate the notice in Exhibit A in each file of the Source
+ Code. If it is not possible to put such notice in a particular Source
+ Code file due to its structure, then You must include such notice in a
+ location (such as a relevant directory) where a user would be likely
+ to look for such a notice. If You created one or more Modification(s)
+ You may add your name as a Contributor to the notice described in
+ Exhibit A. You must also duplicate this License in any documentation
+ for the Source Code where You describe recipients' rights or ownership
+ rights relating to Covered Code. You may choose to offer, and to
+ charge a fee for, warranty, support, indemnity or liability
+ obligations to one or more recipients of Covered Code. However, You
+ may do so only on Your own behalf, and not on behalf of the Initial
+ Developer or any Contributor. You must make it absolutely clear than
+ any such warranty, support, indemnity or liability obligation is
+ offered by You alone, and You hereby agree to indemnify the Initial
+ Developer and every Contributor for any liability incurred by the
+ Initial Developer or such Contributor as a result of warranty,
+ support, indemnity or liability terms You offer.
+
+ 3.6. Distribution of Executable Versions.
+ You may distribute Covered Code in Executable form only if the
+ requirements of Section 3.1-3.5 have been met for that Covered Code,
+ and if You include a notice stating that the Source Code version of
+ the Covered Code is available under the terms of this License,
+ including a description of how and where You have fulfilled the
+ obligations of Section 3.2. The notice must be conspicuously included
+ in any notice in an Executable version, related documentation or
+ collateral in which You describe recipients' rights relating to the
+ Covered Code. You may distribute the Executable version of Covered
+ Code or ownership rights under a license of Your choice, which may
+ contain terms different from this License, provided that You are in
+ compliance with the terms of this License and that the license for the
+ Executable version does not attempt to limit or alter the recipient's
+ rights in the Source Code version from the rights set forth in this
+ License. If You distribute the Executable version under a different
+ license You must make it absolutely clear that any terms which differ
+ from this License are offered by You alone, not by the Initial
+ Developer or any Contributor. You hereby agree to indemnify the
+ Initial Developer and every Contributor for any liability incurred by
+ the Initial Developer or such Contributor as a result of any such
+ terms You offer.
+
+ 3.7. Larger Works.
+ You may create a Larger Work by combining Covered Code with other code
+ not governed by the terms of this License and distribute the Larger
+ Work as a single product. In such a case, You must make sure the
+ requirements of this License are fulfilled for the Covered Code.
+
+4. Inability to Comply Due to Statute or Regulation.
+
+ If it is impossible for You to comply with any of the terms of this
+ License with respect to some or all of the Covered Code due to
+ statute, judicial order, or regulation then You must: (a) comply with
+ the terms of this License to the maximum extent possible; and (b)
+ describe the limitations and the code they affect. Such description
+ must be included in the LEGAL file described in Section 3.4 and must
+ be included with all distributions of the Source Code. Except to the
+ extent prohibited by statute or regulation, such description must be
+ sufficiently detailed for a recipient of ordinary skill to be able to
+ understand it.
+
+5. Application of this License.
+
+ This License applies to code to which the Initial Developer has
+ attached the notice in Exhibit A and to related Covered Code.
+
+6. Versions of the License.
+
+ 6.1. New Versions.
+ Netscape Communications Corporation ("Netscape") may publish revised
+ and/or new versions of the License from time to time. Each version
+ will be given a distinguishing version number.
+
+ 6.2. Effect of New Versions.
+ Once Covered Code has been published under a particular version of the
+ License, You may always continue to use it under the terms of that
+ version. You may also choose to use such Covered Code under the terms
+ of any subsequent version of the License published by Netscape. No one
+ other than Netscape has the right to modify the terms applicable to
+ Covered Code created under this License.
+
+ 6.3. Derivative Works.
+ If You create or use a modified version of this License (which you may
+ only do in order to apply it to code which is not already Covered Code
+ governed by this License), You must (a) rename Your license so that
+ the phrases "Mozilla", "MOZILLAPL", "MOZPL", "Netscape",
+ "MPL", "NPL" or any confusingly similar phrase do not appear in your
+ license (except to note that your license differs from this License)
+ and (b) otherwise make it clear that Your version of the license
+ contains terms which differ from the Mozilla Public License and
+ Netscape Public License. (Filling in the name of the Initial
+ Developer, Original Code or Contributor in the notice described in
+ Exhibit A shall not of themselves be deemed to be modifications of
+ this License.)
+
+7. DISCLAIMER OF WARRANTY.
+
+ COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS,
+ WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+ WITHOUT LIMITATION, WARRANTIES THAT THE COVERED CODE IS FREE OF
+ DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING.
+ THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED CODE
+ IS WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT,
+ YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE
+ COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER
+ OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF
+ ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER.
+
+8. TERMINATION.
+
+ 8.1. This License and the rights granted hereunder will terminate
+ automatically if You fail to comply with terms herein and fail to cure
+ such breach within 30 days of becoming aware of the breach. All
+ sublicenses to the Covered Code which are properly granted shall
+ survive any termination of this License. Provisions which, by their
+ nature, must remain in effect beyond the termination of this License
+ shall survive.
+
+ 8.2. If You initiate litigation by asserting a patent infringement
+ claim (excluding declatory judgment actions) against Initial Developer
+ or a Contributor (the Initial Developer or Contributor against whom
+ You file such action is referred to as "Participant") alleging that:
+
+ (a) such Participant's Contributor Version directly or indirectly
+ infringes any patent, then any and all rights granted by such
+ Participant to You under Sections 2.1 and/or 2.2 of this License
+ shall, upon 60 days notice from Participant terminate prospectively,
+ unless if within 60 days after receipt of notice You either: (i)
+ agree in writing to pay Participant a mutually agreeable reasonable
+ royalty for Your past and future use of Modifications made by such
+ Participant, or (ii) withdraw Your litigation claim with respect to
+ the Contributor Version against such Participant. If within 60 days
+ of notice, a reasonable royalty and payment arrangement are not
+ mutually agreed upon in writing by the parties or the litigation claim
+ is not withdrawn, the rights granted by Participant to You under
+ Sections 2.1 and/or 2.2 automatically terminate at the expiration of
+ the 60 day notice period specified above.
+
+ (b) any software, hardware, or device, other than such Participant's
+ Contributor Version, directly or indirectly infringes any patent, then
+ any rights granted to You by such Participant under Sections 2.1(b)
+ and 2.2(b) are revoked effective as of the date You first made, used,
+ sold, distributed, or had made, Modifications made by that
+ Participant.
+
+ 8.3. If You assert a patent infringement claim against Participant
+ alleging that such Participant's Contributor Version directly or
+ indirectly infringes any patent where such claim is resolved (such as
+ by license or settlement) prior to the initiation of patent
+ infringement litigation, then the reasonable value of the licenses
+ granted by such Participant under Sections 2.1 or 2.2 shall be taken
+ into account in determining the amount or value of any payment or
+ license.
+
+ 8.4. In the event of termination under Sections 8.1 or 8.2 above,
+ all end user license agreements (excluding distributors and resellers)
+ which have been validly granted by You or any distributor hereunder
+ prior to termination shall survive termination.
+
+9. LIMITATION OF LIABILITY.
+
+ UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT
+ (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL
+ DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED CODE,
+ OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR
+ ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY
+ CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL,
+ WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER
+ COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN
+ INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF
+ LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY
+ RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW
+ PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE
+ EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO
+ THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU.
+
+10. U.S. GOVERNMENT END USERS.
+
+ The Covered Code is a "commercial item," as that term is defined in
+ 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial computer
+ software" and "commercial computer software documentation," as such
+ terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48
+ C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995),
+ all U.S. Government End Users acquire Covered Code with only those
+ rights set forth herein.
+
+11. MISCELLANEOUS.
+
+ This License represents the complete agreement concerning subject
+ matter hereof. If any provision of this License is held to be
+ unenforceable, such provision shall be reformed only to the extent
+ necessary to make it enforceable. This License shall be governed by
+ California law provisions (except to the extent applicable law, if
+ any, provides otherwise), excluding its conflict-of-law provisions.
+ With respect to disputes in which at least one party is a citizen of,
+ or an entity chartered or registered to do business in the United
+ States of America, any litigation relating to this License shall be
+ subject to the jurisdiction of the Federal Courts of the Northern
+ District of California, with venue lying in Santa Clara County,
+ California, with the losing party responsible for costs, including
+ without limitation, court costs and reasonable attorneys' fees and
+ expenses. The application of the United Nations Convention on
+ Contracts for the International Sale of Goods is expressly excluded.
+ Any law or regulation which provides that the language of a contract
+ shall be construed against the drafter shall not apply to this
+ License.
+
+12. RESPONSIBILITY FOR CLAIMS.
+
+ As between Initial Developer and the Contributors, each party is
+ responsible for claims and damages arising, directly or indirectly,
+ out of its utilization of rights under this License and You agree to
+ work with Initial Developer and Contributors to distribute such
+ responsibility on an equitable basis. Nothing herein is intended or
+ shall be deemed to constitute any admission of liability.
+
+13. MULTIPLE-LICENSED CODE.
+
+ Initial Developer may designate portions of the Covered Code as
+ "Multiple-Licensed". "Multiple-Licensed" means that the Initial
+ Developer permits you to utilize portions of the Covered Code under
+ Your choice of the NPL or the alternative licenses, if any, specified
+ by the Initial Developer in the file described in Exhibit A.
+
+EXHIBIT A -Mozilla Public License.
+
+ ``The contents of this file are subject to the Mozilla Public License
+ Version 1.1 (the "License"); you may not use this file except in
+ compliance with the License. You may obtain a copy of the License at
+ http://www.mozilla.org/MPL/
+
+ Software distributed under the License is distributed on an "AS IS"
+ basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+ License for the specific language governing rights and limitations
+ under the License.
+
+ The Original Code is ______________________________________.
+
+ The Initial Developer of the Original Code is ________________________.
+ Portions created by ______________________ are Copyright (C) ______
+ _______________________. All Rights Reserved.
+
+ Contributor(s): ______________________________________.
+
+ Alternatively, the contents of this file may be used under the terms
+ of the _____ license (the "[___] License"), in which case the
+ provisions of [______] License are applicable instead of those
+ above. If you wish to allow use of your version of this file only
+ under the terms of the [____] License and not to allow others to use
+ your version of this file under the MPL, indicate your decision by
+ deleting the provisions above and replace them with the notice and
+ other provisions required by the [___] License. If you do not delete
+ the provisions above, a recipient may use your version of this file
+ under either the MPL or the [___] License."
+
+ [NOTE: The text of this Exhibit A may differ slightly from the text of
+ the notices in the Source Code files of the Original Code. You should
+ use the text of this Exhibit A rather than the text found in the
+ Original Code Source Code for Your Modifications.]
+
diff --git a/extensions/spellcheck/hunspell/src/README.md b/extensions/spellcheck/hunspell/src/README.md
new file mode 100644
index 0000000000..90ef880e86
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/README.md
@@ -0,0 +1,325 @@
+# About Hunspell
+
+Hunspell is a free spell checker and morphological analyzer library
+and command-line tool, licensed under LGPL/GPL/MPL tri-license.
+
+Hunspell is used by LibreOffice office suite, free browsers, like
+Mozilla Firefox and Google Chrome, and other tools and OSes, like
+Linux distributions and macOS. It is also a command-line tool for
+Linux, Unix-like and other OSes.
+
+It is designed for quick and high quality spell checking and
+correcting for languages with word-level writing system,
+including languages with rich morphology, complex word compounding
+and character encoding.
+
+Hunspell interfaces: Ispell-like terminal interface using Curses
+library, Ispell pipe interface, C++/C APIs and shared library, also
+with existing language bindings for other programming languages.
+
+Hunspell's code base comes from OpenOffice.org's MySpell library,
+developed by Kevin Hendricks (originally a C++ reimplementation of
+spell checking and affixation of Geoff Kuenning's International
+Ispell from scratch, later extended with eg. n-gram suggestions),
+see http://lingucomponent.openoffice.org/MySpell-3.zip, and
+its README, CONTRIBUTORS and license.readme (here: license.myspell) files.
+
+Main features of Hunspell library, developed by László Németh:
+
+ - Unicode support
+ - Highly customizable suggestions: word-part replacement tables and
+ stem-level phonetic and other alternative transcriptions to recognize
+ and fix all typical misspellings, don't suggest offensive words etc.
+ - Complex morphology: dictionary and affix homonyms; twofold affix
+ stripping to handle inflectional and derivational morpheme groups for
+ agglutinative languages, like Azeri, Basque, Estonian, Finnish, Hungarian,
+ Turkish; 64 thousand affix classes with arbitrary number of affixes;
+ conditional affixes, circumfixes, fogemorphemes, zero morphemes,
+ virtual dictionary stems, forbidden words to avoid overgeneration etc.
+ - Handling complex compounds (for example, for Finno-Ugric, German and
+ Indo-Aryan languages): recognizing compounds made of arbitrary
+ number of words, handle affixation within compounds etc.
+ - Custom dictionaries with affixation
+ - Stemming
+ - Morphological analysis (in custom item and arrangement style)
+ - Morphological generation
+ - SPELLML XML API over plain spell() API function for easier integration
+ of stemming, morpological generation and custom dictionaries with affixation
+ - Language specific algorithms, like special casing of Azeri or Turkish
+ dotted i and German sharp s, and special compound rules of Hungarian.
+
+Main features of Hunspell command line tool, developed by László Németh:
+
+ - Reimplementation of quick interactive interface of Geoff Kuenning's Ispell
+ - Parsing formats: text, OpenDocument, TeX/LaTeX, HTML/SGML/XML, nroff/troff
+ - Custom dictionaries with optional affixation, specified by a model word
+ - Multiple dictionary usage (for example hunspell -d en_US,de_DE,de_medical)
+ - Various filtering options (bad or good words/lines)
+ - Morphological analysis (option -m)
+ - Stemming (option -s)
+
+See man hunspell, man 3 hunspell, man 5 hunspell for complete manual.
+
+Translations: Hunspell has been translated into several languages already. If your language is missing or incomplete, please use [Weblate](https://hosted.weblate.org/engage/hunspell/) to help translate Hunspell.
+
+<a href="https://hosted.weblate.org/engage/hunspell/">
+<img src="https://hosted.weblate.org/widgets/hunspell/-/translations/horizontal-auto.svg" alt="Stanje prijevoda" />
+</a>
+
+# Dependencies
+
+Build only dependencies:
+
+ g++ make autoconf automake autopoint libtool
+
+Runtime dependencies:
+
+| | Mandatory | Optional |
+|---------------|------------------|------------------|
+|libhunspell | | |
+|hunspell tool | libiconv gettext | ncurses readline |
+
+# Compiling on GNU/Linux and Unixes
+
+We first need to download the dependencies. On Linux, `gettext` and
+`libiconv` are part of the standard library. On other Unixes we
+need to manually install them.
+
+For Ubuntu:
+
+ sudo apt install autoconf automake autopoint libtool
+
+Then run the following commands:
+
+ autoreconf -vfi
+ ./configure
+ make
+ sudo make install
+ sudo ldconfig
+
+For dictionary development, use the `--with-warnings` option of
+configure.
+
+For interactive user interface of Hunspell executable, use the
+`--with-ui` option.
+
+Optional developer packages:
+
+ - ncurses (need for --with-ui), eg. libncursesw5 for UTF-8
+ - readline (for fancy input line editing, configure parameter:
+ --with-readline)
+
+In Ubuntu, the packages are:
+
+ libncurses5-dev libreadline-dev
+
+# Compiling on OSX and macOS
+
+On macOS for compiler always use `clang` and not `g++` because Homebrew
+dependencies are build with that.
+
+ brew install autoconf automake libtool gettext
+ brew link gettext --force
+
+Then run:
+
+ autoreconf -vfi
+ ./configure
+ make
+
+# Compiling on Windows
+
+## Compiling with Mingw64 and MSYS2
+
+Download Msys2, update everything and install the following
+ packages:
+
+ pacman -S base-devel mingw-w64-x86_64-toolchain mingw-w64-x86_64-libtool
+
+Open Mingw-w64 Win64 prompt and compile the same way as on Linux, see
+above.
+
+## Compiling in Cygwin environment
+
+Download and install Cygwin environment for Windows with the following
+extra packages:
+
+ - make
+ - automake
+ - autoconf
+ - libtool
+ - gcc-g++ development package
+ - ncurses, readline (for user interface)
+ - iconv (character conversion)
+
+Then compile the same way as on Linux. Cygwin builds depend on
+Cygwin1.dll.
+
+# Debugging
+
+It is recommended to install a debug build of the standard library:
+
+ libstdc++6-6-dbg
+
+For debugging we need to create a debug build and then we need to start
+`gdb`.
+
+ ./configure CXXFLAGS='-g -O0 -Wall -Wextra'
+ make
+ ./libtool --mode=execute gdb src/tools/hunspell
+
+You can also pass the `CXXFLAGS` directly to `make` without calling
+`./configure`, but we don't recommend this way during long development
+sessions.
+
+If you like to develop and debug with an IDE, see documentation at
+https://github.com/hunspell/hunspell/wiki/IDE-Setup
+
+# Testing
+
+Testing Hunspell (see tests in tests/ subdirectory):
+
+ make check
+
+or with Valgrind debugger:
+
+ make check
+ VALGRIND=[Valgrind_tool] make check
+
+For example:
+
+ make check
+ VALGRIND=memcheck make check
+
+# Documentation
+
+features and dictionary format:
+
+ man 5 hunspell
+ man hunspell
+ hunspell -h
+
+http://hunspell.github.io/
+
+# Usage
+
+After compiling and installing (see INSTALL) you can run the Hunspell
+spell checker (compiled with user interface) with a Hunspell or Myspell
+dictionary:
+
+ hunspell -d en_US text.txt
+
+or without interface:
+
+ hunspell
+ hunspell -d en_GB -l <text.txt
+
+Dictionaries consist of an affix (.aff) and dictionary (.dic) file, for
+example, download American English dictionary files of LibreOffice
+(older version, but with stemming and morphological generation) with
+
+ wget -O en_US.aff https://cgit.freedesktop.org/libreoffice/dictionaries/plain/en/en_US.aff?id=a4473e06b56bfe35187e302754f6baaa8d75e54f
+ wget -O en_US.dic https://cgit.freedesktop.org/libreoffice/dictionaries/plain/en/en_US.dic?id=a4473e06b56bfe35187e302754f6baaa8d75e54f
+
+and with command line input and output, it's possible to check its work quickly,
+for example with the input words "example", "examples", "teached" and
+"verybaaaaaaaaaaaaaaaaaaaaaad":
+
+ $ hunspell -d en_US
+ Hunspell 1.7.0
+ example
+ *
+
+ examples
+ + example
+
+ teached
+ & teached 9 0: taught, teased, reached, teaches, teacher, leached, beached
+
+ verybaaaaaaaaaaaaaaaaaaaaaad
+ # verybaaaaaaaaaaaaaaaaaaaaaad 0
+
+Where in the output, `*` and `+` mean correct (accepted) words (`*` = dictionary stem,
+`+` = affixed forms of the following dictionary stem), and
+`&` and `#` mean bad (rejected) words (`&` = with suggestions, `#` = without suggestions)
+(see man hunspell).
+
+Example for stemming:
+
+ $ hunspell -d en_US -s
+ mice
+ mice mouse
+
+Example for morphological analysis (very limited with this English dictionary):
+
+ $ hunspell -d en_US -m
+ mice
+ mice st:mouse ts:Ns
+
+ cats
+ cats st:cat ts:0 is:Ns
+ cats st:cat ts:0 is:Vs
+
+# Other executables
+
+The src/tools directory contains the following executables after compiling.
+
+ - The main executable:
+ - hunspell: main program for spell checking and others (see
+ manual)
+ - Example tools:
+ - analyze: example of spell checking, stemming and morphological
+ analysis
+ - chmorph: example of automatic morphological generation and
+ conversion
+ - example: example of spell checking and suggestion
+ - Tools for dictionary development:
+ - affixcompress: dictionary generation from large (millions of
+ words) vocabularies
+ - makealias: alias compression (Hunspell only, not back compatible
+ with MySpell)
+ - wordforms: word generation (Hunspell version of unmunch)
+ - hunzip: decompressor of hzip format
+ - hzip: compressor of hzip format
+ - munch (DEPRECATED, use affixcompress): dictionary generation
+ from vocabularies (it needs an affix file, too).
+ - unmunch (DEPRECATED, use wordforms): list all recognized words
+ of a MySpell dictionary
+
+Example for morphological generation:
+
+ $ ~/hunspell/src/tools/analyze en_US.aff en_US.dic /dev/stdin
+ cat mice
+ generate(cat, mice) = cats
+ mouse cats
+ generate(mouse, cats) = mice
+ generate(mouse, cats) = mouses
+
+# Using Hunspell library with GCC
+
+Including in your program:
+
+ #include <hunspell.hxx>
+
+Linking with Hunspell static library:
+
+ g++ -lhunspell-1.7 example.cxx
+ # or better, use pkg-config
+ g++ $(pkg-config --cflags --libs hunspell) example.cxx
+
+## Dictionaries
+
+Hunspell (MySpell) dictionaries:
+
+ - https://wiki.documentfoundation.org/Language_support_of_LibreOffice
+ - http://cgit.freedesktop.org/libreoffice/dictionaries
+ - http://extensions.libreoffice.org
+ - https://extensions.openoffice.org
+ - https://wiki.openoffice.org/wiki/Dictionaries
+
+Aspell dictionaries (conversion: man 5 hunspell):
+
+ - ftp://ftp.gnu.org/gnu/aspell/dict
+
+László Németh, nemeth at numbertext org
+
diff --git a/extensions/spellcheck/hunspell/src/affentry.cxx b/extensions/spellcheck/hunspell/src/affentry.cxx
new file mode 100644
index 0000000000..2cf4f4671f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/affentry.cxx
@@ -0,0 +1,983 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include "affentry.hxx"
+#include "csutil.hxx"
+
+AffEntry::~AffEntry() {
+ if (opts & aeLONGCOND)
+ free(c.l.conds2);
+ if (morphcode && !(opts & aeALIASM))
+ free(morphcode);
+ if (contclass && !(opts & aeALIASF))
+ free(contclass);
+}
+
+PfxEntry::PfxEntry(AffixMgr* pmgr)
+ // register affix manager
+ : pmyMgr(pmgr),
+ next(NULL),
+ nexteq(NULL),
+ nextne(NULL),
+ flgnxt(NULL) {
+}
+
+// add prefix to this word assuming conditions hold
+std::string PfxEntry::add(const char* word, size_t len) {
+ std::string result;
+ if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&
+ (len >= numconds) && test_condition(word) &&
+ (!strip.size() || (strncmp(word, strip.c_str(), strip.size()) == 0))) {
+ /* we have a match so add prefix */
+ result.assign(appnd);
+ result.append(word + strip.size());
+ }
+ return result;
+}
+
+inline char* PfxEntry::nextchar(char* p) {
+ if (p) {
+ p++;
+ if (opts & aeLONGCOND) {
+ // jump to the 2nd part of the condition
+ if (p == c.conds + MAXCONDLEN_1)
+ return c.l.conds2;
+ // end of the MAXCONDLEN length condition
+ } else if (p == c.conds + MAXCONDLEN)
+ return NULL;
+ return *p ? p : NULL;
+ }
+ return NULL;
+}
+
+inline int PfxEntry::test_condition(const char* st) {
+ const char* pos = NULL; // group with pos input position
+ bool neg = false; // complementer
+ bool ingroup = false; // character in the group
+ if (numconds == 0)
+ return 1;
+ char* p = c.conds;
+ while (1) {
+ switch (*p) {
+ case '\0':
+ return 1;
+ case '[': {
+ neg = false;
+ ingroup = false;
+ p = nextchar(p);
+ pos = st;
+ break;
+ }
+ case '^': {
+ p = nextchar(p);
+ neg = true;
+ break;
+ }
+ case ']': {
+ if (bool(neg) == bool(ingroup))
+ return 0;
+ pos = NULL;
+ p = nextchar(p);
+ // skip the next character
+ if (!ingroup && *st)
+ for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++)
+ ;
+ if (*st == '\0' && p)
+ return 0; // word <= condition
+ break;
+ }
+ case '.':
+ if (!pos) { // dots are not metacharacters in groups: [.]
+ p = nextchar(p);
+ // skip the next character
+ for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++)
+ ;
+ if (*st == '\0' && p)
+ return 0; // word <= condition
+ break;
+ }
+ /* FALLTHROUGH */
+ default: {
+ if (*st == *p) {
+ st++;
+ p = nextchar(p);
+ if ((opts & aeUTF8) && (*(st - 1) & 0x80)) { // multibyte
+ while (p && (*p & 0xc0) == 0x80) { // character
+ if (*p != *st) {
+ if (!pos)
+ return 0;
+ st = pos;
+ break;
+ }
+ p = nextchar(p);
+ st++;
+ }
+ if (pos && st != pos) {
+ ingroup = true;
+ while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
+ }
+ }
+ } else if (pos) {
+ ingroup = true;
+ while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
+ }
+ }
+ } else if (pos) { // group
+ p = nextchar(p);
+ } else
+ return 0;
+ }
+ }
+ if (!p)
+ return 1;
+ }
+}
+
+// check if this prefix entry matches
+struct hentry* PfxEntry::checkword(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag) {
+ struct hentry* he; // hash entry of root word or NULL
+
+ // on entry prefix is 0 length or already matches the beginning of the word.
+ // So if the remaining root word has positive length
+ // and if there are enough chars in root word and added back strip chars
+ // to meet the number of characters conditions, then test it
+
+ int tmpl = len - appnd.size(); // length of tmpword
+
+ if (tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) {
+ // generate new root word by removing prefix and adding
+ // back any characters that would have been stripped
+
+ std::string tmpword(strip);
+ tmpword.append(word + appnd.size(), tmpl);
+
+ // now make sure all of the conditions on characters
+ // are met. Please see the appendix at the end of
+ // this file for more info on exactly what is being
+ // tested
+
+ // if all conditions are met then check if resulting
+ // root word in the dictionary
+
+ if (test_condition(tmpword.c_str())) {
+ tmpl += strip.size();
+ if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) {
+ do {
+ if (TESTAFF(he->astr, aflag, he->alen) &&
+ // forbid single prefixes with needaffix flag
+ !TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
+ // needflag
+ ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
+ (contclass && TESTAFF(contclass, needflag, contclasslen))))
+ return he;
+ he = he->next_homonym; // check homonyms
+ } while (he);
+ }
+
+ // prefix matched but no root word was found
+ // if aeXPRODUCT is allowed, try again but now
+ // ross checked combined with a suffix
+
+ // if ((opts & aeXPRODUCT) && in_compound) {
+ if ((opts & aeXPRODUCT)) {
+ he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, aeXPRODUCT, this,
+ FLAG_NULL, needflag, in_compound);
+ if (he)
+ return he;
+ }
+ }
+ }
+ return NULL;
+}
+
+// check if this prefix entry matches
+struct hentry* PfxEntry::check_twosfx(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag) {
+ // on entry prefix is 0 length or already matches the beginning of the word.
+ // So if the remaining root word has positive length
+ // and if there are enough chars in root word and added back strip chars
+ // to meet the number of characters conditions, then test it
+
+ int tmpl = len - appnd.size(); // length of tmpword
+
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + strip.size() >= numconds)) {
+ // generate new root word by removing prefix and adding
+ // back any characters that would have been stripped
+
+ std::string tmpword(strip);
+ tmpword.append(word + appnd.size());
+
+ // now make sure all of the conditions on characters
+ // are met. Please see the appendix at the end of
+ // this file for more info on exactly what is being
+ // tested
+
+ // if all conditions are met then check if resulting
+ // root word in the dictionary
+
+ if (test_condition(tmpword.c_str())) {
+ tmpl += strip.size();
+
+ // prefix matched but no root word was found
+ // if aeXPRODUCT is allowed, try again but now
+ // cross checked combined with a suffix
+
+ if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
+ // hash entry of root word or NULL
+ struct hentry* he = pmyMgr->suffix_check_twosfx(tmpword.c_str(), tmpl, aeXPRODUCT, this,
+ needflag);
+ if (he)
+ return he;
+ }
+ }
+ }
+ return NULL;
+}
+
+// check if this prefix entry matches
+std::string PfxEntry::check_twosfx_morph(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag) {
+ std::string result;
+ // on entry prefix is 0 length or already matches the beginning of the word.
+ // So if the remaining root word has positive length
+ // and if there are enough chars in root word and added back strip chars
+ // to meet the number of characters conditions, then test it
+ int tmpl = len - appnd.size(); // length of tmpword
+
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + strip.size() >= numconds)) {
+ // generate new root word by removing prefix and adding
+ // back any characters that would have been stripped
+
+ std::string tmpword(strip);
+ tmpword.append(word + appnd.size());
+
+ // now make sure all of the conditions on characters
+ // are met. Please see the appendix at the end of
+ // this file for more info on exactly what is being
+ // tested
+
+ // if all conditions are met then check if resulting
+ // root word in the dictionary
+
+ if (test_condition(tmpword.c_str())) {
+ tmpl += strip.size();
+
+ // prefix matched but no root word was found
+ // if aeXPRODUCT is allowed, try again but now
+ // ross checked combined with a suffix
+
+ if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
+ result = pmyMgr->suffix_check_twosfx_morph(tmpword.c_str(), tmpl,
+ aeXPRODUCT,
+ this, needflag);
+ }
+ }
+ }
+ return result;
+}
+
+// check if this prefix entry matches
+std::string PfxEntry::check_morph(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag) {
+ std::string result;
+
+ // on entry prefix is 0 length or already matches the beginning of the word.
+ // So if the remaining root word has positive length
+ // and if there are enough chars in root word and added back strip chars
+ // to meet the number of characters conditions, then test it
+
+ int tmpl = len - appnd.size(); // length of tmpword
+
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + strip.size() >= numconds)) {
+ // generate new root word by removing prefix and adding
+ // back any characters that would have been stripped
+
+ std::string tmpword(strip);
+ tmpword.append(word + appnd.size());
+
+ // now make sure all of the conditions on characters
+ // are met. Please see the appendix at the end of
+ // this file for more info on exactly what is being
+ // tested
+
+ // if all conditions are met then check if resulting
+ // root word in the dictionary
+
+ if (test_condition(tmpword.c_str())) {
+ tmpl += strip.size();
+ struct hentry* he; // hash entry of root word or NULL
+ if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) {
+ do {
+ if (TESTAFF(he->astr, aflag, he->alen) &&
+ // forbid single prefixes with needaffix flag
+ !TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) &&
+ // needflag
+ ((!needflag) || TESTAFF(he->astr, needflag, he->alen) ||
+ (contclass && TESTAFF(contclass, needflag, contclasslen)))) {
+ if (morphcode) {
+ result.push_back(MSEP_FLD);
+ result.append(morphcode);
+ } else
+ result.append(getKey());
+ if (!HENTRY_FIND(he, MORPH_STEM)) {
+ result.push_back(MSEP_FLD);
+ result.append(MORPH_STEM);
+ result.append(HENTRY_WORD(he));
+ }
+ // store the pointer of the hash entry
+ if (HENTRY_DATA(he)) {
+ result.push_back(MSEP_FLD);
+ result.append(HENTRY_DATA2(he));
+ } else {
+ // return with debug information
+ char* flag = pmyMgr->encode_flag(getFlag());
+ result.push_back(MSEP_FLD);
+ result.append(MORPH_FLAG);
+ result.append(flag);
+ free(flag);
+ }
+ result.push_back(MSEP_REC);
+ }
+ he = he->next_homonym;
+ } while (he);
+ }
+
+ // prefix matched but no root word was found
+ // if aeXPRODUCT is allowed, try again but now
+ // ross checked combined with a suffix
+
+ if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
+ std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, aeXPRODUCT, this,
+ FLAG_NULL, needflag);
+ if (!st.empty()) {
+ result.append(st);
+ }
+ }
+ }
+ }
+
+ return result;
+}
+
+SfxEntry::SfxEntry(AffixMgr* pmgr)
+ : pmyMgr(pmgr) // register affix manager
+ ,
+ next(NULL),
+ nexteq(NULL),
+ nextne(NULL),
+ flgnxt(NULL),
+ l_morph(NULL),
+ r_morph(NULL),
+ eq_morph(NULL) {
+}
+
+// add suffix to this word assuming conditions hold
+std::string SfxEntry::add(const char* word, size_t len) {
+ std::string result;
+ /* make sure all conditions match */
+ if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&
+ (len >= numconds) && test_condition(word + len, word) &&
+ (!strip.size() ||
+ (strcmp(word + len - strip.size(), strip.c_str()) == 0))) {
+ result.assign(word);
+ /* we have a match so add suffix */
+ result.replace(len - strip.size(), std::string::npos, appnd);
+ }
+ return result;
+}
+
+inline char* SfxEntry::nextchar(char* p) {
+ if (p) {
+ p++;
+ if (opts & aeLONGCOND) {
+ // jump to the 2nd part of the condition
+ if (p == c.l.conds1 + MAXCONDLEN_1)
+ return c.l.conds2;
+ // end of the MAXCONDLEN length condition
+ } else if (p == c.conds + MAXCONDLEN)
+ return NULL;
+ return *p ? p : NULL;
+ }
+ return NULL;
+}
+
+inline int SfxEntry::test_condition(const char* st, const char* beg) {
+ const char* pos = NULL; // group with pos input position
+ bool neg = false; // complementer
+ bool ingroup = false; // character in the group
+ if (numconds == 0)
+ return 1;
+ char* p = c.conds;
+ st--;
+ int i = 1;
+ while (1) {
+ switch (*p) {
+ case '\0':
+ return 1;
+ case '[':
+ p = nextchar(p);
+ pos = st;
+ break;
+ case '^':
+ p = nextchar(p);
+ neg = true;
+ break;
+ case ']':
+ if (!neg && !ingroup)
+ return 0;
+ i++;
+ // skip the next character
+ if (!ingroup) {
+ for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--)
+ ;
+ st--;
+ }
+ pos = NULL;
+ neg = false;
+ ingroup = false;
+ p = nextchar(p);
+ if (st < beg && p)
+ return 0; // word <= condition
+ break;
+ case '.':
+ if (!pos) {
+ // dots are not metacharacters in groups: [.]
+ p = nextchar(p);
+ // skip the next character
+ for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80;
+ st--)
+ ;
+ if (st < beg) { // word <= condition
+ if (p)
+ return 0;
+ else
+ return 1;
+ }
+ if ((opts & aeUTF8) && (*st & 0x80)) { // head of the UTF-8 character
+ st--;
+ if (st < beg) { // word <= condition
+ if (p)
+ return 0;
+ else
+ return 1;
+ }
+ }
+ break;
+ }
+ /* FALLTHROUGH */
+ default: {
+ if (*st == *p) {
+ p = nextchar(p);
+ if ((opts & aeUTF8) && (*st & 0x80)) {
+ st--;
+ while (p && (st >= beg)) {
+ if (*p != *st) {
+ if (!pos)
+ return 0;
+ st = pos;
+ break;
+ }
+ // first byte of the UTF-8 multibyte character
+ if ((*p & 0xc0) != 0x80)
+ break;
+ p = nextchar(p);
+ st--;
+ }
+ if (pos && st != pos) {
+ if (neg)
+ return 0;
+ else if (i == numconds)
+ return 1;
+ ingroup = true;
+ while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
+ }
+ st--;
+ }
+ if (p && *p != ']')
+ p = nextchar(p);
+ } else if (pos) {
+ if (neg)
+ return 0;
+ else if (i == numconds)
+ return 1;
+ ingroup = true;
+ while (p && *p != ']' && ((p = nextchar(p)) != NULL)) {
+ }
+ // if (p && *p != ']') p = nextchar(p);
+ st--;
+ }
+ if (!pos) {
+ i++;
+ st--;
+ }
+ if (st < beg && p && *p != ']')
+ return 0; // word <= condition
+ } else if (pos) { // group
+ p = nextchar(p);
+ } else
+ return 0;
+ }
+ }
+ if (!p)
+ return 1;
+ }
+}
+
+// see if this suffix is present in the word
+struct hentry* SfxEntry::checkword(const char* word,
+ int len,
+ int optflags,
+ PfxEntry* ppfx,
+ const FLAG cclass,
+ const FLAG needflag,
+ const FLAG badflag) {
+ struct hentry* he; // hash entry pointer
+ PfxEntry* ep = ppfx;
+
+ // if this suffix is being cross checked with a prefix
+ // but it does not support cross products skip it
+
+ if (((optflags & aeXPRODUCT) != 0) && ((opts & aeXPRODUCT) == 0))
+ return NULL;
+
+ // upon entry suffix is 0 length or already matches the end of the word.
+ // So if the remaining root word has positive length
+ // and if there are enough chars in root word and added back strip chars
+ // to meet the number of characters conditions, then test it
+
+ int tmpl = len - appnd.size(); // length of tmpword
+ // the second condition is not enough for UTF-8 strings
+ // it checked in test_condition()
+
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + strip.size() >= numconds)) {
+ // generate new root word by removing suffix and adding
+ // back any characters that would have been stripped or
+ // or null terminating the shorter string
+
+ std::string tmpstring(word, tmpl);
+ if (strip.size()) {
+ tmpstring.append(strip);
+ }
+
+ const char* tmpword = tmpstring.c_str();
+ const char* endword = tmpword + tmpstring.size();
+
+ // now make sure all of the conditions on characters
+ // are met. Please see the appendix at the end of
+ // this file for more info on exactly what is being
+ // tested
+
+ // if all conditions are met then check if resulting
+ // root word in the dictionary
+
+ if (test_condition(endword, tmpword)) {
+#ifdef SZOSZABLYA_POSSIBLE_ROOTS
+ fprintf(stdout, "%s %s %c\n", word, tmpword, aflag);
+#endif
+ if ((he = pmyMgr->lookup(tmpword)) != NULL) {
+ do {
+ // check conditional suffix (enabled by prefix)
+ if ((TESTAFF(he->astr, aflag, he->alen) ||
+ (ep && ep->getCont() &&
+ TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
+ (((optflags & aeXPRODUCT) == 0) ||
+ (ep && TESTAFF(he->astr, ep->getFlag(), he->alen)) ||
+ // enabled by prefix
+ ((contclass) &&
+ (ep && TESTAFF(contclass, ep->getFlag(), contclasslen)))) &&
+ // handle cont. class
+ ((!cclass) ||
+ ((contclass) && TESTAFF(contclass, cclass, contclasslen))) &&
+ // check only in compound homonyms (bad flags)
+ (!badflag || !TESTAFF(he->astr, badflag, he->alen)) &&
+ // handle required flag
+ ((!needflag) ||
+ (TESTAFF(he->astr, needflag, he->alen) ||
+ ((contclass) && TESTAFF(contclass, needflag, contclasslen)))))
+ return he;
+ he = he->next_homonym; // check homonyms
+ } while (he);
+ }
+ }
+ }
+ return NULL;
+}
+
+// see if two-level suffix is present in the word
+struct hentry* SfxEntry::check_twosfx(const char* word,
+ int len,
+ int optflags,
+ PfxEntry* ppfx,
+ const FLAG needflag) {
+ PfxEntry* ep = ppfx;
+
+ // if this suffix is being cross checked with a prefix
+ // but it does not support cross products skip it
+
+ if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
+ return NULL;
+
+ // upon entry suffix is 0 length or already matches the end of the word.
+ // So if the remaining root word has positive length
+ // and if there are enough chars in root word and added back strip chars
+ // to meet the number of characters conditions, then test it
+
+ int tmpl = len - appnd.size(); // length of tmpword
+
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + strip.size() >= numconds)) {
+ // generate new root word by removing suffix and adding
+ // back any characters that would have been stripped or
+ // or null terminating the shorter string
+
+ std::string tmpword(word);
+ tmpword.resize(tmpl);
+ tmpword.append(strip);
+ tmpl += strip.size();
+
+ const char* beg = tmpword.c_str();
+ const char* end = beg + tmpl;
+
+ // now make sure all of the conditions on characters
+ // are met. Please see the appendix at the end of
+ // this file for more info on exactly what is being
+ // tested
+
+ // if all conditions are met then recall suffix_check
+
+ if (test_condition(end, beg)) {
+ struct hentry* he; // hash entry pointer
+ if (ppfx) {
+ // handle conditional suffix
+ if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
+ he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL,
+ (FLAG)aflag, needflag, IN_CPD_NOT);
+ else
+ he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, optflags, ppfx,
+ (FLAG)aflag, needflag, IN_CPD_NOT);
+ } else {
+ he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL,
+ (FLAG)aflag, needflag, IN_CPD_NOT);
+ }
+ if (he)
+ return he;
+ }
+ }
+ return NULL;
+}
+
+// see if two-level suffix is present in the word
+std::string SfxEntry::check_twosfx_morph(const char* word,
+ int len,
+ int optflags,
+ PfxEntry* ppfx,
+ const FLAG needflag) {
+ PfxEntry* ep = ppfx;
+
+ std::string result;
+
+ // if this suffix is being cross checked with a prefix
+ // but it does not support cross products skip it
+
+ if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
+ return result;
+
+ // upon entry suffix is 0 length or already matches the end of the word.
+ // So if the remaining root word has positive length
+ // and if there are enough chars in root word and added back strip chars
+ // to meet the number of characters conditions, then test it
+
+ int tmpl = len - appnd.size(); // length of tmpword
+
+ if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
+ (tmpl + strip.size() >= numconds)) {
+ // generate new root word by removing suffix and adding
+ // back any characters that would have been stripped or
+ // or null terminating the shorter string
+
+ std::string tmpword(word);
+ tmpword.resize(tmpl);
+ tmpword.append(strip);
+ tmpl += strip.size();
+
+ const char* beg = tmpword.c_str();
+ const char* end = beg + tmpl;
+
+ // now make sure all of the conditions on characters
+ // are met. Please see the appendix at the end of
+ // this file for more info on exactly what is being
+ // tested
+
+ // if all conditions are met then recall suffix_check
+
+ if (test_condition(end, beg)) {
+ if (ppfx) {
+ // handle conditional suffix
+ if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) {
+ std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag,
+ needflag);
+ if (!st.empty()) {
+ if (ppfx->getMorph()) {
+ result.append(ppfx->getMorph());
+ result.push_back(MSEP_FLD);
+ }
+ result.append(st);
+ mychomp(result);
+ }
+ } else {
+ std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, optflags, ppfx, aflag,
+ needflag);
+ if (!st.empty()) {
+ result.append(st);
+ mychomp(result);
+ }
+ }
+ } else {
+ std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag, needflag);
+ if (!st.empty()) {
+ result.append(st);
+ mychomp(result);
+ }
+ }
+ }
+ }
+ return result;
+}
+
+// get next homonym with same affix
+struct hentry* SfxEntry::get_next_homonym(struct hentry* he,
+ int optflags,
+ PfxEntry* ppfx,
+ const FLAG cclass,
+ const FLAG needflag) {
+ PfxEntry* ep = ppfx;
+ FLAG eFlag = ep ? ep->getFlag() : FLAG_NULL;
+
+ while (he->next_homonym) {
+ he = he->next_homonym;
+ if ((TESTAFF(he->astr, aflag, he->alen) ||
+ (ep && ep->getCont() &&
+ TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
+ ((optflags & aeXPRODUCT) == 0 || TESTAFF(he->astr, eFlag, he->alen) ||
+ // handle conditional suffix
+ ((contclass) && TESTAFF(contclass, eFlag, contclasslen))) &&
+ // handle cont. class
+ ((!cclass) ||
+ ((contclass) && TESTAFF(contclass, cclass, contclasslen))) &&
+ // handle required flag
+ ((!needflag) ||
+ (TESTAFF(he->astr, needflag, he->alen) ||
+ ((contclass) && TESTAFF(contclass, needflag, contclasslen)))))
+ return he;
+ }
+ return NULL;
+}
+
+void SfxEntry::initReverseWord() {
+ rappnd = appnd;
+ reverseword(rappnd);
+}
+
+#if 0
+
+Appendix: Understanding Affix Code
+
+
+An affix is either a prefix or a suffix attached to root words to make
+other words.
+
+Basically a Prefix or a Suffix is set of AffEntry objects
+which store information about the prefix or suffix along
+with supporting routines to check if a word has a particular
+prefix or suffix or a combination.
+
+The structure affentry is defined as follows:
+
+struct affentry
+{
+ unsigned short aflag; // ID used to represent the affix
+ std::string strip; // string to strip before adding affix
+ std::string appnd; // the affix string to add
+ char numconds; // the number of conditions that must be met
+ char opts; // flag: aeXPRODUCT- combine both prefix and suffix
+ char conds[SETSIZE]; // array which encodes the conditions to be met
+};
+
+
+Here is a suffix borrowed from the en_US.aff file. This file
+is whitespace delimited.
+
+SFX D Y 4
+SFX D 0 e d
+SFX D y ied [^aeiou]y
+SFX D 0 ed [^ey]
+SFX D 0 ed [aeiou]y
+
+This information can be interpreted as follows:
+
+In the first line has 4 fields
+
+Field
+-----
+1 SFX - indicates this is a suffix
+2 D - is the name of the character flag which represents this suffix
+3 Y - indicates it can be combined with prefixes (cross product)
+4 4 - indicates that sequence of 4 affentry structures are needed to
+ properly store the affix information
+
+The remaining lines describe the unique information for the 4 SfxEntry
+objects that make up this affix. Each line can be interpreted
+as follows: (note fields 1 and 2 are as a check against line 1 info)
+
+Field
+-----
+1 SFX - indicates this is a suffix
+2 D - is the name of the character flag for this affix
+3 y - the string of chars to strip off before adding affix
+ (a 0 here indicates the NULL string)
+4 ied - the string of affix characters to add
+5 [^aeiou]y - the conditions which must be met before the affix
+ can be applied
+
+Field 5 is interesting. Since this is a suffix, field 5 tells us that
+there are 2 conditions that must be met. The first condition is that
+the next to the last character in the word must *NOT* be any of the
+following "a", "e", "i", "o" or "u". The second condition is that
+the last character of the word must end in "y".
+
+So how can we encode this information concisely and be able to
+test for both conditions in a fast manner? The answer is found
+but studying the wonderful ispell code of Geoff Kuenning, et.al.
+(now available under a normal BSD license).
+
+If we set up a conds array of 256 bytes indexed (0 to 255) and access it
+using a character (cast to an unsigned char) of a string, we have 8 bits
+of information we can store about that character. Specifically we
+could use each bit to say if that character is allowed in any of the
+last (or first for prefixes) 8 characters of the word.
+
+Basically, each character at one end of the word (up to the number
+of conditions) is used to index into the conds array and the resulting
+value found there says whether the that character is valid for a
+specific character position in the word.
+
+For prefixes, it does this by setting bit 0 if that char is valid
+in the first position, bit 1 if valid in the second position, and so on.
+
+If a bit is not set, then that char is not valid for that postion in the
+word.
+
+If working with suffixes bit 0 is used for the character closest
+to the front, bit 1 for the next character towards the end, ...,
+with bit numconds-1 representing the last char at the end of the string.
+
+Note: since entries in the conds[] are 8 bits, only 8 conditions
+(read that only 8 character positions) can be examined at one
+end of a word (the beginning for prefixes and the end for suffixes.
+
+So to make this clearer, lets encode the conds array values for the
+first two affentries for the suffix D described earlier.
+
+
+ For the first affentry:
+ numconds = 1 (only examine the last character)
+
+ conds['e'] = (1 << 0) (the word must end in an E)
+ all others are all 0
+
+ For the second affentry:
+ numconds = 2 (only examine the last two characters)
+
+ conds[X] = conds[X] | (1 << 0) (aeiou are not allowed)
+ where X is all characters *but* a, e, i, o, or u
+
+
+ conds['y'] = (1 << 1) (the last char must be a y)
+ all other bits for all other entries in the conds array are zero
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/affentry.hxx b/extensions/spellcheck/hunspell/src/affentry.hxx
new file mode 100644
index 0000000000..b736bf0350
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/affentry.hxx
@@ -0,0 +1,223 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef AFFIX_HXX_
+#define AFFIX_HXX_
+
+#include "atypes.hxx"
+#include "baseaffix.hxx"
+#include "affixmgr.hxx"
+
+/* A Prefix Entry */
+
+class PfxEntry : public AffEntry {
+ private:
+ PfxEntry(const PfxEntry&);
+ PfxEntry& operator=(const PfxEntry&);
+
+ private:
+ AffixMgr* pmyMgr;
+
+ PfxEntry* next;
+ PfxEntry* nexteq;
+ PfxEntry* nextne;
+ PfxEntry* flgnxt;
+
+ public:
+ explicit PfxEntry(AffixMgr* pmgr);
+
+ bool allowCross() const { return ((opts & aeXPRODUCT) != 0); }
+ struct hentry* checkword(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag = FLAG_NULL);
+
+ struct hentry* check_twosfx(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag = FLAG_NULL);
+
+ std::string check_morph(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag = FLAG_NULL);
+
+ std::string check_twosfx_morph(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag = FLAG_NULL);
+
+ FLAG getFlag() { return aflag; }
+ const char* getKey() { return appnd.c_str(); }
+ std::string add(const char* word, size_t len);
+
+ inline short getKeyLen() { return appnd.size(); }
+
+ inline const char* getMorph() { return morphcode; }
+
+ inline const unsigned short* getCont() { return contclass; }
+ inline short getContLen() { return contclasslen; }
+
+ inline PfxEntry* getNext() { return next; }
+ inline PfxEntry* getNextNE() { return nextne; }
+ inline PfxEntry* getNextEQ() { return nexteq; }
+ inline PfxEntry* getFlgNxt() { return flgnxt; }
+
+ inline void setNext(PfxEntry* ptr) { next = ptr; }
+ inline void setNextNE(PfxEntry* ptr) { nextne = ptr; }
+ inline void setNextEQ(PfxEntry* ptr) { nexteq = ptr; }
+ inline void setFlgNxt(PfxEntry* ptr) { flgnxt = ptr; }
+
+ inline char* nextchar(char* p);
+ inline int test_condition(const char* st);
+};
+
+/* A Suffix Entry */
+
+class SfxEntry : public AffEntry {
+ private:
+ SfxEntry(const SfxEntry&);
+ SfxEntry& operator=(const SfxEntry&);
+
+ private:
+ AffixMgr* pmyMgr;
+ std::string rappnd;
+
+ SfxEntry* next;
+ SfxEntry* nexteq;
+ SfxEntry* nextne;
+ SfxEntry* flgnxt;
+
+ SfxEntry* l_morph;
+ SfxEntry* r_morph;
+ SfxEntry* eq_morph;
+
+ public:
+ explicit SfxEntry(AffixMgr* pmgr);
+
+ bool allowCross() const { return ((opts & aeXPRODUCT) != 0); }
+ struct hentry* checkword(const char* word,
+ int len,
+ int optflags,
+ PfxEntry* ppfx,
+ const FLAG cclass,
+ const FLAG needflag,
+ const FLAG badflag);
+
+ struct hentry* check_twosfx(const char* word,
+ int len,
+ int optflags,
+ PfxEntry* ppfx,
+ const FLAG needflag = FLAG_NULL);
+
+ std::string check_twosfx_morph(const char* word,
+ int len,
+ int optflags,
+ PfxEntry* ppfx,
+ const FLAG needflag = FLAG_NULL);
+ struct hentry* get_next_homonym(struct hentry* he);
+ struct hentry* get_next_homonym(struct hentry* word,
+ int optflags,
+ PfxEntry* ppfx,
+ const FLAG cclass,
+ const FLAG needflag);
+
+ FLAG getFlag() { return aflag; }
+ const char* getKey() { return rappnd.c_str(); }
+ std::string add(const char* word, size_t len);
+
+ inline const char* getMorph() { return morphcode; }
+
+ inline const unsigned short* getCont() { return contclass; }
+ inline short getContLen() { return contclasslen; }
+ inline const char* getAffix() { return appnd.c_str(); }
+
+ inline short getKeyLen() { return appnd.size(); }
+
+ inline SfxEntry* getNext() { return next; }
+ inline SfxEntry* getNextNE() { return nextne; }
+ inline SfxEntry* getNextEQ() { return nexteq; }
+
+ inline SfxEntry* getLM() { return l_morph; }
+ inline SfxEntry* getRM() { return r_morph; }
+ inline SfxEntry* getEQM() { return eq_morph; }
+ inline SfxEntry* getFlgNxt() { return flgnxt; }
+
+ inline void setNext(SfxEntry* ptr) { next = ptr; }
+ inline void setNextNE(SfxEntry* ptr) { nextne = ptr; }
+ inline void setNextEQ(SfxEntry* ptr) { nexteq = ptr; }
+ inline void setFlgNxt(SfxEntry* ptr) { flgnxt = ptr; }
+ void initReverseWord();
+
+ inline char* nextchar(char* p);
+ inline int test_condition(const char* st, const char* begin);
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/affixmgr.cxx b/extensions/spellcheck/hunspell/src/affixmgr.cxx
new file mode 100644
index 0000000000..adb750dba1
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/affixmgr.cxx
@@ -0,0 +1,4875 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <time.h>
+
+#include <algorithm>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "affixmgr.hxx"
+#include "affentry.hxx"
+#include "langnum.hxx"
+
+#include "csutil.hxx"
+
+AffixMgr::AffixMgr(const char* affpath,
+ const std::vector<HashMgr*>& ptr,
+ const char* key)
+ : alldic(ptr)
+ , pHMgr(ptr[0]) {
+
+ // register hash manager and load affix data from aff file
+ csconv = NULL;
+ utf8 = 0;
+ complexprefixes = 0;
+ parsedmaptable = false;
+ parsedbreaktable = false;
+ iconvtable = NULL;
+ oconvtable = NULL;
+ // allow simplified compound forms (see 3rd field of CHECKCOMPOUNDPATTERN)
+ simplifiedcpd = 0;
+ parsedcheckcpd = false;
+ parseddefcpd = false;
+ phone = NULL;
+ compoundflag = FLAG_NULL; // permits word in compound forms
+ compoundbegin = FLAG_NULL; // may be first word in compound forms
+ compoundmiddle = FLAG_NULL; // may be middle word in compound forms
+ compoundend = FLAG_NULL; // may be last word in compound forms
+ compoundroot = FLAG_NULL; // compound word signing flag
+ compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word
+ compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word
+ compoundmoresuffixes = 0; // allow more suffixes within compound words
+ checkcompounddup = 0; // forbid double words in compounds
+ checkcompoundrep = 0; // forbid bad compounds (may be non-compound word with
+ // a REP substitution)
+ checkcompoundcase =
+ 0; // forbid upper and lowercase combinations at word bounds
+ checkcompoundtriple = 0; // forbid compounds with triple letters
+ simplifiedtriple = 0; // allow simplified triple letters in compounds
+ // (Schiff+fahrt -> Schiffahrt)
+ forbiddenword = FORBIDDENWORD; // forbidden word signing flag
+ nosuggest = FLAG_NULL; // don't suggest words signed with NOSUGGEST flag
+ nongramsuggest = FLAG_NULL;
+ langnum = 0; // language code (see http://l10n.openoffice.org/languages.html)
+ needaffix = FLAG_NULL; // forbidden root, allowed only with suffixes
+ cpdwordmax = -1; // default: unlimited wordcount in compound words
+ cpdmin = -1; // undefined
+ cpdmaxsyllable = 0; // default: unlimited syllablecount in compound words
+ pfxappnd = NULL; // previous prefix for counting syllables of the prefix BUG
+ sfxappnd = NULL; // previous suffix for counting syllables of the suffix BUG
+ sfxextra = 0; // modifier for syllable count of sfxappnd BUG
+ checknum = 0; // checking numbers, and word with numbers
+ havecontclass = 0; // flags of possible continuing classes (double affix)
+ // LEMMA_PRESENT: not put root into the morphological output. Lemma presents
+ // in morhological description in dictionary file. It's often combined with
+ // PSEUDOROOT.
+ lemma_present = FLAG_NULL;
+ circumfix = FLAG_NULL;
+ onlyincompound = FLAG_NULL;
+ maxngramsugs = -1; // undefined
+ maxdiff = -1; // undefined
+ onlymaxdiff = 0;
+ maxcpdsugs = -1; // undefined
+ nosplitsugs = 0;
+ sugswithdots = 0;
+ keepcase = 0;
+ forceucase = 0;
+ warn = 0;
+ forbidwarn = 0;
+ checksharps = 0;
+ substandard = FLAG_NULL;
+ fullstrip = 0;
+
+ sfx = NULL;
+ pfx = NULL;
+
+ for (int i = 0; i < SETSIZE; i++) {
+ pStart[i] = NULL;
+ sStart[i] = NULL;
+ pFlag[i] = NULL;
+ sFlag[i] = NULL;
+ }
+
+ for (int j = 0; j < CONTSIZE; j++) {
+ contclasses[j] = 0;
+ }
+
+ if (parse_file(affpath, key)) {
+ HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n", affpath);
+ }
+
+ if (cpdmin == -1)
+ cpdmin = MINCPDLEN;
+}
+
+AffixMgr::~AffixMgr() {
+ // pass through linked prefix entries and clean up
+ for (int i = 0; i < SETSIZE; i++) {
+ pFlag[i] = NULL;
+ PfxEntry* ptr = pStart[i];
+ PfxEntry* nptr = NULL;
+ while (ptr) {
+ nptr = ptr->getNext();
+ delete (ptr);
+ ptr = nptr;
+ nptr = NULL;
+ }
+ }
+
+ // pass through linked suffix entries and clean up
+ for (int j = 0; j < SETSIZE; j++) {
+ sFlag[j] = NULL;
+ SfxEntry* ptr = sStart[j];
+ SfxEntry* nptr = NULL;
+ while (ptr) {
+ nptr = ptr->getNext();
+ delete (ptr);
+ ptr = nptr;
+ nptr = NULL;
+ }
+ sStart[j] = NULL;
+ }
+
+ delete iconvtable;
+ delete oconvtable;
+ delete phone;
+
+ FREE_FLAG(compoundflag);
+ FREE_FLAG(compoundbegin);
+ FREE_FLAG(compoundmiddle);
+ FREE_FLAG(compoundend);
+ FREE_FLAG(compoundpermitflag);
+ FREE_FLAG(compoundforbidflag);
+ FREE_FLAG(compoundroot);
+ FREE_FLAG(forbiddenword);
+ FREE_FLAG(nosuggest);
+ FREE_FLAG(nongramsuggest);
+ FREE_FLAG(needaffix);
+ FREE_FLAG(lemma_present);
+ FREE_FLAG(circumfix);
+ FREE_FLAG(onlyincompound);
+
+ cpdwordmax = 0;
+ pHMgr = NULL;
+ cpdmin = 0;
+ cpdmaxsyllable = 0;
+ free_utf_tbl();
+ checknum = 0;
+#ifdef MOZILLA_CLIENT
+ delete[] csconv;
+#endif
+}
+
+void AffixMgr::finishFileMgr(FileMgr* afflst) {
+ delete afflst;
+
+ // convert affix trees to sorted list
+ process_pfx_tree_to_list();
+ process_sfx_tree_to_list();
+}
+
+// read in aff file and build up prefix and suffix entry objects
+int AffixMgr::parse_file(const char* affpath, const char* key) {
+
+ // checking flag duplication
+ char dupflags[CONTSIZE];
+ char dupflags_ini = 1;
+
+ // first line indicator for removing byte order mark
+ int firstline = 1;
+
+ // open the affix file
+ FileMgr* afflst = new FileMgr(affpath, key);
+ if (!afflst) {
+ HUNSPELL_WARNING(
+ stderr, "error: could not open affix description file %s\n", affpath);
+ return 1;
+ }
+
+ // step one is to parse the affix file building up the internal
+ // affix data structures
+
+ // read in each line ignoring any that do not
+ // start with a known line type indicator
+ std::string line;
+ while (afflst->getline(line)) {
+ mychomp(line);
+
+ /* remove byte order mark */
+ if (firstline) {
+ firstline = 0;
+ // Affix file begins with byte order mark: possible incompatibility with
+ // old Hunspell versions
+ if (line.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) {
+ line.erase(0, 3);
+ }
+ }
+
+ /* parse in the keyboard string */
+ if (line.compare(0, 3, "KEY", 3) == 0) {
+ if (!parse_string(line, keystring, afflst->getlinenum())) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the try string */
+ if (line.compare(0, 3, "TRY", 3) == 0) {
+ if (!parse_string(line, trystring, afflst->getlinenum())) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the name of the character set used by the .dict and .aff */
+ if (line.compare(0, 3, "SET", 3) == 0) {
+ if (!parse_string(line, encoding, afflst->getlinenum())) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ if (encoding == "UTF-8") {
+ utf8 = 1;
+#ifndef OPENOFFICEORG
+#ifndef MOZILLA_CLIENT
+ initialize_utf_tbl();
+#endif
+#endif
+ }
+ }
+
+ /* parse COMPLEXPREFIXES for agglutinative languages with right-to-left
+ * writing system */
+ if (line.compare(0, 15, "COMPLEXPREFIXES", 15) == 0)
+ complexprefixes = 1;
+
+ /* parse in the flag used by the controlled compound words */
+ if (line.compare(0, 12, "COMPOUNDFLAG", 12) == 0) {
+ if (!parse_flag(line, &compoundflag, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the flag used by compound words */
+ if (line.compare(0, 13, "COMPOUNDBEGIN", 13) == 0) {
+ if (complexprefixes) {
+ if (!parse_flag(line, &compoundend, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ } else {
+ if (!parse_flag(line, &compoundbegin, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+ }
+
+ /* parse in the flag used by compound words */
+ if (line.compare(0, 14, "COMPOUNDMIDDLE", 14) == 0) {
+ if (!parse_flag(line, &compoundmiddle, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the flag used by compound words */
+ if (line.compare(0, 11, "COMPOUNDEND", 11) == 0) {
+ if (complexprefixes) {
+ if (!parse_flag(line, &compoundbegin, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ } else {
+ if (!parse_flag(line, &compoundend, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+ }
+
+ /* parse in the data used by compound_check() method */
+ if (line.compare(0, 15, "COMPOUNDWORDMAX", 15) == 0) {
+ if (!parse_num(line, &cpdwordmax, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the flag sign compounds in dictionary */
+ if (line.compare(0, 12, "COMPOUNDROOT", 12) == 0) {
+ if (!parse_flag(line, &compoundroot, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the flag used by compound_check() method */
+ if (line.compare(0, 18, "COMPOUNDPERMITFLAG", 18) == 0) {
+ if (!parse_flag(line, &compoundpermitflag, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the flag used by compound_check() method */
+ if (line.compare(0, 18, "COMPOUNDFORBIDFLAG", 18) == 0) {
+ if (!parse_flag(line, &compoundforbidflag, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ if (line.compare(0, 20, "COMPOUNDMORESUFFIXES", 20) == 0) {
+ compoundmoresuffixes = 1;
+ }
+
+ if (line.compare(0, 16, "CHECKCOMPOUNDDUP", 16) == 0) {
+ checkcompounddup = 1;
+ }
+
+ if (line.compare(0, 16, "CHECKCOMPOUNDREP", 16) == 0) {
+ checkcompoundrep = 1;
+ }
+
+ if (line.compare(0, 19, "CHECKCOMPOUNDTRIPLE", 19) == 0) {
+ checkcompoundtriple = 1;
+ }
+
+ if (line.compare(0, 16, "SIMPLIFIEDTRIPLE", 16) == 0) {
+ simplifiedtriple = 1;
+ }
+
+ if (line.compare(0, 17, "CHECKCOMPOUNDCASE", 17) == 0) {
+ checkcompoundcase = 1;
+ }
+
+ if (line.compare(0, 9, "NOSUGGEST", 9) == 0) {
+ if (!parse_flag(line, &nosuggest, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ if (line.compare(0, 14, "NONGRAMSUGGEST", 14) == 0) {
+ if (!parse_flag(line, &nongramsuggest, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the flag used by forbidden words */
+ if (line.compare(0, 13, "FORBIDDENWORD", 13) == 0) {
+ if (!parse_flag(line, &forbiddenword, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the flag used by forbidden words (is deprecated) */
+ if (line.compare(0, 13, "LEMMA_PRESENT", 13) == 0) {
+ if (!parse_flag(line, &lemma_present, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the flag used by circumfixes */
+ if (line.compare(0, 9, "CIRCUMFIX", 9) == 0) {
+ if (!parse_flag(line, &circumfix, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the flag used by fogemorphemes */
+ if (line.compare(0, 14, "ONLYINCOMPOUND", 14) == 0) {
+ if (!parse_flag(line, &onlyincompound, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the flag used by `needaffixs' (is deprecated) */
+ if (line.compare(0, 10, "PSEUDOROOT", 10) == 0) {
+ if (!parse_flag(line, &needaffix, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the flag used by `needaffixs' */
+ if (line.compare(0, 9, "NEEDAFFIX", 9) == 0) {
+ if (!parse_flag(line, &needaffix, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the minimal length for words in compounds */
+ if (line.compare(0, 11, "COMPOUNDMIN", 11) == 0) {
+ if (!parse_num(line, &cpdmin, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ if (cpdmin < 1)
+ cpdmin = 1;
+ }
+
+ /* parse in the max. words and syllables in compounds */
+ if (line.compare(0, 16, "COMPOUNDSYLLABLE", 16) == 0) {
+ if (!parse_cpdsyllable(line, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the flag used by compound_check() method */
+ if (line.compare(0, 11, "SYLLABLENUM", 11) == 0) {
+ if (!parse_string(line, cpdsyllablenum, afflst->getlinenum())) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the flag used by the controlled compound words */
+ if (line.compare(0, 8, "CHECKNUM", 8) == 0) {
+ checknum = 1;
+ }
+
+ /* parse in the extra word characters */
+ if (line.compare(0, 9, "WORDCHARS", 9) == 0) {
+ if (!parse_array(line, wordchars, wordchars_utf16,
+ utf8, afflst->getlinenum())) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the ignored characters (for example, Arabic optional diacretics
+ * charachters */
+ if (line.compare(0, 6, "IGNORE", 6) == 0) {
+ if (!parse_array(line, ignorechars, ignorechars_utf16,
+ utf8, afflst->getlinenum())) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the input conversion table */
+ if (line.compare(0, 5, "ICONV", 5) == 0) {
+ if (!parse_convtable(line, afflst, &iconvtable, "ICONV")) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the output conversion table */
+ if (line.compare(0, 5, "OCONV", 5) == 0) {
+ if (!parse_convtable(line, afflst, &oconvtable, "OCONV")) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the phonetic translation table */
+ if (line.compare(0, 5, "PHONE", 5) == 0) {
+ if (!parse_phonetable(line, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the checkcompoundpattern table */
+ if (line.compare(0, 20, "CHECKCOMPOUNDPATTERN", 20) == 0) {
+ if (!parse_checkcpdtable(line, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the defcompound table */
+ if (line.compare(0, 12, "COMPOUNDRULE", 12) == 0) {
+ if (!parse_defcpdtable(line, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the related character map table */
+ if (line.compare(0, 3, "MAP", 3) == 0) {
+ if (!parse_maptable(line, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the word breakpoints table */
+ if (line.compare(0, 5, "BREAK", 5) == 0) {
+ if (!parse_breaktable(line, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the language for language specific codes */
+ if (line.compare(0, 4, "LANG", 4) == 0) {
+ if (!parse_string(line, lang, afflst->getlinenum())) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ langnum = get_lang_num(lang);
+ }
+
+ if (line.compare(0, 7, "VERSION", 7) == 0) {
+ size_t startpos = line.find_first_not_of(" \t", 7);
+ if (startpos != std::string::npos) {
+ version = line.substr(startpos);
+ }
+ }
+
+ if (line.compare(0, 12, "MAXNGRAMSUGS", 12) == 0) {
+ if (!parse_num(line, &maxngramsugs, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ if (line.compare(0, 11, "ONLYMAXDIFF", 11) == 0)
+ onlymaxdiff = 1;
+
+ if (line.compare(0, 7, "MAXDIFF", 7) == 0) {
+ if (!parse_num(line, &maxdiff, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ if (line.compare(0, 10, "MAXCPDSUGS", 10) == 0) {
+ if (!parse_num(line, &maxcpdsugs, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ if (line.compare(0, 11, "NOSPLITSUGS", 11) == 0) {
+ nosplitsugs = 1;
+ }
+
+ if (line.compare(0, 9, "FULLSTRIP", 9) == 0) {
+ fullstrip = 1;
+ }
+
+ if (line.compare(0, 12, "SUGSWITHDOTS", 12) == 0) {
+ sugswithdots = 1;
+ }
+
+ /* parse in the flag used by forbidden words */
+ if (line.compare(0, 8, "KEEPCASE", 8) == 0) {
+ if (!parse_flag(line, &keepcase, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the flag used by `forceucase' */
+ if (line.compare(0, 10, "FORCEUCASE", 10) == 0) {
+ if (!parse_flag(line, &forceucase, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ /* parse in the flag used by `warn' */
+ if (line.compare(0, 4, "WARN", 4) == 0) {
+ if (!parse_flag(line, &warn, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ if (line.compare(0, 10, "FORBIDWARN", 10) == 0) {
+ forbidwarn = 1;
+ }
+
+ /* parse in the flag used by the affix generator */
+ if (line.compare(0, 11, "SUBSTANDARD", 11) == 0) {
+ if (!parse_flag(line, &substandard, afflst)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+
+ if (line.compare(0, 11, "CHECKSHARPS", 11) == 0) {
+ checksharps = 1;
+ }
+
+ /* parse this affix: P - prefix, S - suffix */
+ // affix type
+ char ft = ' ';
+ if (line.compare(0, 3, "PFX", 3) == 0)
+ ft = complexprefixes ? 'S' : 'P';
+ if (line.compare(0, 3, "SFX", 3) == 0)
+ ft = complexprefixes ? 'P' : 'S';
+ if (ft != ' ') {
+ if (dupflags_ini) {
+ memset(dupflags, 0, sizeof(dupflags));
+ dupflags_ini = 0;
+ }
+ if (!parse_affix(line, ft, afflst, dupflags)) {
+ finishFileMgr(afflst);
+ return 1;
+ }
+ }
+ }
+
+ finishFileMgr(afflst);
+ // affix trees are sorted now
+
+ // now we can speed up performance greatly taking advantage of the
+ // relationship between the affixes and the idea of "subsets".
+
+ // View each prefix as a potential leading subset of another and view
+ // each suffix (reversed) as a potential trailing subset of another.
+
+ // To illustrate this relationship if we know the prefix "ab" is found in the
+ // word to examine, only prefixes that "ab" is a leading subset of need be
+ // examined.
+ // Furthermore is "ab" is not present then none of the prefixes that "ab" is
+ // is a subset need be examined.
+ // The same argument goes for suffix string that are reversed.
+
+ // Then to top this off why not examine the first char of the word to quickly
+ // limit the set of prefixes to examine (i.e. the prefixes to examine must
+ // be leading supersets of the first character of the word (if they exist)
+
+ // To take advantage of this "subset" relationship, we need to add two links
+ // from entry. One to take next if the current prefix is found (call it
+ // nexteq)
+ // and one to take next if the current prefix is not found (call it nextne).
+
+ // Since we have built ordered lists, all that remains is to properly
+ // initialize
+ // the nextne and nexteq pointers that relate them
+
+ process_pfx_order();
+ process_sfx_order();
+
+ /* get encoding for CHECKCOMPOUNDCASE */
+ if (!utf8) {
+ csconv = get_current_cs(get_encoding());
+ for (int i = 0; i <= 255; i++) {
+ if ((csconv[i].cupper != csconv[i].clower) &&
+ (wordchars.find((char)i) == std::string::npos)) {
+ wordchars.push_back((char)i);
+ }
+ }
+
+ }
+
+ // default BREAK definition
+ if (!parsedbreaktable) {
+ breaktable.push_back("-");
+ breaktable.push_back("^-");
+ breaktable.push_back("-$");
+ parsedbreaktable = true;
+ }
+ return 0;
+}
+
+// we want to be able to quickly access prefix information
+// both by prefix flag, and sorted by prefix string itself
+// so we need to set up two indexes
+
+int AffixMgr::build_pfxtree(PfxEntry* pfxptr) {
+ PfxEntry* ptr;
+ PfxEntry* pptr;
+ PfxEntry* ep = pfxptr;
+
+ // get the right starting points
+ const char* key = ep->getKey();
+ const unsigned char flg = (unsigned char)(ep->getFlag() & 0x00FF);
+
+ // first index by flag which must exist
+ ptr = pFlag[flg];
+ ep->setFlgNxt(ptr);
+ pFlag[flg] = ep;
+
+ // handle the special case of null affix string
+ if (strlen(key) == 0) {
+ // always inset them at head of list at element 0
+ ptr = pStart[0];
+ ep->setNext(ptr);
+ pStart[0] = ep;
+ return 0;
+ }
+
+ // now handle the normal case
+ ep->setNextEQ(NULL);
+ ep->setNextNE(NULL);
+
+ unsigned char sp = *((const unsigned char*)key);
+ ptr = pStart[sp];
+
+ // handle the first insert
+ if (!ptr) {
+ pStart[sp] = ep;
+ return 0;
+ }
+
+ // otherwise use binary tree insertion so that a sorted
+ // list can easily be generated later
+ pptr = NULL;
+ for (;;) {
+ pptr = ptr;
+ if (strcmp(ep->getKey(), ptr->getKey()) <= 0) {
+ ptr = ptr->getNextEQ();
+ if (!ptr) {
+ pptr->setNextEQ(ep);
+ break;
+ }
+ } else {
+ ptr = ptr->getNextNE();
+ if (!ptr) {
+ pptr->setNextNE(ep);
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+// we want to be able to quickly access suffix information
+// both by suffix flag, and sorted by the reverse of the
+// suffix string itself; so we need to set up two indexes
+int AffixMgr::build_sfxtree(SfxEntry* sfxptr) {
+
+ sfxptr->initReverseWord();
+
+ SfxEntry* ptr;
+ SfxEntry* pptr;
+ SfxEntry* ep = sfxptr;
+
+ /* get the right starting point */
+ const char* key = ep->getKey();
+ const unsigned char flg = (unsigned char)(ep->getFlag() & 0x00FF);
+
+ // first index by flag which must exist
+ ptr = sFlag[flg];
+ ep->setFlgNxt(ptr);
+ sFlag[flg] = ep;
+
+ // next index by affix string
+
+ // handle the special case of null affix string
+ if (strlen(key) == 0) {
+ // always inset them at head of list at element 0
+ ptr = sStart[0];
+ ep->setNext(ptr);
+ sStart[0] = ep;
+ return 0;
+ }
+
+ // now handle the normal case
+ ep->setNextEQ(NULL);
+ ep->setNextNE(NULL);
+
+ unsigned char sp = *((const unsigned char*)key);
+ ptr = sStart[sp];
+
+ // handle the first insert
+ if (!ptr) {
+ sStart[sp] = ep;
+ return 0;
+ }
+
+ // otherwise use binary tree insertion so that a sorted
+ // list can easily be generated later
+ pptr = NULL;
+ for (;;) {
+ pptr = ptr;
+ if (strcmp(ep->getKey(), ptr->getKey()) <= 0) {
+ ptr = ptr->getNextEQ();
+ if (!ptr) {
+ pptr->setNextEQ(ep);
+ break;
+ }
+ } else {
+ ptr = ptr->getNextNE();
+ if (!ptr) {
+ pptr->setNextNE(ep);
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+// convert from binary tree to sorted list
+int AffixMgr::process_pfx_tree_to_list() {
+ for (int i = 1; i < SETSIZE; i++) {
+ pStart[i] = process_pfx_in_order(pStart[i], NULL);
+ }
+ return 0;
+}
+
+PfxEntry* AffixMgr::process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr) {
+ if (ptr) {
+ nptr = process_pfx_in_order(ptr->getNextNE(), nptr);
+ ptr->setNext(nptr);
+ nptr = process_pfx_in_order(ptr->getNextEQ(), ptr);
+ }
+ return nptr;
+}
+
+// convert from binary tree to sorted list
+int AffixMgr::process_sfx_tree_to_list() {
+ for (int i = 1; i < SETSIZE; i++) {
+ sStart[i] = process_sfx_in_order(sStart[i], NULL);
+ }
+ return 0;
+}
+
+SfxEntry* AffixMgr::process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr) {
+ if (ptr) {
+ nptr = process_sfx_in_order(ptr->getNextNE(), nptr);
+ ptr->setNext(nptr);
+ nptr = process_sfx_in_order(ptr->getNextEQ(), ptr);
+ }
+ return nptr;
+}
+
+// reinitialize the PfxEntry links NextEQ and NextNE to speed searching
+// using the idea of leading subsets this time
+int AffixMgr::process_pfx_order() {
+ PfxEntry* ptr;
+
+ // loop through each prefix list starting point
+ for (int i = 1; i < SETSIZE; i++) {
+ ptr = pStart[i];
+
+ // look through the remainder of the list
+ // and find next entry with affix that
+ // the current one is not a subset of
+ // mark that as destination for NextNE
+ // use next in list that you are a subset
+ // of as NextEQ
+
+ for (; ptr != NULL; ptr = ptr->getNext()) {
+ PfxEntry* nptr = ptr->getNext();
+ for (; nptr != NULL; nptr = nptr->getNext()) {
+ if (!isSubset(ptr->getKey(), nptr->getKey()))
+ break;
+ }
+ ptr->setNextNE(nptr);
+ ptr->setNextEQ(NULL);
+ if ((ptr->getNext()) &&
+ isSubset(ptr->getKey(), (ptr->getNext())->getKey()))
+ ptr->setNextEQ(ptr->getNext());
+ }
+
+ // now clean up by adding smart search termination strings:
+ // if you are already a superset of the previous prefix
+ // but not a subset of the next, search can end here
+ // so set NextNE properly
+
+ ptr = pStart[i];
+ for (; ptr != NULL; ptr = ptr->getNext()) {
+ PfxEntry* nptr = ptr->getNext();
+ PfxEntry* mptr = NULL;
+ for (; nptr != NULL; nptr = nptr->getNext()) {
+ if (!isSubset(ptr->getKey(), nptr->getKey()))
+ break;
+ mptr = nptr;
+ }
+ if (mptr)
+ mptr->setNextNE(NULL);
+ }
+ }
+ return 0;
+}
+
+// initialize the SfxEntry links NextEQ and NextNE to speed searching
+// using the idea of leading subsets this time
+int AffixMgr::process_sfx_order() {
+ SfxEntry* ptr;
+
+ // loop through each prefix list starting point
+ for (int i = 1; i < SETSIZE; i++) {
+ ptr = sStart[i];
+
+ // look through the remainder of the list
+ // and find next entry with affix that
+ // the current one is not a subset of
+ // mark that as destination for NextNE
+ // use next in list that you are a subset
+ // of as NextEQ
+
+ for (; ptr != NULL; ptr = ptr->getNext()) {
+ SfxEntry* nptr = ptr->getNext();
+ for (; nptr != NULL; nptr = nptr->getNext()) {
+ if (!isSubset(ptr->getKey(), nptr->getKey()))
+ break;
+ }
+ ptr->setNextNE(nptr);
+ ptr->setNextEQ(NULL);
+ if ((ptr->getNext()) &&
+ isSubset(ptr->getKey(), (ptr->getNext())->getKey()))
+ ptr->setNextEQ(ptr->getNext());
+ }
+
+ // now clean up by adding smart search termination strings:
+ // if you are already a superset of the previous suffix
+ // but not a subset of the next, search can end here
+ // so set NextNE properly
+
+ ptr = sStart[i];
+ for (; ptr != NULL; ptr = ptr->getNext()) {
+ SfxEntry* nptr = ptr->getNext();
+ SfxEntry* mptr = NULL;
+ for (; nptr != NULL; nptr = nptr->getNext()) {
+ if (!isSubset(ptr->getKey(), nptr->getKey()))
+ break;
+ mptr = nptr;
+ }
+ if (mptr)
+ mptr->setNextNE(NULL);
+ }
+ }
+ return 0;
+}
+
+// add flags to the result for dictionary debugging
+std::string& AffixMgr::debugflag(std::string& result, unsigned short flag) {
+ char* st = encode_flag(flag);
+ result.push_back(MSEP_FLD);
+ result.append(MORPH_FLAG);
+ if (st) {
+ result.append(st);
+ free(st);
+ }
+ return result;
+}
+
+// calculate the character length of the condition
+int AffixMgr::condlen(const char* st) {
+ int l = 0;
+ bool group = false;
+ for (; *st; st++) {
+ if (*st == '[') {
+ group = true;
+ l++;
+ } else if (*st == ']')
+ group = false;
+ else if (!group && (!utf8 || (!(*st & 0x80) || ((*st & 0xc0) == 0x80))))
+ l++;
+ }
+ return l;
+}
+
+int AffixMgr::encodeit(AffEntry& entry, const char* cs) {
+ if (strcmp(cs, ".") != 0) {
+ entry.numconds = (char)condlen(cs);
+ const size_t cslen = strlen(cs);
+ const size_t short_part = std::min<size_t>(MAXCONDLEN, cslen);
+ memcpy(entry.c.conds, cs, short_part);
+ if (short_part < MAXCONDLEN) {
+ //blank out the remaining space
+ memset(entry.c.conds + short_part, 0, MAXCONDLEN - short_part);
+ } else if (cs[MAXCONDLEN]) {
+ //there is more conditions than fit in fixed space, so its
+ //a long condition
+ entry.opts |= aeLONGCOND;
+ entry.c.l.conds2 = mystrdup(cs + MAXCONDLEN_1);
+ if (!entry.c.l.conds2)
+ return 1;
+ }
+ } else {
+ entry.numconds = 0;
+ entry.c.conds[0] = '\0';
+ }
+ return 0;
+}
+
+// return 1 if s1 is a leading subset of s2 (dots are for infixes)
+inline int AffixMgr::isSubset(const char* s1, const char* s2) {
+ while (((*s1 == *s2) || (*s1 == '.')) && (*s1 != '\0')) {
+ s1++;
+ s2++;
+ }
+ return (*s1 == '\0');
+}
+
+// check word for prefixes
+struct hentry* AffixMgr::prefix_check(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag) {
+ struct hentry* rv = NULL;
+
+ pfx = NULL;
+ pfxappnd = NULL;
+ sfxappnd = NULL;
+ sfxextra = 0;
+
+ // first handle the special case of 0 length prefixes
+ PfxEntry* pe = pStart[0];
+ while (pe) {
+ if (
+ // fogemorpheme
+ ((in_compound != IN_CPD_NOT) ||
+ !(pe->getCont() &&
+ (TESTAFF(pe->getCont(), onlyincompound, pe->getContLen())))) &&
+ // permit prefixes in compounds
+ ((in_compound != IN_CPD_END) ||
+ (pe->getCont() &&
+ (TESTAFF(pe->getCont(), compoundpermitflag, pe->getContLen()))))) {
+ // check prefix
+ rv = pe->checkword(word, len, in_compound, needflag);
+ if (rv) {
+ pfx = pe; // BUG: pfx not stateless
+ return rv;
+ }
+ }
+ pe = pe->getNext();
+ }
+
+ // now handle the general case
+ unsigned char sp = *((const unsigned char*)word);
+ PfxEntry* pptr = pStart[sp];
+
+ while (pptr) {
+ if (isSubset(pptr->getKey(), word)) {
+ if (
+ // fogemorpheme
+ ((in_compound != IN_CPD_NOT) ||
+ !(pptr->getCont() &&
+ (TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen())))) &&
+ // permit prefixes in compounds
+ ((in_compound != IN_CPD_END) ||
+ (pptr->getCont() && (TESTAFF(pptr->getCont(), compoundpermitflag,
+ pptr->getContLen()))))) {
+ // check prefix
+ rv = pptr->checkword(word, len, in_compound, needflag);
+ if (rv) {
+ pfx = pptr; // BUG: pfx not stateless
+ return rv;
+ }
+ }
+ pptr = pptr->getNextEQ();
+ } else {
+ pptr = pptr->getNextNE();
+ }
+ }
+
+ return NULL;
+}
+
+// check word for prefixes and two-level suffixes
+struct hentry* AffixMgr::prefix_check_twosfx(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag) {
+ struct hentry* rv = NULL;
+
+ pfx = NULL;
+ sfxappnd = NULL;
+ sfxextra = 0;
+
+ // first handle the special case of 0 length prefixes
+ PfxEntry* pe = pStart[0];
+
+ while (pe) {
+ rv = pe->check_twosfx(word, len, in_compound, needflag);
+ if (rv)
+ return rv;
+ pe = pe->getNext();
+ }
+
+ // now handle the general case
+ unsigned char sp = *((const unsigned char*)word);
+ PfxEntry* pptr = pStart[sp];
+
+ while (pptr) {
+ if (isSubset(pptr->getKey(), word)) {
+ rv = pptr->check_twosfx(word, len, in_compound, needflag);
+ if (rv) {
+ pfx = pptr;
+ return rv;
+ }
+ pptr = pptr->getNextEQ();
+ } else {
+ pptr = pptr->getNextNE();
+ }
+ }
+
+ return NULL;
+}
+
+// check word for prefixes and morph
+std::string AffixMgr::prefix_check_morph(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag) {
+
+ std::string result;
+
+ pfx = NULL;
+ sfxappnd = NULL;
+ sfxextra = 0;
+
+ // first handle the special case of 0 length prefixes
+ PfxEntry* pe = pStart[0];
+ while (pe) {
+ std::string st = pe->check_morph(word, len, in_compound, needflag);
+ if (!st.empty()) {
+ result.append(st);
+ }
+ pe = pe->getNext();
+ }
+
+ // now handle the general case
+ unsigned char sp = *((const unsigned char*)word);
+ PfxEntry* pptr = pStart[sp];
+
+ while (pptr) {
+ if (isSubset(pptr->getKey(), word)) {
+ std::string st = pptr->check_morph(word, len, in_compound, needflag);
+ if (!st.empty()) {
+ // fogemorpheme
+ if ((in_compound != IN_CPD_NOT) ||
+ !((pptr->getCont() && (TESTAFF(pptr->getCont(), onlyincompound,
+ pptr->getContLen()))))) {
+ result.append(st);
+ pfx = pptr;
+ }
+ }
+ pptr = pptr->getNextEQ();
+ } else {
+ pptr = pptr->getNextNE();
+ }
+ }
+
+ return result;
+}
+
+// check word for prefixes and morph and two-level suffixes
+std::string AffixMgr::prefix_check_twosfx_morph(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag) {
+ std::string result;
+
+ pfx = NULL;
+ sfxappnd = NULL;
+ sfxextra = 0;
+
+ // first handle the special case of 0 length prefixes
+ PfxEntry* pe = pStart[0];
+ while (pe) {
+ std::string st = pe->check_twosfx_morph(word, len, in_compound, needflag);
+ if (!st.empty()) {
+ result.append(st);
+ }
+ pe = pe->getNext();
+ }
+
+ // now handle the general case
+ unsigned char sp = *((const unsigned char*)word);
+ PfxEntry* pptr = pStart[sp];
+
+ while (pptr) {
+ if (isSubset(pptr->getKey(), word)) {
+ std::string st = pptr->check_twosfx_morph(word, len, in_compound, needflag);
+ if (!st.empty()) {
+ result.append(st);
+ pfx = pptr;
+ }
+ pptr = pptr->getNextEQ();
+ } else {
+ pptr = pptr->getNextNE();
+ }
+ }
+
+ return result;
+}
+
+// Is word a non-compound with a REP substitution (see checkcompoundrep)?
+int AffixMgr::cpdrep_check(const char* word, int wl) {
+
+ if ((wl < 2) || get_reptable().empty())
+ return 0;
+
+ for (size_t i = 0; i < get_reptable().size(); ++i) {
+ // use only available mid patterns
+ if (!get_reptable()[i].outstrings[0].empty()) {
+ const char* r = word;
+ const size_t lenp = get_reptable()[i].pattern.size();
+ // search every occurence of the pattern in the word
+ while ((r = strstr(r, get_reptable()[i].pattern.c_str())) != NULL) {
+ std::string candidate(word);
+ candidate.replace(r - word, lenp, get_reptable()[i].outstrings[0]);
+ if (candidate_check(candidate.c_str(), candidate.size()))
+ return 1;
+ ++r; // search for the next letter
+ }
+ }
+ }
+
+ return 0;
+}
+
+// forbid compound words, if they are in the dictionary as a
+// word pair separated by space
+int AffixMgr::cpdwordpair_check(const char * word, int wl) {
+ if (wl > 2) {
+ std::string candidate(word);
+ for (size_t i = 1; i < candidate.size(); i++) {
+ // go to end of the UTF-8 character
+ if (utf8 && ((word[i] & 0xc0) == 0x80))
+ continue;
+ candidate.insert(i, 1, ' ');
+ if (candidate_check(candidate.c_str(), candidate.size()))
+ return 1;
+ candidate.erase(i, 1);
+ }
+ }
+
+ return 0;
+}
+
+// forbid compoundings when there are special patterns at word bound
+int AffixMgr::cpdpat_check(const char* word,
+ int pos,
+ hentry* r1,
+ hentry* r2,
+ const char /*affixed*/) {
+ for (size_t i = 0; i < checkcpdtable.size(); ++i) {
+ size_t len;
+ if (isSubset(checkcpdtable[i].pattern2.c_str(), word + pos) &&
+ (!r1 || !checkcpdtable[i].cond ||
+ (r1->astr && TESTAFF(r1->astr, checkcpdtable[i].cond, r1->alen))) &&
+ (!r2 || !checkcpdtable[i].cond2 ||
+ (r2->astr && TESTAFF(r2->astr, checkcpdtable[i].cond2, r2->alen))) &&
+ // zero length pattern => only TESTAFF
+ // zero pattern (0/flag) => unmodified stem (zero affixes allowed)
+ (checkcpdtable[i].pattern.empty() ||
+ ((checkcpdtable[i].pattern[0] == '0' && r1->blen <= pos &&
+ strncmp(word + pos - r1->blen, r1->word, r1->blen) == 0) ||
+ (checkcpdtable[i].pattern[0] != '0' &&
+ ((len = checkcpdtable[i].pattern.size()) != 0) &&
+ strncmp(word + pos - len, checkcpdtable[i].pattern.c_str(), len) == 0)))) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+// forbid compounding with neighbouring upper and lower case characters at word
+// bounds
+int AffixMgr::cpdcase_check(const char* word, int pos) {
+ if (utf8) {
+ const char* p;
+ for (p = word + pos - 1; (*p & 0xc0) == 0x80; p--)
+ ;
+ std::string pair(p);
+ std::vector<w_char> pair_u;
+ u8_u16(pair_u, pair);
+ unsigned short a = pair_u.size() > 1 ? ((pair_u[1].h << 8) + pair_u[1].l) : 0;
+ unsigned short b = !pair_u.empty() ? ((pair_u[0].h << 8) + pair_u[0].l) : 0;
+ if (((unicodetoupper(a, langnum) == a) ||
+ (unicodetoupper(b, langnum) == b)) &&
+ (a != '-') && (b != '-'))
+ return 1;
+ } else {
+ unsigned char a = *(word + pos - 1);
+ unsigned char b = *(word + pos);
+ if ((csconv[a].ccase || csconv[b].ccase) && (a != '-') && (b != '-'))
+ return 1;
+ }
+ return 0;
+}
+
+struct metachar_data {
+ signed short btpp; // metacharacter (*, ?) position for backtracking
+ signed short btwp; // word position for metacharacters
+ int btnum; // number of matched characters in metacharacter
+};
+
+// check compound patterns
+int AffixMgr::defcpd_check(hentry*** words,
+ short wnum,
+ hentry* rv,
+ hentry** def,
+ char all) {
+ int w = 0;
+
+ if (!*words) {
+ w = 1;
+ *words = def;
+ }
+
+ if (!*words) {
+ return 0;
+ }
+
+ std::vector<metachar_data> btinfo(1);
+
+ short bt = 0;
+
+ (*words)[wnum] = rv;
+
+ // has the last word COMPOUNDRULE flag?
+ if (rv->alen == 0) {
+ (*words)[wnum] = NULL;
+ if (w)
+ *words = NULL;
+ return 0;
+ }
+ int ok = 0;
+ for (size_t i = 0; i < defcpdtable.size(); ++i) {
+ for (size_t j = 0; j < defcpdtable[i].size(); ++j) {
+ if (defcpdtable[i][j] != '*' && defcpdtable[i][j] != '?' &&
+ TESTAFF(rv->astr, defcpdtable[i][j], rv->alen)) {
+ ok = 1;
+ break;
+ }
+ }
+ }
+ if (ok == 0) {
+ (*words)[wnum] = NULL;
+ if (w)
+ *words = NULL;
+ return 0;
+ }
+
+ for (size_t i = 0; i < defcpdtable.size(); ++i) {
+ size_t pp = 0; // pattern position
+ signed short wp = 0; // "words" position
+ int ok2;
+ ok = 1;
+ ok2 = 1;
+ do {
+ while ((pp < defcpdtable[i].size()) && (wp <= wnum)) {
+ if (((pp + 1) < defcpdtable[i].size()) &&
+ ((defcpdtable[i][pp + 1] == '*') ||
+ (defcpdtable[i][pp + 1] == '?'))) {
+ int wend = (defcpdtable[i][pp + 1] == '?') ? wp : wnum;
+ ok2 = 1;
+ pp += 2;
+ btinfo[bt].btpp = pp;
+ btinfo[bt].btwp = wp;
+ while (wp <= wend) {
+ if (!(*words)[wp]->alen ||
+ !TESTAFF((*words)[wp]->astr, defcpdtable[i][pp - 2],
+ (*words)[wp]->alen)) {
+ ok2 = 0;
+ break;
+ }
+ wp++;
+ }
+ if (wp <= wnum)
+ ok2 = 0;
+ btinfo[bt].btnum = wp - btinfo[bt].btwp;
+ if (btinfo[bt].btnum > 0) {
+ ++bt;
+ btinfo.resize(bt+1);
+ }
+ if (ok2)
+ break;
+ } else {
+ ok2 = 1;
+ if (!(*words)[wp] || !(*words)[wp]->alen ||
+ !TESTAFF((*words)[wp]->astr, defcpdtable[i][pp],
+ (*words)[wp]->alen)) {
+ ok = 0;
+ break;
+ }
+ pp++;
+ wp++;
+ if ((defcpdtable[i].size() == pp) && !(wp > wnum))
+ ok = 0;
+ }
+ }
+ if (ok && ok2) {
+ size_t r = pp;
+ while ((defcpdtable[i].size() > r) && ((r + 1) < defcpdtable[i].size()) &&
+ ((defcpdtable[i][r + 1] == '*') ||
+ (defcpdtable[i][r + 1] == '?')))
+ r += 2;
+ if (defcpdtable[i].size() <= r)
+ return 1;
+ }
+ // backtrack
+ if (bt)
+ do {
+ ok = 1;
+ btinfo[bt - 1].btnum--;
+ pp = btinfo[bt - 1].btpp;
+ wp = btinfo[bt - 1].btwp + (signed short)btinfo[bt - 1].btnum;
+ } while ((btinfo[bt - 1].btnum < 0) && --bt);
+ } while (bt);
+
+ if (ok && ok2 && (!all || (defcpdtable[i].size() <= pp)))
+ return 1;
+
+ // check zero ending
+ while (ok && ok2 && (defcpdtable[i].size() > pp) &&
+ ((pp + 1) < defcpdtable[i].size()) &&
+ ((defcpdtable[i][pp + 1] == '*') ||
+ (defcpdtable[i][pp + 1] == '?')))
+ pp += 2;
+ if (ok && ok2 && (defcpdtable[i].size() <= pp))
+ return 1;
+ }
+ (*words)[wnum] = NULL;
+ if (w)
+ *words = NULL;
+ return 0;
+}
+
+inline int AffixMgr::candidate_check(const char* word, int len) {
+
+ struct hentry* rv = lookup(word);
+ if (rv)
+ return 1;
+
+ // rv = prefix_check(word,len,1);
+ // if (rv) return 1;
+
+ rv = affix_check(word, len);
+ if (rv)
+ return 1;
+ return 0;
+}
+
+// calculate number of syllable for compound-checking
+short AffixMgr::get_syllable(const std::string& word) {
+ if (cpdmaxsyllable == 0)
+ return 0;
+
+ short num = 0;
+
+ if (!utf8) {
+ for (size_t i = 0; i < word.size(); ++i) {
+ if (std::binary_search(cpdvowels.begin(), cpdvowels.end(),
+ word[i])) {
+ ++num;
+ }
+ }
+ } else if (!cpdvowels_utf16.empty()) {
+ std::vector<w_char> w;
+ u8_u16(w, word);
+ for (size_t i = 0; i < w.size(); ++i) {
+ if (std::binary_search(cpdvowels_utf16.begin(),
+ cpdvowels_utf16.end(),
+ w[i])) {
+ ++num;
+ }
+ }
+ }
+
+ return num;
+}
+
+void AffixMgr::setcminmax(int* cmin, int* cmax, const char* word, int len) {
+ if (utf8) {
+ int i;
+ for (*cmin = 0, i = 0; (i < cpdmin) && *cmin < len; i++) {
+ for ((*cmin)++; *cmin < len && (word[*cmin] & 0xc0) == 0x80; (*cmin)++)
+ ;
+ }
+ for (*cmax = len, i = 0; (i < (cpdmin - 1)) && *cmax >= 0; i++) {
+ for ((*cmax)--; *cmax >= 0 && (word[*cmax] & 0xc0) == 0x80; (*cmax)--)
+ ;
+ }
+ } else {
+ *cmin = cpdmin;
+ *cmax = len - cpdmin + 1;
+ }
+}
+
+// check if compound word is correctly spelled
+// hu_mov_rule = spec. Hungarian rule (XXX)
+struct hentry* AffixMgr::compound_check(const std::string& word,
+ short wordnum,
+ short numsyllable,
+ short maxwordnum,
+ short wnum,
+ hentry** words = NULL,
+ hentry** rwords = NULL,
+ char hu_mov_rule = 0,
+ char is_sug = 0,
+ int* info = NULL) {
+ int i;
+ short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
+ struct hentry* rv = NULL;
+ struct hentry* rv_first;
+ std::string st;
+ char ch = '\0';
+ int cmin;
+ int cmax;
+ int striple = 0;
+ size_t scpd = 0;
+ int soldi = 0;
+ int oldcmin = 0;
+ int oldcmax = 0;
+ int oldlen = 0;
+ int checkedstriple = 0;
+ char affixed = 0;
+ hentry** oldwords = words;
+ size_t len = word.size();
+
+ int checked_prefix;
+
+ // add a time limit to handle possible
+ // combinatorical explosion of the overlapping words
+
+ HUNSPELL_THREAD_LOCAL clock_t timelimit;
+
+ if (wordnum == 0) {
+ // get the start time, seeing as we're reusing this set to 0
+ // to flag timeout, use clock() + 1 to avoid start clock()
+ // of 0 as being a timeout
+ timelimit = clock() + 1;
+ }
+ else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT)) {
+ timelimit = 0;
+ }
+
+ setcminmax(&cmin, &cmax, word.c_str(), len);
+
+ st.assign(word);
+
+ for (i = cmin; i < cmax; i++) {
+ // go to end of the UTF-8 character
+ if (utf8) {
+ for (; (st[i] & 0xc0) == 0x80; i++)
+ ;
+ if (i >= cmax)
+ return NULL;
+ }
+
+ words = oldwords;
+ int onlycpdrule = (words) ? 1 : 0;
+
+ do { // onlycpdrule loop
+
+ oldnumsyllable = numsyllable;
+ oldwordnum = wordnum;
+ checked_prefix = 0;
+
+ do { // simplified checkcompoundpattern loop
+
+ if (timelimit == 0)
+ return 0;
+
+ if (scpd > 0) {
+ for (; scpd <= checkcpdtable.size() &&
+ (checkcpdtable[scpd - 1].pattern3.empty() ||
+ strncmp(word.c_str() + i, checkcpdtable[scpd - 1].pattern3.c_str(),
+ checkcpdtable[scpd - 1].pattern3.size()) != 0);
+ scpd++)
+ ;
+
+ if (scpd > checkcpdtable.size())
+ break; // break simplified checkcompoundpattern loop
+ st.replace(i, std::string::npos, checkcpdtable[scpd - 1].pattern);
+ soldi = i;
+ i += checkcpdtable[scpd - 1].pattern.size();
+ st.replace(i, std::string::npos, checkcpdtable[scpd - 1].pattern2);
+ st.replace(i + checkcpdtable[scpd - 1].pattern2.size(), std::string::npos,
+ word.substr(soldi + checkcpdtable[scpd - 1].pattern3.size()));
+
+ oldlen = len;
+ len += checkcpdtable[scpd - 1].pattern.size() +
+ checkcpdtable[scpd - 1].pattern2.size() -
+ checkcpdtable[scpd - 1].pattern3.size();
+ oldcmin = cmin;
+ oldcmax = cmax;
+ setcminmax(&cmin, &cmax, st.c_str(), len);
+
+ cmax = len - cpdmin + 1;
+ }
+
+ ch = st[i];
+ st[i] = '\0';
+
+ sfx = NULL;
+ pfx = NULL;
+
+ // FIRST WORD
+
+ affixed = 1;
+ rv = lookup(st.c_str()); // perhaps without prefix
+
+ // forbid dictionary stems with COMPOUNDFORBIDFLAG in
+ // compound words, overriding the effect of COMPOUNDPERMITFLAG
+ if ((rv) && compoundforbidflag &&
+ TESTAFF(rv->astr, compoundforbidflag, rv->alen) && !hu_mov_rule)
+ continue;
+
+ // search homonym with compound flag
+ while ((rv) && !hu_mov_rule &&
+ ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
+ !((compoundflag && !words && !onlycpdrule &&
+ TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+ (compoundbegin && !wordnum && !onlycpdrule &&
+ TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
+ (compoundmiddle && wordnum && !words && !onlycpdrule &&
+ TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
+ (!defcpdtable.empty() && onlycpdrule &&
+ ((!words && !wordnum &&
+ defcpd_check(&words, wnum, rv, rwords, 0)) ||
+ (words &&
+ defcpd_check(&words, wnum, rv, rwords, 0))))) ||
+ (scpd != 0 && checkcpdtable[scpd - 1].cond != FLAG_NULL &&
+ !TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond, rv->alen)))) {
+ rv = rv->next_homonym;
+ }
+
+ if (rv)
+ affixed = 0;
+
+ if (!rv) {
+ if (onlycpdrule)
+ break;
+ if (compoundflag &&
+ !(rv = prefix_check(st.c_str(), i,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN,
+ compoundflag))) {
+ if (((rv = suffix_check(
+ st.c_str(), i, 0, NULL, FLAG_NULL, compoundflag,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes &&
+ (rv = suffix_check_twosfx(st.c_str(), i, 0, NULL, compoundflag)))) &&
+ !hu_mov_rule && sfx->getCont() &&
+ ((compoundforbidflag &&
+ TESTAFF(sfx->getCont(), compoundforbidflag,
+ sfx->getContLen())) ||
+ (compoundend &&
+ TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) {
+ rv = NULL;
+ }
+ }
+
+ if (rv ||
+ (((wordnum == 0) && compoundbegin &&
+ ((rv = suffix_check(
+ st.c_str(), i, 0, NULL, FLAG_NULL, compoundbegin,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes &&
+ (rv = suffix_check_twosfx(
+ st.c_str(), i, 0, NULL,
+ compoundbegin))) || // twofold suffixes + compound
+ (rv = prefix_check(st.c_str(), i,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN,
+ compoundbegin)))) ||
+ ((wordnum > 0) && compoundmiddle &&
+ ((rv = suffix_check(
+ st.c_str(), i, 0, NULL, FLAG_NULL, compoundmiddle,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes &&
+ (rv = suffix_check_twosfx(
+ st.c_str(), i, 0, NULL,
+ compoundmiddle))) || // twofold suffixes + compound
+ (rv = prefix_check(st.c_str(), i,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN,
+ compoundmiddle))))))
+ checked_prefix = 1;
+ // else check forbiddenwords and needaffix
+ } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+ TESTAFF(rv->astr, needaffix, rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
+ (is_sug && nosuggest &&
+ TESTAFF(rv->astr, nosuggest, rv->alen)))) {
+ st[i] = ch;
+ // continue;
+ break;
+ }
+
+ // check non_compound flag in suffix and prefix
+ if ((rv) && !hu_mov_rule &&
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundforbidflag,
+ sfx->getContLen())))) {
+ rv = NULL;
+ }
+
+ // check compoundend flag in suffix and prefix
+ if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundend, pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) {
+ rv = NULL;
+ }
+
+ // check compoundmiddle flag in suffix and prefix
+ if ((rv) && !checked_prefix && (wordnum == 0) && compoundmiddle &&
+ !hu_mov_rule &&
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundmiddle, pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundmiddle, sfx->getContLen())))) {
+ rv = NULL;
+ }
+
+ // check forbiddenwords
+ if ((rv) && (rv->astr) &&
+ (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
+ (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) {
+ return NULL;
+ }
+
+ // increment word number, if the second root has a compoundroot flag
+ if ((rv) && compoundroot &&
+ (TESTAFF(rv->astr, compoundroot, rv->alen))) {
+ wordnum++;
+ }
+
+ // first word is acceptable in compound words?
+ if (((rv) &&
+ (checked_prefix || (words && words[wnum]) ||
+ (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+ ((oldwordnum == 0) && compoundbegin &&
+ TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
+ ((oldwordnum > 0) && compoundmiddle &&
+ TESTAFF(rv->astr, compoundmiddle, rv->alen))
+
+ // LANG_hu section: spec. Hungarian rule
+ || ((langnum == LANG_hu) && hu_mov_rule &&
+ (TESTAFF(
+ rv->astr, 'F',
+ rv->alen) || // XXX hardwired Hungarian dictionary codes
+ TESTAFF(rv->astr, 'G', rv->alen) ||
+ TESTAFF(rv->astr, 'H', rv->alen)))
+ // END of LANG_hu section
+ ) &&
+ (
+ // test CHECKCOMPOUNDPATTERN conditions
+ scpd == 0 || checkcpdtable[scpd - 1].cond == FLAG_NULL ||
+ TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond, rv->alen)) &&
+ !((checkcompoundtriple && scpd == 0 &&
+ !words && // test triple letters
+ (word[i - 1] == word[i]) &&
+ (((i > 1) && (word[i - 1] == word[i - 2])) ||
+ ((word[i - 1] == word[i + 1])) // may be word[i+1] == '\0'
+ )) ||
+ (checkcompoundcase && scpd == 0 && !words &&
+ cpdcase_check(word.c_str(), i))))
+ // LANG_hu section: spec. Hungarian rule
+ || ((!rv) && (langnum == LANG_hu) && hu_mov_rule &&
+ (rv = affix_check(st.c_str(), i)) &&
+ (sfx && sfx->getCont() &&
+ ( // XXX hardwired Hungarian dic. codes
+ TESTAFF(sfx->getCont(), (unsigned short)'x',
+ sfx->getContLen()) ||
+ TESTAFF(
+ sfx->getCont(), (unsigned short)'%',
+ sfx->getContLen()))))) { // first word is ok condition
+
+ // LANG_hu section: spec. Hungarian rule
+ if (langnum == LANG_hu) {
+ // calculate syllable number of the word
+ numsyllable += get_syllable(st.substr(0, i));
+ // + 1 word, if syllable number of the prefix > 1 (hungarian
+ // convention)
+ if (pfx && (get_syllable(pfx->getKey()) > 1))
+ wordnum++;
+ }
+ // END of LANG_hu section
+
+ // NEXT WORD(S)
+ rv_first = rv;
+ st[i] = ch;
+
+ do { // striple loop
+
+ // check simplifiedtriple
+ if (simplifiedtriple) {
+ if (striple) {
+ checkedstriple = 1;
+ i--; // check "fahrt" instead of "ahrt" in "Schiffahrt"
+ } else if (i > 2 && word[i - 1] == word[i - 2])
+ striple = 1;
+ }
+
+ rv = lookup(st.c_str() + i); // perhaps without prefix
+
+ // search homonym with compound flag
+ while ((rv) &&
+ ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
+ !((compoundflag && !words &&
+ TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+ (compoundend && !words &&
+ TESTAFF(rv->astr, compoundend, rv->alen)) ||
+ (!defcpdtable.empty() && words &&
+ defcpd_check(&words, wnum + 1, rv, NULL, 1))) ||
+ (scpd != 0 && checkcpdtable[scpd - 1].cond2 != FLAG_NULL &&
+ !TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2,
+ rv->alen)))) {
+ rv = rv->next_homonym;
+ }
+
+ // check FORCEUCASE
+ if (rv && forceucase && (rv) &&
+ (TESTAFF(rv->astr, forceucase, rv->alen)) &&
+ !(info && *info & SPELL_ORIGCAP))
+ rv = NULL;
+
+ if (rv && words && words[wnum + 1])
+ return rv_first;
+
+ oldnumsyllable2 = numsyllable;
+ oldwordnum2 = wordnum;
+
+ // LANG_hu section: spec. Hungarian rule, XXX hardwired dictionary
+ // code
+ if ((rv) && (langnum == LANG_hu) &&
+ (TESTAFF(rv->astr, 'I', rv->alen)) &&
+ !(TESTAFF(rv->astr, 'J', rv->alen))) {
+ numsyllable--;
+ }
+ // END of LANG_hu section
+
+ // increment word number, if the second root has a compoundroot flag
+ if ((rv) && (compoundroot) &&
+ (TESTAFF(rv->astr, compoundroot, rv->alen))) {
+ wordnum++;
+ }
+
+ // check forbiddenwords
+ if ((rv) && (rv->astr) &&
+ (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
+ (is_sug && nosuggest &&
+ TESTAFF(rv->astr, nosuggest, rv->alen))))
+ return NULL;
+
+ // second word is acceptable, as a root?
+ // hungarian conventions: compounding is acceptable,
+ // when compound forms consist of 2 words, or if more,
+ // then the syllable number of root words must be 6, or lesser.
+
+ if ((rv) &&
+ ((compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+ (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) &&
+ (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
+ ((cpdmaxsyllable != 0) &&
+ (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->blen)) <=
+ cpdmaxsyllable))) &&
+ (
+ // test CHECKCOMPOUNDPATTERN
+ checkcpdtable.empty() || scpd != 0 ||
+ !cpdpat_check(word.c_str(), i, rv_first, rv, 0)) &&
+ ((!checkcompounddup || (rv != rv_first)))
+ // test CHECKCOMPOUNDPATTERN conditions
+ &&
+ (scpd == 0 || checkcpdtable[scpd - 1].cond2 == FLAG_NULL ||
+ TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2, rv->alen))) {
+ // forbid compound word, if it is a non-compound word with typical
+ // fault
+ if ((checkcompoundrep && cpdrep_check(word.c_str(), len)) ||
+ cpdwordpair_check(word.c_str(), len))
+ return NULL;
+ return rv_first;
+ }
+
+ numsyllable = oldnumsyllable2;
+ wordnum = oldwordnum2;
+
+ // perhaps second word has prefix or/and suffix
+ sfx = NULL;
+ sfxflag = FLAG_NULL;
+ rv = (compoundflag && !onlycpdrule)
+ ? affix_check((word.c_str() + i), strlen(word.c_str() + i), compoundflag,
+ IN_CPD_END)
+ : NULL;
+ if (!rv && compoundend && !onlycpdrule) {
+ sfx = NULL;
+ pfx = NULL;
+ rv = affix_check((word.c_str() + i), strlen(word.c_str() + i), compoundend,
+ IN_CPD_END);
+ }
+
+ if (!rv && !defcpdtable.empty() && words) {
+ rv = affix_check((word.c_str() + i), strlen(word.c_str() + i), 0, IN_CPD_END);
+ if (rv && defcpd_check(&words, wnum + 1, rv, NULL, 1))
+ return rv_first;
+ rv = NULL;
+ }
+
+ // test CHECKCOMPOUNDPATTERN conditions (allowed forms)
+ if (rv &&
+ !(scpd == 0 || checkcpdtable[scpd - 1].cond2 == FLAG_NULL ||
+ TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2, rv->alen)))
+ rv = NULL;
+
+ // test CHECKCOMPOUNDPATTERN conditions (forbidden compounds)
+ if (rv && !checkcpdtable.empty() && scpd == 0 &&
+ cpdpat_check(word.c_str(), i, rv_first, rv, affixed))
+ rv = NULL;
+
+ // check non_compound flag in suffix and prefix
+ if ((rv) && ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundforbidflag,
+ pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundforbidflag,
+ sfx->getContLen())))) {
+ rv = NULL;
+ }
+
+ // check FORCEUCASE
+ if (rv && forceucase && (rv) &&
+ (TESTAFF(rv->astr, forceucase, rv->alen)) &&
+ !(info && *info & SPELL_ORIGCAP))
+ rv = NULL;
+
+ // check forbiddenwords
+ if ((rv) && (rv->astr) &&
+ (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
+ (is_sug && nosuggest &&
+ TESTAFF(rv->astr, nosuggest, rv->alen))))
+ return NULL;
+
+ // pfxappnd = prefix of word+i, or NULL
+ // calculate syllable number of prefix.
+ // hungarian convention: when syllable number of prefix is more,
+ // than 1, the prefix+word counts as two words.
+
+ if (langnum == LANG_hu) {
+ // calculate syllable number of the word
+ numsyllable += get_syllable(word.c_str() + i);
+
+ // - affix syllable num.
+ // XXX only second suffix (inflections, not derivations)
+ if (sfxappnd) {
+ std::string tmp(sfxappnd);
+ reverseword(tmp);
+ numsyllable -= short(get_syllable(tmp) + sfxextra);
+ } else {
+ numsyllable -= short(sfxextra);
+ }
+
+ // + 1 word, if syllable number of the prefix > 1 (hungarian
+ // convention)
+ if (pfx && (get_syllable(pfx->getKey()) > 1))
+ wordnum++;
+
+ // increment syllable num, if last word has a SYLLABLENUM flag
+ // and the suffix is beginning `s'
+
+ if (!cpdsyllablenum.empty()) {
+ switch (sfxflag) {
+ case 'c': {
+ numsyllable += 2;
+ break;
+ }
+ case 'J': {
+ numsyllable += 1;
+ break;
+ }
+ case 'I': {
+ if (rv && TESTAFF(rv->astr, 'J', rv->alen))
+ numsyllable += 1;
+ break;
+ }
+ }
+ }
+ }
+
+ // increment word number, if the second word has a compoundroot flag
+ if ((rv) && (compoundroot) &&
+ (TESTAFF(rv->astr, compoundroot, rv->alen))) {
+ wordnum++;
+ }
+ // second word is acceptable, as a word with prefix or/and suffix?
+ // hungarian conventions: compounding is acceptable,
+ // when compound forms consist 2 word, otherwise
+ // the syllable number of root words is 6, or lesser.
+ if ((rv) &&
+ (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
+ ((cpdmaxsyllable != 0) && (numsyllable <= cpdmaxsyllable))) &&
+ ((!checkcompounddup || (rv != rv_first)))) {
+ // forbid compound word, if it is a non-compound word with typical
+ // fault
+ if ((checkcompoundrep && cpdrep_check(word.c_str(), len)) ||
+ cpdwordpair_check(word.c_str(), len))
+ return NULL;
+ return rv_first;
+ }
+
+ numsyllable = oldnumsyllable2;
+ wordnum = oldwordnum2;
+
+ // perhaps second word is a compound word (recursive call)
+ if (wordnum + 2 < maxwordnum) {
+ rv = compound_check(st.substr(i), wordnum + 1,
+ numsyllable, maxwordnum, wnum + 1, words, rwords, 0,
+ is_sug, info);
+
+ if (rv && !checkcpdtable.empty() &&
+ ((scpd == 0 &&
+ cpdpat_check(word.c_str(), i, rv_first, rv, affixed)) ||
+ (scpd != 0 &&
+ !cpdpat_check(word.c_str(), i, rv_first, rv, affixed))))
+ rv = NULL;
+ } else {
+ rv = NULL;
+ }
+ if (rv) {
+ // forbid compound word, if it is a non-compound word with typical
+ // fault, or a dictionary word pair
+
+ if (cpdwordpair_check(word.c_str(), len))
+ return NULL;
+
+ if (checkcompoundrep || forbiddenword) {
+
+ if (checkcompoundrep && cpdrep_check(word.c_str(), len))
+ return NULL;
+
+ // check first part
+ if (strncmp(rv->word, word.c_str() + i, rv->blen) == 0) {
+ char r = st[i + rv->blen];
+ st[i + rv->blen] = '\0';
+
+ if ((checkcompoundrep && cpdrep_check(st.c_str(), i + rv->blen)) ||
+ cpdwordpair_check(st.c_str(), i + rv->blen)) {
+ st[ + i + rv->blen] = r;
+ continue;
+ }
+
+ if (forbiddenword) {
+ struct hentry* rv2 = lookup(word.c_str());
+ if (!rv2)
+ rv2 = affix_check(word.c_str(), len);
+ if (rv2 && rv2->astr &&
+ TESTAFF(rv2->astr, forbiddenword, rv2->alen) &&
+ (strncmp(rv2->word, st.c_str(), i + rv->blen) == 0)) {
+ return NULL;
+ }
+ }
+ st[i + rv->blen] = r;
+ }
+ }
+ return rv_first;
+ }
+ } while (striple && !checkedstriple); // end of striple loop
+
+ if (checkedstriple) {
+ i++;
+ checkedstriple = 0;
+ striple = 0;
+ }
+
+ } // first word is ok condition
+
+ if (soldi != 0) {
+ i = soldi;
+ soldi = 0;
+ len = oldlen;
+ cmin = oldcmin;
+ cmax = oldcmax;
+ }
+ scpd++;
+
+ } while (!onlycpdrule && simplifiedcpd &&
+ scpd <= checkcpdtable.size()); // end of simplifiedcpd loop
+
+ scpd = 0;
+ wordnum = oldwordnum;
+ numsyllable = oldnumsyllable;
+
+ if (soldi != 0) {
+ i = soldi;
+ st.assign(word); // XXX add more optim.
+ soldi = 0;
+ } else
+ st[i] = ch;
+
+ } while (!defcpdtable.empty() && oldwordnum == 0 &&
+ onlycpdrule++ < 1); // end of onlycpd loop
+ }
+
+ return NULL;
+}
+
+// check if compound word is correctly spelled
+// hu_mov_rule = spec. Hungarian rule (XXX)
+int AffixMgr::compound_check_morph(const char* word,
+ int len,
+ short wordnum,
+ short numsyllable,
+ short maxwordnum,
+ short wnum,
+ hentry** words,
+ hentry** rwords,
+ char hu_mov_rule,
+ std::string& result,
+ const std::string* partresult) {
+ int i;
+ short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
+ int ok = 0;
+
+ struct hentry* rv = NULL;
+ struct hentry* rv_first;
+ std::string st;
+ char ch;
+
+ int checked_prefix;
+ std::string presult;
+
+ int cmin;
+ int cmax;
+
+ char affixed = 0;
+ hentry** oldwords = words;
+
+ // add a time limit to handle possible
+ // combinatorical explosion of the overlapping words
+
+ HUNSPELL_THREAD_LOCAL clock_t timelimit;
+
+ if (wordnum == 0) {
+ // get the start time, seeing as we're reusing this set to 0
+ // to flag timeout, use clock() + 1 to avoid start clock()
+ // of 0 as being a timeout
+ timelimit = clock() + 1;
+ }
+ else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT)) {
+ timelimit = 0;
+ }
+
+ setcminmax(&cmin, &cmax, word, len);
+
+ st.assign(word);
+
+ for (i = cmin; i < cmax; i++) {
+ // go to end of the UTF-8 character
+ if (utf8) {
+ for (; (st[i] & 0xc0) == 0x80; i++)
+ ;
+ if (i >= cmax)
+ return 0;
+ }
+
+ words = oldwords;
+ int onlycpdrule = (words) ? 1 : 0;
+
+ do { // onlycpdrule loop
+
+ if (timelimit == 0)
+ return 0;
+
+ oldnumsyllable = numsyllable;
+ oldwordnum = wordnum;
+ checked_prefix = 0;
+
+ ch = st[i];
+ st[i] = '\0';
+ sfx = NULL;
+
+ // FIRST WORD
+
+ affixed = 1;
+
+ presult.clear();
+ if (partresult)
+ presult.append(*partresult);
+
+ rv = lookup(st.c_str()); // perhaps without prefix
+
+ // forbid dictionary stems with COMPOUNDFORBIDFLAG in
+ // compound words, overriding the effect of COMPOUNDPERMITFLAG
+ if ((rv) && compoundforbidflag &&
+ TESTAFF(rv->astr, compoundforbidflag, rv->alen) && !hu_mov_rule)
+ continue;
+
+ // search homonym with compound flag
+ while ((rv) && !hu_mov_rule &&
+ ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
+ !((compoundflag && !words && !onlycpdrule &&
+ TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+ (compoundbegin && !wordnum && !onlycpdrule &&
+ TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
+ (compoundmiddle && wordnum && !words && !onlycpdrule &&
+ TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
+ (!defcpdtable.empty() && onlycpdrule &&
+ ((!words && !wordnum &&
+ defcpd_check(&words, wnum, rv, rwords, 0)) ||
+ (words &&
+ defcpd_check(&words, wnum, rv, rwords, 0))))))) {
+ rv = rv->next_homonym;
+ }
+
+ if (timelimit == 0)
+ return 0;
+
+ if (rv)
+ affixed = 0;
+
+ if (rv) {
+ presult.push_back(MSEP_FLD);
+ presult.append(MORPH_PART);
+ presult.append(st.c_str());
+ if (!HENTRY_FIND(rv, MORPH_STEM)) {
+ presult.push_back(MSEP_FLD);
+ presult.append(MORPH_STEM);
+ presult.append(st.c_str());
+ }
+ if (HENTRY_DATA(rv)) {
+ presult.push_back(MSEP_FLD);
+ presult.append(HENTRY_DATA2(rv));
+ }
+ }
+
+ if (!rv) {
+ if (compoundflag &&
+ !(rv =
+ prefix_check(st.c_str(), i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN,
+ compoundflag))) {
+ if (((rv = suffix_check(st.c_str(), i, 0, NULL, FLAG_NULL,
+ compoundflag,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes &&
+ (rv = suffix_check_twosfx(st.c_str(), i, 0, NULL, compoundflag)))) &&
+ !hu_mov_rule && sfx->getCont() &&
+ ((compoundforbidflag &&
+ TESTAFF(sfx->getCont(), compoundforbidflag,
+ sfx->getContLen())) ||
+ (compoundend &&
+ TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) {
+ rv = NULL;
+ }
+ }
+
+ if (rv ||
+ (((wordnum == 0) && compoundbegin &&
+ ((rv = suffix_check(st.c_str(), i, 0, NULL, FLAG_NULL,
+ compoundbegin,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes &&
+ (rv = suffix_check_twosfx(
+ st.c_str(), i, 0, NULL,
+ compoundbegin))) || // twofold suffix+compound
+ (rv = prefix_check(st.c_str(), i,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN,
+ compoundbegin)))) ||
+ ((wordnum > 0) && compoundmiddle &&
+ ((rv = suffix_check(st.c_str(), i, 0, NULL, FLAG_NULL,
+ compoundmiddle,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes &&
+ (rv = suffix_check_twosfx(
+ st.c_str(), i, 0, NULL,
+ compoundmiddle))) || // twofold suffix+compound
+ (rv = prefix_check(st.c_str(), i,
+ hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN,
+ compoundmiddle)))))) {
+ std::string p;
+ if (compoundflag)
+ p = affix_check_morph(st.c_str(), i, compoundflag);
+ if (p.empty()) {
+ if ((wordnum == 0) && compoundbegin) {
+ p = affix_check_morph(st.c_str(), i, compoundbegin);
+ } else if ((wordnum > 0) && compoundmiddle) {
+ p = affix_check_morph(st.c_str(), i, compoundmiddle);
+ }
+ }
+ if (!p.empty()) {
+ presult.push_back(MSEP_FLD);
+ presult.append(MORPH_PART);
+ presult.append(st.c_str());
+ line_uniq_app(p, MSEP_REC);
+ presult.append(p);
+ }
+ checked_prefix = 1;
+ }
+ // else check forbiddenwords
+ } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
+ TESTAFF(rv->astr, needaffix, rv->alen))) {
+ st[i] = ch;
+ continue;
+ }
+
+ // check non_compound flag in suffix and prefix
+ if ((rv) && !hu_mov_rule &&
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundforbidflag, sfx->getContLen())))) {
+ continue;
+ }
+
+ // check compoundend flag in suffix and prefix
+ if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundend, pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) {
+ continue;
+ }
+
+ // check compoundmiddle flag in suffix and prefix
+ if ((rv) && !checked_prefix && (wordnum == 0) && compoundmiddle &&
+ !hu_mov_rule &&
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundmiddle, pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundmiddle, sfx->getContLen())))) {
+ rv = NULL;
+ }
+
+ // check forbiddenwords
+ if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen)))
+ continue;
+
+ // increment word number, if the second root has a compoundroot flag
+ if ((rv) && (compoundroot) &&
+ (TESTAFF(rv->astr, compoundroot, rv->alen))) {
+ wordnum++;
+ }
+
+ // first word is acceptable in compound words?
+ if (((rv) &&
+ (checked_prefix || (words && words[wnum]) ||
+ (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+ ((oldwordnum == 0) && compoundbegin &&
+ TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
+ ((oldwordnum > 0) && compoundmiddle &&
+ TESTAFF(rv->astr, compoundmiddle, rv->alen))
+ // LANG_hu section: spec. Hungarian rule
+ || ((langnum == LANG_hu) && // hu_mov_rule
+ hu_mov_rule && (TESTAFF(rv->astr, 'F', rv->alen) ||
+ TESTAFF(rv->astr, 'G', rv->alen) ||
+ TESTAFF(rv->astr, 'H', rv->alen)))
+ // END of LANG_hu section
+ ) &&
+ !((checkcompoundtriple && !words && // test triple letters
+ (word[i - 1] == word[i]) &&
+ (((i > 1) && (word[i - 1] == word[i - 2])) ||
+ ((word[i - 1] == word[i + 1])) // may be word[i+1] == '\0'
+ )) ||
+ (
+ // test CHECKCOMPOUNDPATTERN
+ !checkcpdtable.empty() && !words &&
+ cpdpat_check(word, i, rv, NULL, affixed)) ||
+ (checkcompoundcase && !words && cpdcase_check(word, i))))
+ // LANG_hu section: spec. Hungarian rule
+ ||
+ ((!rv) && (langnum == LANG_hu) && hu_mov_rule &&
+ (rv = affix_check(st.c_str(), i)) &&
+ (sfx && sfx->getCont() &&
+ (TESTAFF(sfx->getCont(), (unsigned short)'x', sfx->getContLen()) ||
+ TESTAFF(sfx->getCont(), (unsigned short)'%', sfx->getContLen()))))
+ // END of LANG_hu section
+ ) {
+ // LANG_hu section: spec. Hungarian rule
+ if (langnum == LANG_hu) {
+ // calculate syllable number of the word
+ numsyllable += get_syllable(st.substr(0, i));
+
+ // + 1 word, if syllable number of the prefix > 1 (hungarian
+ // convention)
+ if (pfx && (get_syllable(pfx->getKey()) > 1))
+ wordnum++;
+ }
+ // END of LANG_hu section
+
+ // NEXT WORD(S)
+ rv_first = rv;
+ rv = lookup((word + i)); // perhaps without prefix
+
+ // search homonym with compound flag
+ while ((rv) && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
+ !((compoundflag && !words &&
+ TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+ (compoundend && !words &&
+ TESTAFF(rv->astr, compoundend, rv->alen)) ||
+ (!defcpdtable.empty() && words &&
+ defcpd_check(&words, wnum + 1, rv, NULL, 1))))) {
+ rv = rv->next_homonym;
+ }
+
+ if (rv && words && words[wnum + 1]) {
+ result.append(presult);
+ result.push_back(MSEP_FLD);
+ result.append(MORPH_PART);
+ result.append(word + i);
+ if (complexprefixes && HENTRY_DATA(rv))
+ result.append(HENTRY_DATA2(rv));
+ if (!HENTRY_FIND(rv, MORPH_STEM)) {
+ result.push_back(MSEP_FLD);
+ result.append(MORPH_STEM);
+ result.append(HENTRY_WORD(rv));
+ }
+ // store the pointer of the hash entry
+ if (!complexprefixes && HENTRY_DATA(rv)) {
+ result.push_back(MSEP_FLD);
+ result.append(HENTRY_DATA2(rv));
+ }
+ result.push_back(MSEP_REC);
+ return 0;
+ }
+
+ oldnumsyllable2 = numsyllable;
+ oldwordnum2 = wordnum;
+
+ // LANG_hu section: spec. Hungarian rule
+ if ((rv) && (langnum == LANG_hu) &&
+ (TESTAFF(rv->astr, 'I', rv->alen)) &&
+ !(TESTAFF(rv->astr, 'J', rv->alen))) {
+ numsyllable--;
+ }
+ // END of LANG_hu section
+ // increment word number, if the second root has a compoundroot flag
+ if ((rv) && (compoundroot) &&
+ (TESTAFF(rv->astr, compoundroot, rv->alen))) {
+ wordnum++;
+ }
+
+ // check forbiddenwords
+ if ((rv) && (rv->astr) &&
+ (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) {
+ st[i] = ch;
+ continue;
+ }
+
+ // second word is acceptable, as a root?
+ // hungarian conventions: compounding is acceptable,
+ // when compound forms consist of 2 words, or if more,
+ // then the syllable number of root words must be 6, or lesser.
+ if ((rv) &&
+ ((compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
+ (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) &&
+ (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
+ ((cpdmaxsyllable != 0) &&
+ (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->blen)) <=
+ cpdmaxsyllable))) &&
+ ((!checkcompounddup || (rv != rv_first)))) {
+ // bad compound word
+ result.append(presult);
+ result.push_back(MSEP_FLD);
+ result.append(MORPH_PART);
+ result.append(word + i);
+
+ if (HENTRY_DATA(rv)) {
+ if (complexprefixes)
+ result.append(HENTRY_DATA2(rv));
+ if (!HENTRY_FIND(rv, MORPH_STEM)) {
+ result.push_back(MSEP_FLD);
+ result.append(MORPH_STEM);
+ result.append(HENTRY_WORD(rv));
+ }
+ // store the pointer of the hash entry
+ if (!complexprefixes) {
+ result.push_back(MSEP_FLD);
+ result.append(HENTRY_DATA2(rv));
+ }
+ }
+ result.push_back(MSEP_REC);
+ ok = 1;
+ }
+
+ numsyllable = oldnumsyllable2;
+ wordnum = oldwordnum2;
+
+ // perhaps second word has prefix or/and suffix
+ sfx = NULL;
+ sfxflag = FLAG_NULL;
+
+ if (compoundflag && !onlycpdrule)
+ rv = affix_check((word + i), strlen(word + i), compoundflag);
+ else
+ rv = NULL;
+
+ if (!rv && compoundend && !onlycpdrule) {
+ sfx = NULL;
+ pfx = NULL;
+ rv = affix_check((word + i), strlen(word + i), compoundend);
+ }
+
+ if (!rv && !defcpdtable.empty() && words) {
+ rv = affix_check((word + i), strlen(word + i), 0, IN_CPD_END);
+ if (rv && words && defcpd_check(&words, wnum + 1, rv, NULL, 1)) {
+ std::string m;
+ if (compoundflag)
+ m = affix_check_morph((word + i), strlen(word + i), compoundflag);
+ if (m.empty() && compoundend) {
+ m = affix_check_morph((word + i), strlen(word + i), compoundend);
+ }
+ result.append(presult);
+ if (!m.empty()) {
+ result.push_back(MSEP_FLD);
+ result.append(MORPH_PART);
+ result.append(word + i);
+ line_uniq_app(m, MSEP_REC);
+ result.append(m);
+ }
+ result.push_back(MSEP_REC);
+ ok = 1;
+ }
+ }
+
+ // check non_compound flag in suffix and prefix
+ if ((rv) &&
+ ((pfx && pfx->getCont() &&
+ TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())) ||
+ (sfx && sfx->getCont() &&
+ TESTAFF(sfx->getCont(), compoundforbidflag,
+ sfx->getContLen())))) {
+ rv = NULL;
+ }
+
+ // check forbiddenwords
+ if ((rv) && (rv->astr) &&
+ (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen)) &&
+ (!TESTAFF(rv->astr, needaffix, rv->alen))) {
+ st[i] = ch;
+ continue;
+ }
+
+ if (langnum == LANG_hu) {
+ // calculate syllable number of the word
+ numsyllable += get_syllable(word + i);
+
+ // - affix syllable num.
+ // XXX only second suffix (inflections, not derivations)
+ if (sfxappnd) {
+ std::string tmp(sfxappnd);
+ reverseword(tmp);
+ numsyllable -= short(get_syllable(tmp) + sfxextra);
+ } else {
+ numsyllable -= short(sfxextra);
+ }
+
+ // + 1 word, if syllable number of the prefix > 1 (hungarian
+ // convention)
+ if (pfx && (get_syllable(pfx->getKey()) > 1))
+ wordnum++;
+
+ // increment syllable num, if last word has a SYLLABLENUM flag
+ // and the suffix is beginning `s'
+
+ if (!cpdsyllablenum.empty()) {
+ switch (sfxflag) {
+ case 'c': {
+ numsyllable += 2;
+ break;
+ }
+ case 'J': {
+ numsyllable += 1;
+ break;
+ }
+ case 'I': {
+ if (rv && TESTAFF(rv->astr, 'J', rv->alen))
+ numsyllable += 1;
+ break;
+ }
+ }
+ }
+ }
+
+ // increment word number, if the second word has a compoundroot flag
+ if ((rv) && (compoundroot) &&
+ (TESTAFF(rv->astr, compoundroot, rv->alen))) {
+ wordnum++;
+ }
+ // second word is acceptable, as a word with prefix or/and suffix?
+ // hungarian conventions: compounding is acceptable,
+ // when compound forms consist 2 word, otherwise
+ // the syllable number of root words is 6, or lesser.
+ if ((rv) &&
+ (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
+ ((cpdmaxsyllable != 0) && (numsyllable <= cpdmaxsyllable))) &&
+ ((!checkcompounddup || (rv != rv_first)))) {
+ std::string m;
+ if (compoundflag)
+ m = affix_check_morph((word + i), strlen(word + i), compoundflag);
+ if (m.empty() && compoundend) {
+ m = affix_check_morph((word + i), strlen(word + i), compoundend);
+ }
+ result.append(presult);
+ if (!m.empty()) {
+ result.push_back(MSEP_FLD);
+ result.append(MORPH_PART);
+ result.append(word + i);
+ line_uniq_app(m, MSEP_REC);
+ result.push_back(MSEP_FLD);
+ result.append(m);
+ }
+ result.push_back(MSEP_REC);
+ ok = 1;
+ }
+
+ numsyllable = oldnumsyllable2;
+ wordnum = oldwordnum2;
+
+ // perhaps second word is a compound word (recursive call)
+ if ((wordnum + 2 < maxwordnum) && (ok == 0)) {
+ compound_check_morph((word + i), strlen(word + i), wordnum + 1,
+ numsyllable, maxwordnum, wnum + 1, words, rwords, 0,
+ result, &presult);
+ } else {
+ rv = NULL;
+ }
+ }
+ st[i] = ch;
+ wordnum = oldwordnum;
+ numsyllable = oldnumsyllable;
+
+ } while (!defcpdtable.empty() && oldwordnum == 0 &&
+ onlycpdrule++ < 1); // end of onlycpd loop
+ }
+ return 0;
+}
+
+
+inline int AffixMgr::isRevSubset(const char* s1,
+ const char* end_of_s2,
+ int len) {
+ while ((len > 0) && (*s1 != '\0') && ((*s1 == *end_of_s2) || (*s1 == '.'))) {
+ s1++;
+ end_of_s2--;
+ len--;
+ }
+ return (*s1 == '\0');
+}
+
+// check word for suffixes
+struct hentry* AffixMgr::suffix_check(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ const FLAG cclass,
+ const FLAG needflag,
+ char in_compound) {
+ struct hentry* rv = NULL;
+ PfxEntry* ep = ppfx;
+
+ // first handle the special case of 0 length suffixes
+ SfxEntry* se = sStart[0];
+
+ while (se) {
+ if (!cclass || se->getCont()) {
+ // suffixes are not allowed in beginning of compounds
+ if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass
+ // except when signed with compoundpermitflag flag
+ (se->getCont() && compoundpermitflag &&
+ TESTAFF(se->getCont(), compoundpermitflag, se->getContLen()))) &&
+ (!circumfix ||
+ // no circumfix flag in prefix and suffix
+ ((!ppfx || !(ep->getCont()) ||
+ !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) &&
+ (!se->getCont() ||
+ !(TESTAFF(se->getCont(), circumfix, se->getContLen())))) ||
+ // circumfix flag in prefix AND suffix
+ ((ppfx && (ep->getCont()) &&
+ TESTAFF(ep->getCont(), circumfix, ep->getContLen())) &&
+ (se->getCont() &&
+ (TESTAFF(se->getCont(), circumfix, se->getContLen()))))) &&
+ // fogemorpheme
+ (in_compound ||
+ !(se->getCont() &&
+ (TESTAFF(se->getCont(), onlyincompound, se->getContLen())))) &&
+ // needaffix on prefix or first suffix
+ (cclass ||
+ !(se->getCont() &&
+ TESTAFF(se->getCont(), needaffix, se->getContLen())) ||
+ (ppfx &&
+ !((ep->getCont()) &&
+ TESTAFF(ep->getCont(), needaffix, ep->getContLen()))))) {
+ rv = se->checkword(word, len, sfxopts, ppfx,
+ (FLAG)cclass, needflag,
+ (in_compound ? 0 : onlyincompound));
+ if (rv) {
+ sfx = se; // BUG: sfx not stateless
+ return rv;
+ }
+ }
+ }
+ se = se->getNext();
+ }
+
+ // now handle the general case
+ if (len == 0)
+ return NULL; // FULLSTRIP
+ unsigned char sp = *((const unsigned char*)(word + len - 1));
+ SfxEntry* sptr = sStart[sp];
+
+ while (sptr) {
+ if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
+ // suffixes are not allowed in beginning of compounds
+ if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass
+ // except when signed with compoundpermitflag flag
+ (sptr->getCont() && compoundpermitflag &&
+ TESTAFF(sptr->getCont(), compoundpermitflag,
+ sptr->getContLen()))) &&
+ (!circumfix ||
+ // no circumfix flag in prefix and suffix
+ ((!ppfx || !(ep->getCont()) ||
+ !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) &&
+ (!sptr->getCont() ||
+ !(TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())))) ||
+ // circumfix flag in prefix AND suffix
+ ((ppfx && (ep->getCont()) &&
+ TESTAFF(ep->getCont(), circumfix, ep->getContLen())) &&
+ (sptr->getCont() &&
+ (TESTAFF(sptr->getCont(), circumfix, sptr->getContLen()))))) &&
+ // fogemorpheme
+ (in_compound ||
+ !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound,
+ sptr->getContLen()))))) &&
+ // needaffix on prefix or first suffix
+ (cclass ||
+ !(sptr->getCont() &&
+ TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) ||
+ (ppfx &&
+ !((ep->getCont()) &&
+ TESTAFF(ep->getCont(), needaffix, ep->getContLen())))))
+ if (in_compound != IN_CPD_END || ppfx ||
+ !(sptr->getCont() &&
+ TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))) {
+ rv = sptr->checkword(word, len, sfxopts, ppfx,
+ cclass, needflag,
+ (in_compound ? 0 : onlyincompound));
+ if (rv) {
+ sfx = sptr; // BUG: sfx not stateless
+ sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
+ if (!sptr->getCont())
+ sfxappnd = sptr->getKey(); // BUG: sfxappnd not stateless
+ // LANG_hu section: spec. Hungarian rule
+ else if (langnum == LANG_hu && sptr->getKeyLen() &&
+ sptr->getKey()[0] == 'i' && sptr->getKey()[1] != 'y' &&
+ sptr->getKey()[1] != 't') {
+ sfxextra = 1;
+ }
+ // END of LANG_hu section
+ return rv;
+ }
+ }
+ sptr = sptr->getNextEQ();
+ } else {
+ sptr = sptr->getNextNE();
+ }
+ }
+
+ return NULL;
+}
+
+// check word for two-level suffixes
+struct hentry* AffixMgr::suffix_check_twosfx(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ const FLAG needflag) {
+ struct hentry* rv = NULL;
+
+ // first handle the special case of 0 length suffixes
+ SfxEntry* se = sStart[0];
+ while (se) {
+ if (contclasses[se->getFlag()]) {
+ rv = se->check_twosfx(word, len, sfxopts, ppfx, needflag);
+ if (rv)
+ return rv;
+ }
+ se = se->getNext();
+ }
+
+ // now handle the general case
+ if (len == 0)
+ return NULL; // FULLSTRIP
+ unsigned char sp = *((const unsigned char*)(word + len - 1));
+ SfxEntry* sptr = sStart[sp];
+
+ while (sptr) {
+ if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
+ if (contclasses[sptr->getFlag()]) {
+ rv = sptr->check_twosfx(word, len, sfxopts, ppfx, needflag);
+ if (rv) {
+ sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
+ if (!sptr->getCont())
+ sfxappnd = sptr->getKey(); // BUG: sfxappnd not stateless
+ return rv;
+ }
+ }
+ sptr = sptr->getNextEQ();
+ } else {
+ sptr = sptr->getNextNE();
+ }
+ }
+
+ return NULL;
+}
+
+// check word for two-level suffixes and morph
+std::string AffixMgr::suffix_check_twosfx_morph(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ const FLAG needflag) {
+ std::string result;
+ std::string result2;
+ std::string result3;
+
+ // first handle the special case of 0 length suffixes
+ SfxEntry* se = sStart[0];
+ while (se) {
+ if (contclasses[se->getFlag()]) {
+ std::string st = se->check_twosfx_morph(word, len, sfxopts, ppfx, needflag);
+ if (!st.empty()) {
+ if (ppfx) {
+ if (ppfx->getMorph()) {
+ result.append(ppfx->getMorph());
+ result.push_back(MSEP_FLD);
+ } else
+ debugflag(result, ppfx->getFlag());
+ }
+ result.append(st);
+ if (se->getMorph()) {
+ result.push_back(MSEP_FLD);
+ result.append(se->getMorph());
+ } else
+ debugflag(result, se->getFlag());
+ result.push_back(MSEP_REC);
+ }
+ }
+ se = se->getNext();
+ }
+
+ // now handle the general case
+ if (len == 0)
+ return std::string(); // FULLSTRIP
+ unsigned char sp = *((const unsigned char*)(word + len - 1));
+ SfxEntry* sptr = sStart[sp];
+
+ while (sptr) {
+ if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
+ if (contclasses[sptr->getFlag()]) {
+ std::string st = sptr->check_twosfx_morph(word, len, sfxopts, ppfx, needflag);
+ if (!st.empty()) {
+ sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
+ if (!sptr->getCont())
+ sfxappnd = sptr->getKey(); // BUG: sfxappnd not stateless
+ result2.assign(st);
+
+ result3.clear();
+
+ if (sptr->getMorph()) {
+ result3.push_back(MSEP_FLD);
+ result3.append(sptr->getMorph());
+ } else
+ debugflag(result3, sptr->getFlag());
+ strlinecat(result2, result3);
+ result2.push_back(MSEP_REC);
+ result.append(result2);
+ }
+ }
+ sptr = sptr->getNextEQ();
+ } else {
+ sptr = sptr->getNextNE();
+ }
+ }
+
+ return result;
+}
+
+std::string AffixMgr::suffix_check_morph(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ const FLAG cclass,
+ const FLAG needflag,
+ char in_compound) {
+ std::string result;
+
+ struct hentry* rv = NULL;
+
+ PfxEntry* ep = ppfx;
+
+ // first handle the special case of 0 length suffixes
+ SfxEntry* se = sStart[0];
+ while (se) {
+ if (!cclass || se->getCont()) {
+ // suffixes are not allowed in beginning of compounds
+ if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
+ // except when signed with compoundpermitflag flag
+ (se->getCont() && compoundpermitflag &&
+ TESTAFF(se->getCont(), compoundpermitflag, se->getContLen()))) &&
+ (!circumfix ||
+ // no circumfix flag in prefix and suffix
+ ((!ppfx || !(ep->getCont()) ||
+ !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) &&
+ (!se->getCont() ||
+ !(TESTAFF(se->getCont(), circumfix, se->getContLen())))) ||
+ // circumfix flag in prefix AND suffix
+ ((ppfx && (ep->getCont()) &&
+ TESTAFF(ep->getCont(), circumfix, ep->getContLen())) &&
+ (se->getCont() &&
+ (TESTAFF(se->getCont(), circumfix, se->getContLen()))))) &&
+ // fogemorpheme
+ (in_compound ||
+ !((se->getCont() &&
+ (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) &&
+ // needaffix on prefix or first suffix
+ (cclass ||
+ !(se->getCont() &&
+ TESTAFF(se->getCont(), needaffix, se->getContLen())) ||
+ (ppfx &&
+ !((ep->getCont()) &&
+ TESTAFF(ep->getCont(), needaffix, ep->getContLen()))))))
+ rv = se->checkword(word, len, sfxopts, ppfx, cclass,
+ needflag, FLAG_NULL);
+ while (rv) {
+ if (ppfx) {
+ if (ppfx->getMorph()) {
+ result.append(ppfx->getMorph());
+ result.push_back(MSEP_FLD);
+ } else
+ debugflag(result, ppfx->getFlag());
+ }
+ if (complexprefixes && HENTRY_DATA(rv))
+ result.append(HENTRY_DATA2(rv));
+ if (!HENTRY_FIND(rv, MORPH_STEM)) {
+ result.push_back(MSEP_FLD);
+ result.append(MORPH_STEM);
+ result.append(HENTRY_WORD(rv));
+ }
+
+ if (!complexprefixes && HENTRY_DATA(rv)) {
+ result.push_back(MSEP_FLD);
+ result.append(HENTRY_DATA2(rv));
+ }
+ if (se->getMorph()) {
+ result.push_back(MSEP_FLD);
+ result.append(se->getMorph());
+ } else
+ debugflag(result, se->getFlag());
+ result.push_back(MSEP_REC);
+ rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
+ }
+ }
+ se = se->getNext();
+ }
+
+ // now handle the general case
+ if (len == 0)
+ return std::string(); // FULLSTRIP
+ unsigned char sp = *((const unsigned char*)(word + len - 1));
+ SfxEntry* sptr = sStart[sp];
+
+ while (sptr) {
+ if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
+ // suffixes are not allowed in beginning of compounds
+ if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
+ // except when signed with compoundpermitflag flag
+ (sptr->getCont() && compoundpermitflag &&
+ TESTAFF(sptr->getCont(), compoundpermitflag,
+ sptr->getContLen()))) &&
+ (!circumfix ||
+ // no circumfix flag in prefix and suffix
+ ((!ppfx || !(ep->getCont()) ||
+ !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) &&
+ (!sptr->getCont() ||
+ !(TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())))) ||
+ // circumfix flag in prefix AND suffix
+ ((ppfx && (ep->getCont()) &&
+ TESTAFF(ep->getCont(), circumfix, ep->getContLen())) &&
+ (sptr->getCont() &&
+ (TESTAFF(sptr->getCont(), circumfix, sptr->getContLen()))))) &&
+ // fogemorpheme
+ (in_compound ||
+ !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound,
+ sptr->getContLen()))))) &&
+ // needaffix on first suffix
+ (cclass ||
+ !(sptr->getCont() &&
+ TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())))))
+ rv = sptr->checkword(word, len, sfxopts, ppfx, cclass,
+ needflag, FLAG_NULL);
+ while (rv) {
+ if (ppfx) {
+ if (ppfx->getMorph()) {
+ result.append(ppfx->getMorph());
+ result.push_back(MSEP_FLD);
+ } else
+ debugflag(result, ppfx->getFlag());
+ }
+ if (complexprefixes && HENTRY_DATA(rv))
+ result.append(HENTRY_DATA2(rv));
+ if (!HENTRY_FIND(rv, MORPH_STEM)) {
+ result.push_back(MSEP_FLD);
+ result.append(MORPH_STEM);
+ result.append(HENTRY_WORD(rv));
+ }
+
+ if (!complexprefixes && HENTRY_DATA(rv)) {
+ result.push_back(MSEP_FLD);
+ result.append(HENTRY_DATA2(rv));
+ }
+
+ if (sptr->getMorph()) {
+ result.push_back(MSEP_FLD);
+ result.append(sptr->getMorph());
+ } else
+ debugflag(result, sptr->getFlag());
+ result.push_back(MSEP_REC);
+ rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
+ }
+ sptr = sptr->getNextEQ();
+ } else {
+ sptr = sptr->getNextNE();
+ }
+ }
+
+ return result;
+}
+
+// check if word with affixes is correctly spelled
+struct hentry* AffixMgr::affix_check(const char* word,
+ int len,
+ const FLAG needflag,
+ char in_compound) {
+
+ // check all prefixes (also crossed with suffixes if allowed)
+ struct hentry* rv = prefix_check(word, len, in_compound, needflag);
+ if (rv)
+ return rv;
+
+ // if still not found check all suffixes
+ rv = suffix_check(word, len, 0, NULL, FLAG_NULL, needflag, in_compound);
+
+ if (havecontclass) {
+ sfx = NULL;
+ pfx = NULL;
+
+ if (rv)
+ return rv;
+ // if still not found check all two-level suffixes
+ rv = suffix_check_twosfx(word, len, 0, NULL, needflag);
+
+ if (rv)
+ return rv;
+ // if still not found check all two-level suffixes
+ rv = prefix_check_twosfx(word, len, IN_CPD_NOT, needflag);
+ }
+
+ return rv;
+}
+
+// check if word with affixes is correctly spelled
+std::string AffixMgr::affix_check_morph(const char* word,
+ int len,
+ const FLAG needflag,
+ char in_compound) {
+ std::string result;
+
+ // check all prefixes (also crossed with suffixes if allowed)
+ std::string st = prefix_check_morph(word, len, in_compound);
+ if (!st.empty()) {
+ result.append(st);
+ }
+
+ // if still not found check all suffixes
+ st = suffix_check_morph(word, len, 0, NULL, '\0', needflag, in_compound);
+ if (!st.empty()) {
+ result.append(st);
+ }
+
+ if (havecontclass) {
+ sfx = NULL;
+ pfx = NULL;
+ // if still not found check all two-level suffixes
+ st = suffix_check_twosfx_morph(word, len, 0, NULL, needflag);
+ if (!st.empty()) {
+ result.append(st);
+ }
+
+ // if still not found check all two-level suffixes
+ st = prefix_check_twosfx_morph(word, len, IN_CPD_NOT, needflag);
+ if (!st.empty()) {
+ result.append(st);
+ }
+ }
+
+ return result;
+}
+
+// morphcmp(): compare MORPH_DERI_SFX, MORPH_INFL_SFX and MORPH_TERM_SFX fields
+// in the first line of the inputs
+// return 0, if inputs equal
+// return 1, if inputs may equal with a secondary suffix
+// otherwise return -1
+static int morphcmp(const char* s, const char* t) {
+ int se = 0;
+ int te = 0;
+ const char* sl;
+ const char* tl;
+ const char* olds;
+ const char* oldt;
+ if (!s || !t)
+ return 1;
+ olds = s;
+ sl = strchr(s, '\n');
+ s = strstr(s, MORPH_DERI_SFX);
+ if (!s || (sl && sl < s))
+ s = strstr(olds, MORPH_INFL_SFX);
+ if (!s || (sl && sl < s)) {
+ s = strstr(olds, MORPH_TERM_SFX);
+ olds = NULL;
+ }
+ oldt = t;
+ tl = strchr(t, '\n');
+ t = strstr(t, MORPH_DERI_SFX);
+ if (!t || (tl && tl < t))
+ t = strstr(oldt, MORPH_INFL_SFX);
+ if (!t || (tl && tl < t)) {
+ t = strstr(oldt, MORPH_TERM_SFX);
+ oldt = NULL;
+ }
+ while (s && t && (!sl || sl > s) && (!tl || tl > t)) {
+ s += MORPH_TAG_LEN;
+ t += MORPH_TAG_LEN;
+ se = 0;
+ te = 0;
+ while ((*s == *t) && !se && !te) {
+ s++;
+ t++;
+ switch (*s) {
+ case ' ':
+ case '\n':
+ case '\t':
+ case '\0':
+ se = 1;
+ }
+ switch (*t) {
+ case ' ':
+ case '\n':
+ case '\t':
+ case '\0':
+ te = 1;
+ }
+ }
+ if (!se || !te) {
+ // not terminal suffix difference
+ if (olds)
+ return -1;
+ return 1;
+ }
+ olds = s;
+ s = strstr(s, MORPH_DERI_SFX);
+ if (!s || (sl && sl < s))
+ s = strstr(olds, MORPH_INFL_SFX);
+ if (!s || (sl && sl < s)) {
+ s = strstr(olds, MORPH_TERM_SFX);
+ olds = NULL;
+ }
+ oldt = t;
+ t = strstr(t, MORPH_DERI_SFX);
+ if (!t || (tl && tl < t))
+ t = strstr(oldt, MORPH_INFL_SFX);
+ if (!t || (tl && tl < t)) {
+ t = strstr(oldt, MORPH_TERM_SFX);
+ oldt = NULL;
+ }
+ }
+ if (!s && !t && se && te)
+ return 0;
+ return 1;
+}
+
+std::string AffixMgr::morphgen(const char* ts,
+ int wl,
+ const unsigned short* ap,
+ unsigned short al,
+ const char* morph,
+ const char* targetmorph,
+ int level) {
+ // handle suffixes
+ if (!morph)
+ return std::string();
+
+ // check substandard flag
+ if (TESTAFF(ap, substandard, al))
+ return std::string();
+
+ if (morphcmp(morph, targetmorph) == 0)
+ return ts;
+
+ size_t stemmorphcatpos;
+ std::string mymorph;
+
+ // use input suffix fields, if exist
+ if (strstr(morph, MORPH_INFL_SFX) || strstr(morph, MORPH_DERI_SFX)) {
+ mymorph.assign(morph);
+ mymorph.push_back(MSEP_FLD);
+ stemmorphcatpos = mymorph.size();
+ } else {
+ stemmorphcatpos = std::string::npos;
+ }
+
+ for (int i = 0; i < al; i++) {
+ const unsigned char c = (unsigned char)(ap[i] & 0x00FF);
+ SfxEntry* sptr = sFlag[c];
+ while (sptr) {
+ if (sptr->getFlag() == ap[i] && sptr->getMorph() &&
+ ((sptr->getContLen() == 0) ||
+ // don't generate forms with substandard affixes
+ !TESTAFF(sptr->getCont(), substandard, sptr->getContLen()))) {
+ const char* stemmorph;
+ if (stemmorphcatpos != std::string::npos) {
+ mymorph.replace(stemmorphcatpos, std::string::npos, sptr->getMorph());
+ stemmorph = mymorph.c_str();
+ } else {
+ stemmorph = sptr->getMorph();
+ }
+
+ int cmp = morphcmp(stemmorph, targetmorph);
+
+ if (cmp == 0) {
+ std::string newword = sptr->add(ts, wl);
+ if (!newword.empty()) {
+ hentry* check = pHMgr->lookup(newword.c_str()); // XXX extra dic
+ if (!check || !check->astr ||
+ !(TESTAFF(check->astr, forbiddenword, check->alen) ||
+ TESTAFF(check->astr, ONLYUPCASEFLAG, check->alen))) {
+ return newword;
+ }
+ }
+ }
+
+ // recursive call for secondary suffixes
+ if ((level == 0) && (cmp == 1) && (sptr->getContLen() > 0) &&
+ !TESTAFF(sptr->getCont(), substandard, sptr->getContLen())) {
+ std::string newword = sptr->add(ts, wl);
+ if (!newword.empty()) {
+ std::string newword2 =
+ morphgen(newword.c_str(), newword.size(), sptr->getCont(),
+ sptr->getContLen(), stemmorph, targetmorph, 1);
+
+ if (!newword2.empty()) {
+ return newword2;
+ }
+ }
+ }
+ }
+ sptr = sptr->getFlgNxt();
+ }
+ }
+ return std::string();
+}
+
+int AffixMgr::expand_rootword(struct guessword* wlst,
+ int maxn,
+ const char* ts,
+ int wl,
+ const unsigned short* ap,
+ unsigned short al,
+ const char* bad,
+ int badl,
+ const char* phon) {
+ int nh = 0;
+ // first add root word to list
+ if ((nh < maxn) &&
+ !(al && ((needaffix && TESTAFF(ap, needaffix, al)) ||
+ (onlyincompound && TESTAFF(ap, onlyincompound, al))))) {
+ wlst[nh].word = mystrdup(ts);
+ if (!wlst[nh].word)
+ return 0;
+ wlst[nh].allow = false;
+ wlst[nh].orig = NULL;
+ nh++;
+ // add special phonetic version
+ if (phon && (nh < maxn)) {
+ wlst[nh].word = mystrdup(phon);
+ if (!wlst[nh].word)
+ return nh - 1;
+ wlst[nh].allow = false;
+ wlst[nh].orig = mystrdup(ts);
+ if (!wlst[nh].orig)
+ return nh - 1;
+ nh++;
+ }
+ }
+
+ // handle suffixes
+ for (int i = 0; i < al; i++) {
+ const unsigned char c = (unsigned char)(ap[i] & 0x00FF);
+ SfxEntry* sptr = sFlag[c];
+ while (sptr) {
+ if ((sptr->getFlag() == ap[i]) &&
+ (!sptr->getKeyLen() ||
+ ((badl > sptr->getKeyLen()) &&
+ (strcmp(sptr->getAffix(), bad + badl - sptr->getKeyLen()) == 0))) &&
+ // check needaffix flag
+ !(sptr->getCont() &&
+ ((needaffix &&
+ TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) ||
+ (circumfix &&
+ TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())) ||
+ (onlyincompound &&
+ TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) {
+ std::string newword = sptr->add(ts, wl);
+ if (!newword.empty()) {
+ if (nh < maxn) {
+ wlst[nh].word = mystrdup(newword.c_str());
+ wlst[nh].allow = sptr->allowCross();
+ wlst[nh].orig = NULL;
+ nh++;
+ // add special phonetic version
+ if (phon && (nh < maxn)) {
+ std::string prefix(phon);
+ std::string key(sptr->getKey());
+ reverseword(key);
+ prefix.append(key);
+ wlst[nh].word = mystrdup(prefix.c_str());
+ if (!wlst[nh].word)
+ return nh - 1;
+ wlst[nh].allow = false;
+ wlst[nh].orig = mystrdup(newword.c_str());
+ if (!wlst[nh].orig)
+ return nh - 1;
+ nh++;
+ }
+ }
+ }
+ }
+ sptr = sptr->getFlgNxt();
+ }
+ }
+
+ int n = nh;
+
+ // handle cross products of prefixes and suffixes
+ for (int j = 1; j < n; j++)
+ if (wlst[j].allow) {
+ for (int k = 0; k < al; k++) {
+ const unsigned char c = (unsigned char)(ap[k] & 0x00FF);
+ PfxEntry* cptr = pFlag[c];
+ while (cptr) {
+ if ((cptr->getFlag() == ap[k]) && cptr->allowCross() &&
+ (!cptr->getKeyLen() ||
+ ((badl > cptr->getKeyLen()) &&
+ (strncmp(cptr->getKey(), bad, cptr->getKeyLen()) == 0)))) {
+ int l1 = strlen(wlst[j].word);
+ std::string newword = cptr->add(wlst[j].word, l1);
+ if (!newword.empty()) {
+ if (nh < maxn) {
+ wlst[nh].word = mystrdup(newword.c_str());
+ wlst[nh].allow = cptr->allowCross();
+ wlst[nh].orig = NULL;
+ nh++;
+ }
+ }
+ }
+ cptr = cptr->getFlgNxt();
+ }
+ }
+ }
+
+ // now handle pure prefixes
+ for (int m = 0; m < al; m++) {
+ const unsigned char c = (unsigned char)(ap[m] & 0x00FF);
+ PfxEntry* ptr = pFlag[c];
+ while (ptr) {
+ if ((ptr->getFlag() == ap[m]) &&
+ (!ptr->getKeyLen() ||
+ ((badl > ptr->getKeyLen()) &&
+ (strncmp(ptr->getKey(), bad, ptr->getKeyLen()) == 0))) &&
+ // check needaffix flag
+ !(ptr->getCont() &&
+ ((needaffix &&
+ TESTAFF(ptr->getCont(), needaffix, ptr->getContLen())) ||
+ (circumfix &&
+ TESTAFF(ptr->getCont(), circumfix, ptr->getContLen())) ||
+ (onlyincompound &&
+ TESTAFF(ptr->getCont(), onlyincompound, ptr->getContLen()))))) {
+ std::string newword = ptr->add(ts, wl);
+ if (!newword.empty()) {
+ if (nh < maxn) {
+ wlst[nh].word = mystrdup(newword.c_str());
+ wlst[nh].allow = ptr->allowCross();
+ wlst[nh].orig = NULL;
+ nh++;
+ }
+ }
+ }
+ ptr = ptr->getFlgNxt();
+ }
+ }
+
+ return nh;
+}
+
+// return replacing table
+const std::vector<replentry>& AffixMgr::get_reptable() const {
+ return pHMgr->get_reptable();
+}
+
+// return iconv table
+RepList* AffixMgr::get_iconvtable() const {
+ if (!iconvtable)
+ return NULL;
+ return iconvtable;
+}
+
+// return oconv table
+RepList* AffixMgr::get_oconvtable() const {
+ if (!oconvtable)
+ return NULL;
+ return oconvtable;
+}
+
+// return replacing table
+struct phonetable* AffixMgr::get_phonetable() const {
+ if (!phone)
+ return NULL;
+ return phone;
+}
+
+// return character map table
+const std::vector<mapentry>& AffixMgr::get_maptable() const {
+ return maptable;
+}
+
+// return character map table
+const std::vector<std::string>& AffixMgr::get_breaktable() const {
+ return breaktable;
+}
+
+// return text encoding of dictionary
+const std::string& AffixMgr::get_encoding() {
+ if (encoding.empty())
+ encoding = SPELL_ENCODING;
+ return encoding;
+}
+
+// return text encoding of dictionary
+int AffixMgr::get_langnum() const {
+ return langnum;
+}
+
+// return double prefix option
+int AffixMgr::get_complexprefixes() const {
+ return complexprefixes;
+}
+
+// return FULLSTRIP option
+int AffixMgr::get_fullstrip() const {
+ return fullstrip;
+}
+
+FLAG AffixMgr::get_keepcase() const {
+ return keepcase;
+}
+
+FLAG AffixMgr::get_forceucase() const {
+ return forceucase;
+}
+
+FLAG AffixMgr::get_warn() const {
+ return warn;
+}
+
+int AffixMgr::get_forbidwarn() const {
+ return forbidwarn;
+}
+
+int AffixMgr::get_checksharps() const {
+ return checksharps;
+}
+
+char* AffixMgr::encode_flag(unsigned short aflag) const {
+ return pHMgr->encode_flag(aflag);
+}
+
+// return the preferred ignore string for suggestions
+const char* AffixMgr::get_ignore() const {
+ if (ignorechars.empty())
+ return NULL;
+ return ignorechars.c_str();
+}
+
+// return the preferred ignore string for suggestions
+const std::vector<w_char>& AffixMgr::get_ignore_utf16() const {
+ return ignorechars_utf16;
+}
+
+// return the keyboard string for suggestions
+char* AffixMgr::get_key_string() {
+ if (keystring.empty())
+ keystring = SPELL_KEYSTRING;
+ return mystrdup(keystring.c_str());
+}
+
+// return the preferred try string for suggestions
+char* AffixMgr::get_try_string() const {
+ if (trystring.empty())
+ return NULL;
+ return mystrdup(trystring.c_str());
+}
+
+// return the preferred try string for suggestions
+const std::string& AffixMgr::get_wordchars() const {
+ return wordchars;
+}
+
+const std::vector<w_char>& AffixMgr::get_wordchars_utf16() const {
+ return wordchars_utf16;
+}
+
+// is there compounding?
+int AffixMgr::get_compound() const {
+ return compoundflag || compoundbegin || !defcpdtable.empty();
+}
+
+// return the compound words control flag
+FLAG AffixMgr::get_compoundflag() const {
+ return compoundflag;
+}
+
+// return the forbidden words control flag
+FLAG AffixMgr::get_forbiddenword() const {
+ return forbiddenword;
+}
+
+// return the forbidden words control flag
+FLAG AffixMgr::get_nosuggest() const {
+ return nosuggest;
+}
+
+// return the forbidden words control flag
+FLAG AffixMgr::get_nongramsuggest() const {
+ return nongramsuggest;
+}
+
+// return the substandard root/affix control flag
+FLAG AffixMgr::get_substandard() const {
+ return substandard;
+}
+
+// return the forbidden words flag modify flag
+FLAG AffixMgr::get_needaffix() const {
+ return needaffix;
+}
+
+// return the onlyincompound flag
+FLAG AffixMgr::get_onlyincompound() const {
+ return onlyincompound;
+}
+
+// return the value of suffix
+const std::string& AffixMgr::get_version() const {
+ return version;
+}
+
+// utility method to look up root words in hash table
+struct hentry* AffixMgr::lookup(const char* word) {
+ struct hentry* he = NULL;
+ for (size_t i = 0; i < alldic.size() && !he; ++i) {
+ he = alldic[i]->lookup(word);
+ }
+ return he;
+}
+
+// return the value of suffix
+int AffixMgr::have_contclass() const {
+ return havecontclass;
+}
+
+// return utf8
+int AffixMgr::get_utf8() const {
+ return utf8;
+}
+
+int AffixMgr::get_maxngramsugs(void) const {
+ return maxngramsugs;
+}
+
+int AffixMgr::get_maxcpdsugs(void) const {
+ return maxcpdsugs;
+}
+
+int AffixMgr::get_maxdiff(void) const {
+ return maxdiff;
+}
+
+int AffixMgr::get_onlymaxdiff(void) const {
+ return onlymaxdiff;
+}
+
+// return nosplitsugs
+int AffixMgr::get_nosplitsugs(void) const {
+ return nosplitsugs;
+}
+
+// return sugswithdots
+int AffixMgr::get_sugswithdots(void) const {
+ return sugswithdots;
+}
+
+/* parse flag */
+bool AffixMgr::parse_flag(const std::string& line, unsigned short* out, FileMgr* af) {
+ if (*out != FLAG_NULL && !(*out >= DEFAULTFLAGS)) {
+ HUNSPELL_WARNING(
+ stderr,
+ "error: line %d: multiple definitions of an affix file parameter\n",
+ af->getlinenum());
+ return false;
+ }
+ std::string s;
+ if (!parse_string(line, s, af->getlinenum()))
+ return false;
+ *out = pHMgr->decode_flag(s.c_str());
+ return true;
+}
+
+/* parse num */
+bool AffixMgr::parse_num(const std::string& line, int* out, FileMgr* af) {
+ if (*out != -1) {
+ HUNSPELL_WARNING(
+ stderr,
+ "error: line %d: multiple definitions of an affix file parameter\n",
+ af->getlinenum());
+ return false;
+ }
+ std::string s;
+ if (!parse_string(line, s, af->getlinenum()))
+ return false;
+ *out = atoi(s.c_str());
+ return true;
+}
+
+/* parse in the max syllablecount of compound words and */
+bool AffixMgr::parse_cpdsyllable(const std::string& line, FileMgr* af) {
+ int i = 0;
+ int np = 0;
+ std::string::const_iterator iter = line.begin();
+ std::string::const_iterator start_piece = mystrsep(line, iter);
+ while (start_piece != line.end()) {
+ switch (i) {
+ case 0: {
+ np++;
+ break;
+ }
+ case 1: {
+ cpdmaxsyllable = atoi(std::string(start_piece, iter).c_str());
+ np++;
+ break;
+ }
+ case 2: {
+ if (!utf8) {
+ cpdvowels.assign(start_piece, iter);
+ std::sort(cpdvowels.begin(), cpdvowels.end());
+ } else {
+ std::string piece(start_piece, iter);
+ u8_u16(cpdvowels_utf16, piece);
+ std::sort(cpdvowels_utf16.begin(), cpdvowels_utf16.end());
+ }
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(line, iter);
+ }
+ if (np < 2) {
+ HUNSPELL_WARNING(stderr,
+ "error: line %d: missing compoundsyllable information\n",
+ af->getlinenum());
+ return false;
+ }
+ if (np == 2)
+ cpdvowels = "AEIOUaeiou";
+ return true;
+}
+
+bool AffixMgr::parse_convtable(const std::string& line,
+ FileMgr* af,
+ RepList** rl,
+ const std::string& keyword) {
+ if (*rl) {
+ HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+ af->getlinenum());
+ return false;
+ }
+ int i = 0;
+ int np = 0;
+ int numrl = 0;
+ std::string::const_iterator iter = line.begin();
+ std::string::const_iterator start_piece = mystrsep(line, iter);
+ while (start_piece != line.end()) {
+ switch (i) {
+ case 0: {
+ np++;
+ break;
+ }
+ case 1: {
+ numrl = atoi(std::string(start_piece, iter).c_str());
+ if (numrl < 1) {
+ HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n",
+ af->getlinenum());
+ return false;
+ }
+ *rl = new RepList(numrl);
+ if (!*rl)
+ return false;
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(line, iter);
+ }
+ if (np != 2) {
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ return false;
+ }
+
+ /* now parse the num lines to read in the remainder of the table */
+ for (int j = 0; j < numrl; j++) {
+ std::string nl;
+ if (!af->getline(nl))
+ return false;
+ mychomp(nl);
+ i = 0;
+ std::string pattern;
+ std::string pattern2;
+ iter = nl.begin();
+ start_piece = mystrsep(nl, iter);
+ while (start_piece != nl.end()) {
+ {
+ switch (i) {
+ case 0: {
+ if (nl.compare(start_piece - nl.begin(), keyword.size(), keyword, 0, keyword.size()) != 0) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ delete *rl;
+ *rl = NULL;
+ return false;
+ }
+ break;
+ }
+ case 1: {
+ pattern.assign(start_piece, iter);
+ break;
+ }
+ case 2: {
+ pattern2.assign(start_piece, iter);
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ }
+ start_piece = mystrsep(nl, iter);
+ }
+ if (pattern.empty() || pattern2.empty()) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ return false;
+ }
+ (*rl)->add(pattern, pattern2);
+ }
+ return true;
+}
+
+/* parse in the typical fault correcting table */
+bool AffixMgr::parse_phonetable(const std::string& line, FileMgr* af) {
+ if (phone) {
+ HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+ af->getlinenum());
+ return false;
+ }
+ int num = -1;
+ int i = 0;
+ int np = 0;
+ std::string::const_iterator iter = line.begin();
+ std::string::const_iterator start_piece = mystrsep(line, iter);
+ while (start_piece != line.end()) {
+ switch (i) {
+ case 0: {
+ np++;
+ break;
+ }
+ case 1: {
+ num = atoi(std::string(start_piece, iter).c_str());
+ if (num < 1) {
+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+ af->getlinenum());
+ return false;
+ }
+ phone = new phonetable;
+ phone->utf8 = (char)utf8;
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(line, iter);
+ }
+ if (np != 2) {
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ return false;
+ }
+
+ /* now parse the phone->num lines to read in the remainder of the table */
+ for (int j = 0; j < num; ++j) {
+ std::string nl;
+ if (!af->getline(nl))
+ return false;
+ mychomp(nl);
+ i = 0;
+ const size_t old_size = phone->rules.size();
+ iter = nl.begin();
+ start_piece = mystrsep(nl, iter);
+ while (start_piece != nl.end()) {
+ {
+ switch (i) {
+ case 0: {
+ if (nl.compare(start_piece - nl.begin(), 5, "PHONE", 5) != 0) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ return false;
+ }
+ break;
+ }
+ case 1: {
+ phone->rules.push_back(std::string(start_piece, iter));
+ break;
+ }
+ case 2: {
+ phone->rules.push_back(std::string(start_piece, iter));
+ mystrrep(phone->rules.back(), "_", "");
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ }
+ start_piece = mystrsep(nl, iter);
+ }
+ if (phone->rules.size() != old_size + 2) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ phone->rules.clear();
+ return false;
+ }
+ }
+ phone->rules.push_back("");
+ phone->rules.push_back("");
+ init_phonet_hash(*phone);
+ return true;
+}
+
+/* parse in the checkcompoundpattern table */
+bool AffixMgr::parse_checkcpdtable(const std::string& line, FileMgr* af) {
+ if (parsedcheckcpd) {
+ HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+ af->getlinenum());
+ return false;
+ }
+ parsedcheckcpd = true;
+ int numcheckcpd = -1;
+ int i = 0;
+ int np = 0;
+ std::string::const_iterator iter = line.begin();
+ std::string::const_iterator start_piece = mystrsep(line, iter);
+ while (start_piece != line.end()) {
+ switch (i) {
+ case 0: {
+ np++;
+ break;
+ }
+ case 1: {
+ numcheckcpd = atoi(std::string(start_piece, iter).c_str());
+ if (numcheckcpd < 1) {
+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+ af->getlinenum());
+ return false;
+ }
+ checkcpdtable.reserve(numcheckcpd);
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(line, iter);
+ }
+ if (np != 2) {
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ return false;
+ }
+
+ /* now parse the numcheckcpd lines to read in the remainder of the table */
+ for (int j = 0; j < numcheckcpd; ++j) {
+ std::string nl;
+ if (!af->getline(nl))
+ return false;
+ mychomp(nl);
+ i = 0;
+ checkcpdtable.push_back(patentry());
+ iter = nl.begin();
+ start_piece = mystrsep(nl, iter);
+ while (start_piece != nl.end()) {
+ switch (i) {
+ case 0: {
+ if (nl.compare(start_piece - nl.begin(), 20, "CHECKCOMPOUNDPATTERN", 20) != 0) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ return false;
+ }
+ break;
+ }
+ case 1: {
+ checkcpdtable.back().pattern.assign(start_piece, iter);
+ size_t slash_pos = checkcpdtable.back().pattern.find('/');
+ if (slash_pos != std::string::npos) {
+ std::string chunk(checkcpdtable.back().pattern, slash_pos + 1);
+ checkcpdtable.back().pattern.resize(slash_pos);
+ checkcpdtable.back().cond = pHMgr->decode_flag(chunk.c_str());
+ }
+ break;
+ }
+ case 2: {
+ checkcpdtable.back().pattern2.assign(start_piece, iter);
+ size_t slash_pos = checkcpdtable.back().pattern2.find('/');
+ if (slash_pos != std::string::npos) {
+ std::string chunk(checkcpdtable.back().pattern2, slash_pos + 1);
+ checkcpdtable.back().pattern2.resize(slash_pos);
+ checkcpdtable.back().cond2 = pHMgr->decode_flag(chunk.c_str());
+ }
+ break;
+ }
+ case 3: {
+ checkcpdtable.back().pattern3.assign(start_piece, iter);
+ simplifiedcpd = 1;
+ break;
+ }
+ default:
+ break;
+ }
+ i++;
+ start_piece = mystrsep(nl, iter);
+ }
+ }
+ return true;
+}
+
+/* parse in the compound rule table */
+bool AffixMgr::parse_defcpdtable(const std::string& line, FileMgr* af) {
+ if (parseddefcpd) {
+ HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+ af->getlinenum());
+ return false;
+ }
+ parseddefcpd = true;
+ int numdefcpd = -1;
+ int i = 0;
+ int np = 0;
+ std::string::const_iterator iter = line.begin();
+ std::string::const_iterator start_piece = mystrsep(line, iter);
+ while (start_piece != line.end()) {
+ switch (i) {
+ case 0: {
+ np++;
+ break;
+ }
+ case 1: {
+ numdefcpd = atoi(std::string(start_piece, iter).c_str());
+ if (numdefcpd < 1) {
+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+ af->getlinenum());
+ return false;
+ }
+ defcpdtable.reserve(numdefcpd);
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(line, iter);
+ }
+ if (np != 2) {
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ return false;
+ }
+
+ /* now parse the numdefcpd lines to read in the remainder of the table */
+ for (int j = 0; j < numdefcpd; ++j) {
+ std::string nl;
+ if (!af->getline(nl))
+ return false;
+ mychomp(nl);
+ i = 0;
+ defcpdtable.push_back(flagentry());
+ iter = nl.begin();
+ start_piece = mystrsep(nl, iter);
+ while (start_piece != nl.end()) {
+ switch (i) {
+ case 0: {
+ if (nl.compare(start_piece - nl.begin(), 12, "COMPOUNDRULE", 12) != 0) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ numdefcpd = 0;
+ return false;
+ }
+ break;
+ }
+ case 1: { // handle parenthesized flags
+ if (std::find(start_piece, iter, '(') != iter) {
+ for (std::string::const_iterator k = start_piece; k != iter; ++k) {
+ std::string::const_iterator chb = k;
+ std::string::const_iterator che = k + 1;
+ if (*k == '(') {
+ std::string::const_iterator parpos = std::find(k, iter, ')');
+ if (parpos != iter) {
+ chb = k + 1;
+ che = parpos;
+ k = parpos;
+ }
+ }
+
+ if (*chb == '*' || *chb == '?') {
+ defcpdtable.back().push_back((FLAG)*chb);
+ } else {
+ pHMgr->decode_flags(defcpdtable.back(), std::string(chb, che), af);
+ }
+ }
+ } else {
+ pHMgr->decode_flags(defcpdtable.back(), std::string(start_piece, iter), af);
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(nl, iter);
+ }
+ if (defcpdtable.back().empty()) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ return false;
+ }
+ }
+ return true;
+}
+
+/* parse in the character map table */
+bool AffixMgr::parse_maptable(const std::string& line, FileMgr* af) {
+ if (parsedmaptable) {
+ HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+ af->getlinenum());
+ return false;
+ }
+ parsedmaptable = true;
+ int nummap = -1;
+ int i = 0;
+ int np = 0;
+ std::string::const_iterator iter = line.begin();
+ std::string::const_iterator start_piece = mystrsep(line, iter);
+ while (start_piece != line.end()) {
+ switch (i) {
+ case 0: {
+ np++;
+ break;
+ }
+ case 1: {
+ nummap = atoi(std::string(start_piece, iter).c_str());
+ if (nummap < 1) {
+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+ af->getlinenum());
+ return false;
+ }
+ maptable.reserve(nummap);
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(line, iter);
+ }
+ if (np != 2) {
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ return false;
+ }
+
+ /* now parse the nummap lines to read in the remainder of the table */
+ for (int j = 0; j < nummap; ++j) {
+ std::string nl;
+ if (!af->getline(nl))
+ return false;
+ mychomp(nl);
+ i = 0;
+ maptable.push_back(mapentry());
+ iter = nl.begin();
+ start_piece = mystrsep(nl, iter);
+ while (start_piece != nl.end()) {
+ switch (i) {
+ case 0: {
+ if (nl.compare(start_piece - nl.begin(), 3, "MAP", 3) != 0) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ nummap = 0;
+ return false;
+ }
+ break;
+ }
+ case 1: {
+ for (std::string::const_iterator k = start_piece; k != iter; ++k) {
+ std::string::const_iterator chb = k;
+ std::string::const_iterator che = k + 1;
+ if (*k == '(') {
+ std::string::const_iterator parpos = std::find(k, iter, ')');
+ if (parpos != iter) {
+ chb = k + 1;
+ che = parpos;
+ k = parpos;
+ }
+ } else {
+ if (utf8 && (*k & 0xc0) == 0xc0) {
+ ++k;
+ while (k != iter && (*k & 0xc0) == 0x80)
+ ++k;
+ che = k;
+ --k;
+ }
+ }
+ maptable.back().push_back(std::string(chb, che));
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(nl, iter);
+ }
+ if (maptable.back().empty()) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ return false;
+ }
+ }
+ return true;
+}
+
+/* parse in the word breakpoint table */
+bool AffixMgr::parse_breaktable(const std::string& line, FileMgr* af) {
+ if (parsedbreaktable) {
+ HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+ af->getlinenum());
+ return false;
+ }
+ parsedbreaktable = true;
+ int numbreak = -1;
+ int i = 0;
+ int np = 0;
+ std::string::const_iterator iter = line.begin();
+ std::string::const_iterator start_piece = mystrsep(line, iter);
+ while (start_piece != line.end()) {
+ switch (i) {
+ case 0: {
+ np++;
+ break;
+ }
+ case 1: {
+ numbreak = atoi(std::string(start_piece, iter).c_str());
+ if (numbreak < 0) {
+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+ af->getlinenum());
+ return false;
+ }
+ if (numbreak == 0)
+ return true;
+ breaktable.reserve(numbreak);
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(line, iter);
+ }
+ if (np != 2) {
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ return false;
+ }
+
+ /* now parse the numbreak lines to read in the remainder of the table */
+ for (int j = 0; j < numbreak; ++j) {
+ std::string nl;
+ if (!af->getline(nl))
+ return false;
+ mychomp(nl);
+ i = 0;
+ iter = nl.begin();
+ start_piece = mystrsep(nl, iter);
+ while (start_piece != nl.end()) {
+ switch (i) {
+ case 0: {
+ if (nl.compare(start_piece - nl.begin(), 5, "BREAK", 5) != 0) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ numbreak = 0;
+ return false;
+ }
+ break;
+ }
+ case 1: {
+ breaktable.push_back(std::string(start_piece, iter));
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(nl, iter);
+ }
+ }
+
+ if (breaktable.size() != static_cast<size_t>(numbreak)) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ return false;
+ }
+
+ return true;
+}
+
+void AffixMgr::reverse_condition(std::string& piece) {
+ if (piece.empty())
+ return;
+
+ int neg = 0;
+ for (std::string::reverse_iterator k = piece.rbegin(); k != piece.rend(); ++k) {
+ switch (*k) {
+ case '[': {
+ if (neg)
+ *(k - 1) = '[';
+ else
+ *k = ']';
+ break;
+ }
+ case ']': {
+ *k = '[';
+ if (neg)
+ *(k - 1) = '^';
+ neg = 0;
+ break;
+ }
+ case '^': {
+ if (*(k - 1) == ']')
+ neg = 1;
+ else if (neg)
+ *(k - 1) = *k;
+ break;
+ }
+ default: {
+ if (neg)
+ *(k - 1) = *k;
+ }
+ }
+ }
+}
+
+class entries_container {
+ std::vector<AffEntry*> entries;
+ AffixMgr* m_mgr;
+ char m_at;
+public:
+ entries_container(char at, AffixMgr* mgr)
+ : m_mgr(mgr)
+ , m_at(at) {
+ }
+ void release() {
+ entries.clear();
+ }
+ void initialize(int numents,
+ char opts, unsigned short aflag) {
+ entries.reserve(numents);
+
+ if (m_at == 'P') {
+ entries.push_back(new PfxEntry(m_mgr));
+ } else {
+ entries.push_back(new SfxEntry(m_mgr));
+ }
+
+ entries.back()->opts = opts;
+ entries.back()->aflag = aflag;
+ }
+
+ AffEntry* add_entry(char opts) {
+ if (m_at == 'P') {
+ entries.push_back(new PfxEntry(m_mgr));
+ } else {
+ entries.push_back(new SfxEntry(m_mgr));
+ }
+ AffEntry* ret = entries.back();
+ ret->opts = entries[0]->opts & opts;
+ return ret;
+ }
+
+ AffEntry* first_entry() {
+ return entries.empty() ? NULL : entries[0];
+ }
+
+ ~entries_container() {
+ for (size_t i = 0; i < entries.size(); ++i) {
+ delete entries[i];
+ }
+ }
+
+ std::vector<AffEntry*>::iterator begin() { return entries.begin(); }
+ std::vector<AffEntry*>::iterator end() { return entries.end(); }
+};
+
+bool AffixMgr::parse_affix(const std::string& line,
+ const char at,
+ FileMgr* af,
+ char* dupflags) {
+ int numents = 0; // number of AffEntry structures to parse
+
+ unsigned short aflag = 0; // affix char identifier
+
+ char ff = 0;
+ entries_container affentries(at, this);
+
+ int i = 0;
+
+// checking lines with bad syntax
+#ifdef DEBUG
+ int basefieldnum = 0;
+#endif
+
+ // split affix header line into pieces
+
+ int np = 0;
+ std::string::const_iterator iter = line.begin();
+ std::string::const_iterator start_piece = mystrsep(line, iter);
+ while (start_piece != line.end()) {
+ switch (i) {
+ // piece 1 - is type of affix
+ case 0: {
+ np++;
+ break;
+ }
+
+ // piece 2 - is affix char
+ case 1: {
+ np++;
+ aflag = pHMgr->decode_flag(std::string(start_piece, iter).c_str());
+ if (((at == 'S') && (dupflags[aflag] & dupSFX)) ||
+ ((at == 'P') && (dupflags[aflag] & dupPFX))) {
+ HUNSPELL_WARNING(
+ stderr,
+ "error: line %d: multiple definitions of an affix flag\n",
+ af->getlinenum());
+ }
+ dupflags[aflag] += (char)((at == 'S') ? dupSFX : dupPFX);
+ break;
+ }
+ // piece 3 - is cross product indicator
+ case 2: {
+ np++;
+ if (*start_piece == 'Y')
+ ff = aeXPRODUCT;
+ break;
+ }
+
+ // piece 4 - is number of affentries
+ case 3: {
+ np++;
+ numents = atoi(std::string(start_piece, iter).c_str());
+ if ((numents <= 0) || ((std::numeric_limits<size_t>::max() /
+ sizeof(AffEntry)) < static_cast<size_t>(numents))) {
+ char* err = pHMgr->encode_flag(aflag);
+ if (err) {
+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+ af->getlinenum());
+ free(err);
+ }
+ return false;
+ }
+
+ char opts = ff;
+ if (utf8)
+ opts |= aeUTF8;
+ if (pHMgr->is_aliasf())
+ opts |= aeALIASF;
+ if (pHMgr->is_aliasm())
+ opts |= aeALIASM;
+ affentries.initialize(numents, opts, aflag);
+ }
+
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(line, iter);
+ }
+ // check to make sure we parsed enough pieces
+ if (np != 4) {
+ char* err = pHMgr->encode_flag(aflag);
+ if (err) {
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ free(err);
+ }
+ return false;
+ }
+
+ // now parse numents affentries for this affix
+ AffEntry* entry = affentries.first_entry();
+ for (int ent = 0; ent < numents; ++ent) {
+ std::string nl;
+ if (!af->getline(nl))
+ return false;
+ mychomp(nl);
+
+ iter = nl.begin();
+ i = 0;
+ np = 0;
+
+ // split line into pieces
+ start_piece = mystrsep(nl, iter);
+ while (start_piece != nl.end()) {
+ switch (i) {
+ // piece 1 - is type
+ case 0: {
+ np++;
+ if (ent != 0)
+ entry = affentries.add_entry((char)(aeXPRODUCT + aeUTF8 + aeALIASF + aeALIASM));
+ break;
+ }
+
+ // piece 2 - is affix char
+ case 1: {
+ np++;
+ std::string chunk(start_piece, iter);
+ if (pHMgr->decode_flag(chunk.c_str()) != aflag) {
+ char* err = pHMgr->encode_flag(aflag);
+ if (err) {
+ HUNSPELL_WARNING(stderr,
+ "error: line %d: affix %s is corrupt\n",
+ af->getlinenum(), err);
+ free(err);
+ }
+ return false;
+ }
+
+ if (ent != 0) {
+ AffEntry* start_entry = affentries.first_entry();
+ entry->aflag = start_entry->aflag;
+ }
+ break;
+ }
+
+ // piece 3 - is string to strip or 0 for null
+ case 2: {
+ np++;
+ entry->strip = std::string(start_piece, iter);
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(entry->strip);
+ else
+ reverseword(entry->strip);
+ }
+ if (entry->strip.compare("0") == 0) {
+ entry->strip.clear();
+ }
+ break;
+ }
+
+ // piece 4 - is affix string or 0 for null
+ case 3: {
+ entry->morphcode = NULL;
+ entry->contclass = NULL;
+ entry->contclasslen = 0;
+ np++;
+ std::string::const_iterator dash = std::find(start_piece, iter, '/');
+ if (dash != iter) {
+ entry->appnd = std::string(start_piece, dash);
+ std::string dash_str(dash + 1, iter);
+
+ if (!ignorechars.empty() && !has_no_ignored_chars(entry->appnd, ignorechars)) {
+ if (utf8) {
+ remove_ignored_chars_utf(entry->appnd, ignorechars_utf16);
+ } else {
+ remove_ignored_chars(entry->appnd, ignorechars);
+ }
+ }
+
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(entry->appnd);
+ else
+ reverseword(entry->appnd);
+ }
+
+ if (pHMgr->is_aliasf()) {
+ int index = atoi(dash_str.c_str());
+ entry->contclasslen = (unsigned short)pHMgr->get_aliasf(
+ index, &(entry->contclass), af);
+ if (!entry->contclasslen)
+ HUNSPELL_WARNING(stderr,
+ "error: bad affix flag alias: \"%s\"\n",
+ dash_str.c_str());
+ } else {
+ entry->contclasslen = (unsigned short)pHMgr->decode_flags(
+ &(entry->contclass), dash_str.c_str(), af);
+ std::sort(entry->contclass, entry->contclass + entry->contclasslen);
+ }
+
+ havecontclass = 1;
+ for (unsigned short _i = 0; _i < entry->contclasslen; _i++) {
+ contclasses[(entry->contclass)[_i]] = 1;
+ }
+ } else {
+ entry->appnd = std::string(start_piece, iter);
+
+ if (!ignorechars.empty() && !has_no_ignored_chars(entry->appnd, ignorechars)) {
+ if (utf8) {
+ remove_ignored_chars_utf(entry->appnd, ignorechars_utf16);
+ } else {
+ remove_ignored_chars(entry->appnd, ignorechars);
+ }
+ }
+
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(entry->appnd);
+ else
+ reverseword(entry->appnd);
+ }
+ }
+
+ if (entry->appnd.compare("0") == 0) {
+ entry->appnd.clear();
+ }
+ break;
+ }
+
+ // piece 5 - is the conditions descriptions
+ case 4: {
+ std::string chunk(start_piece, iter);
+ np++;
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(chunk);
+ else
+ reverseword(chunk);
+ reverse_condition(chunk);
+ }
+ if (!entry->strip.empty() && chunk != "." &&
+ redundant_condition(at, entry->strip.c_str(), entry->strip.size(), chunk.c_str(),
+ af->getlinenum()))
+ chunk = ".";
+ if (at == 'S') {
+ reverseword(chunk);
+ reverse_condition(chunk);
+ }
+ if (encodeit(*entry, chunk.c_str()))
+ return false;
+ break;
+ }
+
+ case 5: {
+ std::string chunk(start_piece, iter);
+ np++;
+ if (pHMgr->is_aliasm()) {
+ int index = atoi(chunk.c_str());
+ entry->morphcode = pHMgr->get_aliasm(index);
+ } else {
+ if (complexprefixes) { // XXX - fix me for morph. gen.
+ if (utf8)
+ reverseword_utf(chunk);
+ else
+ reverseword(chunk);
+ }
+ // add the remaining of the line
+ std::string::const_iterator end = nl.end();
+ if (iter != end) {
+ chunk.append(iter, end);
+ }
+ entry->morphcode = mystrdup(chunk.c_str());
+ if (!entry->morphcode)
+ return false;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ i++;
+ start_piece = mystrsep(nl, iter);
+ }
+ // check to make sure we parsed enough pieces
+ if (np < 4) {
+ char* err = pHMgr->encode_flag(aflag);
+ if (err) {
+ HUNSPELL_WARNING(stderr, "error: line %d: affix %s is corrupt\n",
+ af->getlinenum(), err);
+ free(err);
+ }
+ return false;
+ }
+
+#ifdef DEBUG
+ // detect unnecessary fields, excepting comments
+ if (basefieldnum) {
+ int fieldnum =
+ !(entry->morphcode) ? 5 : ((*(entry->morphcode) == '#') ? 5 : 6);
+ if (fieldnum != basefieldnum)
+ HUNSPELL_WARNING(stderr, "warning: line %d: bad field number\n",
+ af->getlinenum());
+ } else {
+ basefieldnum =
+ !(entry->morphcode) ? 5 : ((*(entry->morphcode) == '#') ? 5 : 6);
+ }
+#endif
+ }
+
+ // now create SfxEntry or PfxEntry objects and use links to
+ // build an ordered (sorted by affix string) list
+ std::vector<AffEntry*>::iterator start = affentries.begin();
+ std::vector<AffEntry*>::iterator end = affentries.end();
+ for (std::vector<AffEntry*>::iterator affentry = start; affentry != end; ++affentry) {
+ if (at == 'P') {
+ build_pfxtree(static_cast<PfxEntry*>(*affentry));
+ } else {
+ build_sfxtree(static_cast<SfxEntry*>(*affentry));
+ }
+ }
+
+ //contents belong to AffixMgr now
+ affentries.release();
+
+ return true;
+}
+
+int AffixMgr::redundant_condition(char ft,
+ const char* strip,
+ int stripl,
+ const char* cond,
+ int linenum) {
+ int condl = strlen(cond);
+ int i;
+ int j;
+ int neg;
+ int in;
+ if (ft == 'P') { // prefix
+ if (strncmp(strip, cond, condl) == 0)
+ return 1;
+ if (utf8) {
+ } else {
+ for (i = 0, j = 0; (i < stripl) && (j < condl); i++, j++) {
+ if (cond[j] != '[') {
+ if (cond[j] != strip[i]) {
+ HUNSPELL_WARNING(stderr,
+ "warning: line %d: incompatible stripping "
+ "characters and condition\n",
+ linenum);
+ return 0;
+ }
+ } else {
+ neg = (cond[j + 1] == '^') ? 1 : 0;
+ in = 0;
+ do {
+ j++;
+ if (strip[i] == cond[j])
+ in = 1;
+ } while ((j < (condl - 1)) && (cond[j] != ']'));
+ if (j == (condl - 1) && (cond[j] != ']')) {
+ HUNSPELL_WARNING(stderr,
+ "error: line %d: missing ] in condition:\n%s\n",
+ linenum, cond);
+ return 0;
+ }
+ if ((!neg && !in) || (neg && in)) {
+ HUNSPELL_WARNING(stderr,
+ "warning: line %d: incompatible stripping "
+ "characters and condition\n",
+ linenum);
+ return 0;
+ }
+ }
+ }
+ if (j >= condl)
+ return 1;
+ }
+ } else { // suffix
+ if ((stripl >= condl) && strcmp(strip + stripl - condl, cond) == 0)
+ return 1;
+ if (utf8) {
+ } else {
+ for (i = stripl - 1, j = condl - 1; (i >= 0) && (j >= 0); i--, j--) {
+ if (cond[j] != ']') {
+ if (cond[j] != strip[i]) {
+ HUNSPELL_WARNING(stderr,
+ "warning: line %d: incompatible stripping "
+ "characters and condition\n",
+ linenum);
+ return 0;
+ }
+ } else {
+ in = 0;
+ do {
+ j--;
+ if (strip[i] == cond[j])
+ in = 1;
+ } while ((j > 0) && (cond[j] != '['));
+ if ((j == 0) && (cond[j] != '[')) {
+ HUNSPELL_WARNING(stderr,
+ "error: line: %d: missing ] in condition:\n%s\n",
+ linenum, cond);
+ return 0;
+ }
+ neg = (cond[j + 1] == '^') ? 1 : 0;
+ if ((!neg && !in) || (neg && in)) {
+ HUNSPELL_WARNING(stderr,
+ "warning: line %d: incompatible stripping "
+ "characters and condition\n",
+ linenum);
+ return 0;
+ }
+ }
+ }
+ if (j < 0)
+ return 1;
+ }
+ }
+ return 0;
+}
+
+std::vector<std::string> AffixMgr::get_suffix_words(short unsigned* suff,
+ int len,
+ const char* root_word) {
+ std::vector<std::string> slst;
+ short unsigned* start_ptr = suff;
+ for (int j = 0; j < SETSIZE; j++) {
+ SfxEntry* ptr = sStart[j];
+ while (ptr) {
+ suff = start_ptr;
+ for (int i = 0; i < len; i++) {
+ if ((*suff) == ptr->getFlag()) {
+ std::string nw(root_word);
+ nw.append(ptr->getAffix());
+ hentry* ht = ptr->checkword(nw.c_str(), nw.size(), 0, NULL, 0, 0, 0);
+ if (ht) {
+ slst.push_back(nw);
+ }
+ }
+ suff++;
+ }
+ ptr = ptr->getNext();
+ }
+ }
+ return slst;
+}
diff --git a/extensions/spellcheck/hunspell/src/affixmgr.hxx b/extensions/spellcheck/hunspell/src/affixmgr.hxx
new file mode 100644
index 0000000000..450f50a65c
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/affixmgr.hxx
@@ -0,0 +1,368 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef AFFIXMGR_HXX_
+#define AFFIXMGR_HXX_
+
+#include <stdio.h>
+
+#include <string>
+#include <vector>
+
+#include "atypes.hxx"
+#include "baseaffix.hxx"
+#include "hashmgr.hxx"
+#include "phonet.hxx"
+#include "replist.hxx"
+
+// check flag duplication
+#define dupSFX (1 << 0)
+#define dupPFX (1 << 1)
+
+class PfxEntry;
+class SfxEntry;
+
+class AffixMgr {
+ PfxEntry* pStart[SETSIZE];
+ SfxEntry* sStart[SETSIZE];
+ PfxEntry* pFlag[SETSIZE];
+ SfxEntry* sFlag[SETSIZE];
+ const std::vector<HashMgr*>& alldic;
+ const HashMgr* pHMgr;
+ std::string keystring;
+ std::string trystring;
+ std::string encoding;
+ struct cs_info* csconv;
+ int utf8;
+ int complexprefixes;
+ FLAG compoundflag;
+ FLAG compoundbegin;
+ FLAG compoundmiddle;
+ FLAG compoundend;
+ FLAG compoundroot;
+ FLAG compoundforbidflag;
+ FLAG compoundpermitflag;
+ int compoundmoresuffixes;
+ int checkcompounddup;
+ int checkcompoundrep;
+ int checkcompoundcase;
+ int checkcompoundtriple;
+ int simplifiedtriple;
+ FLAG forbiddenword;
+ FLAG nosuggest;
+ FLAG nongramsuggest;
+ FLAG needaffix;
+ int cpdmin;
+ RepList* iconvtable;
+ RepList* oconvtable;
+ bool parsedmaptable;
+ std::vector<mapentry> maptable;
+ bool parsedbreaktable;
+ std::vector<std::string> breaktable;
+ bool parsedcheckcpd;
+ std::vector<patentry> checkcpdtable;
+ int simplifiedcpd;
+ bool parseddefcpd;
+ std::vector<flagentry> defcpdtable;
+ phonetable* phone;
+ int maxngramsugs;
+ int maxcpdsugs;
+ int maxdiff;
+ int onlymaxdiff;
+ int nosplitsugs;
+ int sugswithdots;
+ int cpdwordmax;
+ int cpdmaxsyllable;
+ std::string cpdvowels; // vowels (for calculating of Hungarian compounding limit,
+ std::vector<w_char> cpdvowels_utf16; //vowels for UTF-8 encoding
+ std::string cpdsyllablenum; // syllable count incrementing flag
+ const char* pfxappnd; // BUG: not stateless
+ const char* sfxappnd; // BUG: not stateless
+ int sfxextra; // BUG: not stateless
+ FLAG sfxflag; // BUG: not stateless
+ char* derived; // BUG: not stateless
+ SfxEntry* sfx; // BUG: not stateless
+ PfxEntry* pfx; // BUG: not stateless
+ int checknum;
+ std::string wordchars; // letters + spec. word characters
+ std::vector<w_char> wordchars_utf16;
+ std::string ignorechars; // letters + spec. word characters
+ std::vector<w_char> ignorechars_utf16;
+ std::string version; // affix and dictionary file version string
+ std::string lang; // language
+ int langnum;
+ FLAG lemma_present;
+ FLAG circumfix;
+ FLAG onlyincompound;
+ FLAG keepcase;
+ FLAG forceucase;
+ FLAG warn;
+ int forbidwarn;
+ FLAG substandard;
+ int checksharps;
+ int fullstrip;
+
+ int havecontclass; // boolean variable
+ char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold
+ // affix)
+
+ public:
+ AffixMgr(const char* affpath, const std::vector<HashMgr*>& ptr, const char* key = NULL);
+ ~AffixMgr();
+ struct hentry* affix_check(const char* word,
+ int len,
+ const unsigned short needflag = (unsigned short)0,
+ char in_compound = IN_CPD_NOT);
+ struct hentry* prefix_check(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag = FLAG_NULL);
+ inline int isSubset(const char* s1, const char* s2);
+ struct hentry* prefix_check_twosfx(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag = FLAG_NULL);
+ inline int isRevSubset(const char* s1, const char* end_of_s2, int len);
+ struct hentry* suffix_check(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ const FLAG cclass = FLAG_NULL,
+ const FLAG needflag = FLAG_NULL,
+ char in_compound = IN_CPD_NOT);
+ struct hentry* suffix_check_twosfx(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ const FLAG needflag = FLAG_NULL);
+
+ std::string affix_check_morph(const char* word,
+ int len,
+ const FLAG needflag = FLAG_NULL,
+ char in_compound = IN_CPD_NOT);
+ std::string prefix_check_morph(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag = FLAG_NULL);
+ std::string suffix_check_morph(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ const FLAG cclass = FLAG_NULL,
+ const FLAG needflag = FLAG_NULL,
+ char in_compound = IN_CPD_NOT);
+
+ std::string prefix_check_twosfx_morph(const char* word,
+ int len,
+ char in_compound,
+ const FLAG needflag = FLAG_NULL);
+ std::string suffix_check_twosfx_morph(const char* word,
+ int len,
+ int sfxopts,
+ PfxEntry* ppfx,
+ const FLAG needflag = FLAG_NULL);
+
+ std::string morphgen(const char* ts,
+ int wl,
+ const unsigned short* ap,
+ unsigned short al,
+ const char* morph,
+ const char* targetmorph,
+ int level);
+
+ int expand_rootword(struct guessword* wlst,
+ int maxn,
+ const char* ts,
+ int wl,
+ const unsigned short* ap,
+ unsigned short al,
+ const char* bad,
+ int,
+ const char*);
+
+ short get_syllable(const std::string& word);
+ int cpdrep_check(const char* word, int len);
+ int cpdwordpair_check(const char * word, int len);
+ int cpdpat_check(const char* word,
+ int len,
+ hentry* r1,
+ hentry* r2,
+ const char affixed);
+ int defcpd_check(hentry*** words,
+ short wnum,
+ hentry* rv,
+ hentry** rwords,
+ char all);
+ int cpdcase_check(const char* word, int len);
+ inline int candidate_check(const char* word, int len);
+ void setcminmax(int* cmin, int* cmax, const char* word, int len);
+ struct hentry* compound_check(const std::string& word,
+ short wordnum,
+ short numsyllable,
+ short maxwordnum,
+ short wnum,
+ hentry** words,
+ hentry** rwords,
+ char hu_mov_rule,
+ char is_sug,
+ int* info);
+
+ int compound_check_morph(const char* word,
+ int len,
+ short wordnum,
+ short numsyllable,
+ short maxwordnum,
+ short wnum,
+ hentry** words,
+ hentry** rwords,
+ char hu_mov_rule,
+ std::string& result,
+ const std::string* partresult);
+
+ std::vector<std::string> get_suffix_words(short unsigned* suff,
+ int len,
+ const char* root_word);
+
+ struct hentry* lookup(const char* word);
+ const std::vector<replentry>& get_reptable() const;
+ RepList* get_iconvtable() const;
+ RepList* get_oconvtable() const;
+ struct phonetable* get_phonetable() const;
+ const std::vector<mapentry>& get_maptable() const;
+ const std::vector<std::string>& get_breaktable() const;
+ const std::string& get_encoding();
+ int get_langnum() const;
+ char* get_key_string();
+ char* get_try_string() const;
+ const std::string& get_wordchars() const;
+ const std::vector<w_char>& get_wordchars_utf16() const;
+ const char* get_ignore() const;
+ const std::vector<w_char>& get_ignore_utf16() const;
+ int get_compound() const;
+ FLAG get_compoundflag() const;
+ FLAG get_forbiddenword() const;
+ FLAG get_nosuggest() const;
+ FLAG get_nongramsuggest() const;
+ FLAG get_substandard() const;
+ FLAG get_needaffix() const;
+ FLAG get_onlyincompound() const;
+ const char* get_derived() const;
+ const std::string& get_version() const;
+ int have_contclass() const;
+ int get_utf8() const;
+ int get_complexprefixes() const;
+ char* get_suffixed(char) const;
+ int get_maxngramsugs() const;
+ int get_maxcpdsugs() const;
+ int get_maxdiff() const;
+ int get_onlymaxdiff() const;
+ int get_nosplitsugs() const;
+ int get_sugswithdots(void) const;
+ FLAG get_keepcase(void) const;
+ FLAG get_forceucase(void) const;
+ FLAG get_warn(void) const;
+ int get_forbidwarn(void) const;
+ int get_checksharps(void) const;
+ char* encode_flag(unsigned short aflag) const;
+ int get_fullstrip() const;
+
+ private:
+ int parse_file(const char* affpath, const char* key);
+ bool parse_flag(const std::string& line, unsigned short* out, FileMgr* af);
+ bool parse_num(const std::string& line, int* out, FileMgr* af);
+ bool parse_cpdsyllable(const std::string& line, FileMgr* af);
+ bool parse_convtable(const std::string& line,
+ FileMgr* af,
+ RepList** rl,
+ const std::string& keyword);
+ bool parse_phonetable(const std::string& line, FileMgr* af);
+ bool parse_maptable(const std::string& line, FileMgr* af);
+ bool parse_breaktable(const std::string& line, FileMgr* af);
+ bool parse_checkcpdtable(const std::string& line, FileMgr* af);
+ bool parse_defcpdtable(const std::string& line, FileMgr* af);
+ bool parse_affix(const std::string& line, const char at, FileMgr* af, char* dupflags);
+
+ void reverse_condition(std::string&);
+ std::string& debugflag(std::string& result, unsigned short flag);
+ int condlen(const char*);
+ int encodeit(AffEntry& entry, const char* cs);
+ int build_pfxtree(PfxEntry* pfxptr);
+ int build_sfxtree(SfxEntry* sfxptr);
+ int process_pfx_order();
+ int process_sfx_order();
+ PfxEntry* process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr);
+ SfxEntry* process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr);
+ int process_pfx_tree_to_list();
+ int process_sfx_tree_to_list();
+ int redundant_condition(char, const char* strip, int stripl, const char* cond, int);
+ void finishFileMgr(FileMgr* afflst);
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/atypes.hxx b/extensions/spellcheck/hunspell/src/atypes.hxx
new file mode 100644
index 0000000000..1b78d4724b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/atypes.hxx
@@ -0,0 +1,129 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef ATYPES_HXX_
+#define ATYPES_HXX_
+
+#ifndef HUNSPELL_WARNING
+#include <stdio.h>
+#ifdef HUNSPELL_WARNING_ON
+#define HUNSPELL_WARNING fprintf
+#else
+// empty inline function to switch off warnings (instead of the C99 standard
+// variadic macros)
+static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {}
+#endif
+#endif
+
+// HUNSTEM def.
+#define HUNSTEM
+
+#include "w_char.hxx"
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#define SETSIZE 256
+#define CONTSIZE 65536
+
+// AffEntry options
+#define aeXPRODUCT (1 << 0)
+#define aeUTF8 (1 << 1)
+#define aeALIASF (1 << 2)
+#define aeALIASM (1 << 3)
+#define aeLONGCOND (1 << 4)
+
+// compound options
+#define IN_CPD_NOT 0
+#define IN_CPD_BEGIN 1
+#define IN_CPD_END 2
+#define IN_CPD_OTHER 3
+
+// info options
+#define SPELL_COMPOUND (1 << 0)
+#define SPELL_FORBIDDEN (1 << 1)
+#define SPELL_ALLCAP (1 << 2)
+#define SPELL_NOCAP (1 << 3)
+#define SPELL_INITCAP (1 << 4)
+#define SPELL_ORIGCAP (1 << 5)
+#define SPELL_WARN (1 << 6)
+
+#define MINCPDLEN 3
+#define MAXCOMPOUND 10
+#define MAXCONDLEN 20
+#define MAXCONDLEN_1 (MAXCONDLEN - sizeof(char*))
+
+#define MAXACC 1000
+
+#define FLAG unsigned short
+#define FLAG_NULL 0x00
+#define FREE_FLAG(a) a = 0
+
+#define TESTAFF(a, b, c) (std::binary_search(a, a + c, b))
+
+// timelimit: max. ~1/4 sec (process time on Linux) for
+// for a suggestion, including max. ~/10 sec for a case
+// sensitive plain or compound word suggestion, within
+// ~1/20 sec long time consuming suggestion functions
+#define TIMELIMIT_GLOBAL (CLOCKS_PER_SEC / 4)
+#define TIMELIMIT_SUGGESTION (CLOCKS_PER_SEC / 10)
+#define TIMELIMIT (CLOCKS_PER_SEC / 20)
+#define MINTIMER 100
+#define MAXPLUSTIMER 100
+
+struct guessword {
+ char* word;
+ bool allow;
+ char* orig;
+};
+
+typedef std::vector<std::string> mapentry;
+typedef std::vector<FLAG> flagentry;
+
+struct patentry {
+ std::string pattern;
+ std::string pattern2;
+ std::string pattern3;
+ FLAG cond;
+ FLAG cond2;
+ patentry()
+ : cond(FLAG_NULL)
+ , cond2(FLAG_NULL) {
+ }
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/baseaffix.hxx b/extensions/spellcheck/hunspell/src/baseaffix.hxx
new file mode 100644
index 0000000000..52cd60e028
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/baseaffix.hxx
@@ -0,0 +1,74 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef BASEAFF_HXX_
+#define BASEAFF_HXX_
+
+#include <string>
+
+class AffEntry {
+ private:
+ AffEntry(const AffEntry&);
+ AffEntry& operator=(const AffEntry&);
+
+ public:
+ AffEntry()
+ : numconds(0),
+ opts(0),
+ aflag(0),
+ morphcode(0),
+ contclass(NULL),
+ contclasslen(0) {}
+ virtual ~AffEntry();
+ std::string appnd;
+ std::string strip;
+ unsigned char numconds;
+ char opts;
+ unsigned short aflag;
+ union {
+ char conds[MAXCONDLEN];
+ struct {
+ char conds1[MAXCONDLEN_1];
+ char* conds2;
+ } l;
+ } c;
+ char* morphcode;
+ unsigned short* contclass;
+ short contclasslen;
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/csutil.cxx b/extensions/spellcheck/hunspell/src/csutil.cxx
new file mode 100644
index 0000000000..48e58ff4b2
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/csutil.cxx
@@ -0,0 +1,2549 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <algorithm>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <sstream>
+
+#include "csutil.hxx"
+#include "atypes.hxx"
+#include "langnum.hxx"
+
+#ifdef _WIN32
+#include <windows.h>
+#include <wchar.h>
+#endif
+
+#ifdef OPENOFFICEORG
+#include <unicode/uchar.h>
+#else
+#ifndef MOZILLA_CLIENT
+#include "utf_info.hxx"
+#define UTF_LST_LEN (sizeof(utf_lst) / (sizeof(unicode_info)))
+#endif
+#endif
+
+#ifdef MOZILLA_CLIENT
+#include "mozHunspellRLBoxGlue.h"
+#endif
+
+struct unicode_info2 {
+ char cletter;
+ unsigned short cupper;
+ unsigned short clower;
+};
+
+static struct unicode_info2* utf_tbl = NULL;
+static int utf_tbl_count =
+ 0; // utf_tbl can be used by multiple Hunspell instances
+
+void myopen(std::ifstream& stream, const char* path, std::ios_base::openmode mode)
+{
+#if defined(_WIN32) && defined(_MSC_VER)
+#define WIN32_LONG_PATH_PREFIX "\\\\?\\"
+ if (strncmp(path, WIN32_LONG_PATH_PREFIX, 4) == 0) {
+ int len = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0);
+ wchar_t* buff = new wchar_t[len];
+ wchar_t* buff2 = new wchar_t[len];
+ MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len);
+ if (_wfullpath(buff2, buff, len) != NULL) {
+ stream.open(buff2, mode);
+ }
+ delete [] buff;
+ delete [] buff2;
+ }
+ else
+#endif
+ stream.open(path, mode);
+}
+
+std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) {
+ dest.clear();
+ std::vector<w_char>::const_iterator u2 = src.begin();
+ std::vector<w_char>::const_iterator u2_max = src.end();
+ while (u2 < u2_max) {
+ signed char u8;
+ if (u2->h) { // > 0xFF
+ // XXX 4-byte haven't implemented yet.
+ if (u2->h >= 0x08) { // >= 0x800 (3-byte UTF-8 character)
+ u8 = 0xe0 + (u2->h >> 4);
+ dest.push_back(u8);
+ u8 = 0x80 + ((u2->h & 0xf) << 2) + (u2->l >> 6);
+ dest.push_back(u8);
+ u8 = 0x80 + (u2->l & 0x3f);
+ dest.push_back(u8);
+ } else { // < 0x800 (2-byte UTF-8 character)
+ u8 = 0xc0 + (u2->h << 2) + (u2->l >> 6);
+ dest.push_back(u8);
+ u8 = 0x80 + (u2->l & 0x3f);
+ dest.push_back(u8);
+ }
+ } else { // <= 0xFF
+ if (u2->l & 0x80) { // >0x80 (2-byte UTF-8 character)
+ u8 = 0xc0 + (u2->l >> 6);
+ dest.push_back(u8);
+ u8 = 0x80 + (u2->l & 0x3f);
+ dest.push_back(u8);
+ } else { // < 0x80 (1-byte UTF-8 character)
+ u8 = u2->l;
+ dest.push_back(u8);
+ }
+ }
+ ++u2;
+ }
+ return dest;
+}
+
+int u8_u16(std::vector<w_char>& dest, const std::string& src) {
+ dest.clear();
+ std::string::const_iterator u8 = src.begin();
+ std::string::const_iterator u8_max = src.end();
+
+ while (u8 < u8_max) {
+ w_char u2;
+ switch ((*u8) & 0xf0) {
+ case 0x00:
+ case 0x10:
+ case 0x20:
+ case 0x30:
+ case 0x40:
+ case 0x50:
+ case 0x60:
+ case 0x70: {
+ u2.h = 0;
+ u2.l = *u8;
+ break;
+ }
+ case 0x80:
+ case 0x90:
+ case 0xa0:
+ case 0xb0: {
+ HUNSPELL_WARNING(stderr,
+ "UTF-8 encoding error. Unexpected continuation bytes "
+ "in %ld. character position\n%s\n",
+ static_cast<long>(std::distance(src.begin(), u8)),
+ src.c_str());
+ u2.h = 0xff;
+ u2.l = 0xfd;
+ break;
+ }
+ case 0xc0:
+ case 0xd0: { // 2-byte UTF-8 codes
+ if ((*(u8 + 1) & 0xc0) == 0x80) {
+ u2.h = (*u8 & 0x1f) >> 2;
+ u2.l = (static_cast<unsigned char>(*u8) << 6) + (*(u8 + 1) & 0x3f);
+ ++u8;
+ } else {
+ HUNSPELL_WARNING(stderr,
+ "UTF-8 encoding error. Missing continuation byte in "
+ "%ld. character position:\n%s\n",
+ static_cast<long>(std::distance(src.begin(), u8)),
+ src.c_str());
+ u2.h = 0xff;
+ u2.l = 0xfd;
+ }
+ break;
+ }
+ case 0xe0: { // 3-byte UTF-8 codes
+ if ((*(u8 + 1) & 0xc0) == 0x80) {
+ u2.h = ((*u8 & 0x0f) << 4) + ((*(u8 + 1) & 0x3f) >> 2);
+ ++u8;
+ if ((*(u8 + 1) & 0xc0) == 0x80) {
+ u2.l = (static_cast<unsigned char>(*u8) << 6) + (*(u8 + 1) & 0x3f);
+ ++u8;
+ } else {
+ HUNSPELL_WARNING(stderr,
+ "UTF-8 encoding error. Missing continuation byte "
+ "in %ld. character position:\n%s\n",
+ static_cast<long>(std::distance(src.begin(), u8)),
+ src.c_str());
+ u2.h = 0xff;
+ u2.l = 0xfd;
+ }
+ } else {
+ HUNSPELL_WARNING(stderr,
+ "UTF-8 encoding error. Missing continuation byte in "
+ "%ld. character position:\n%s\n",
+ static_cast<long>(std::distance(src.begin(), u8)),
+ src.c_str());
+ u2.h = 0xff;
+ u2.l = 0xfd;
+ }
+ break;
+ }
+ case 0xf0: { // 4 or more byte UTF-8 codes
+ HUNSPELL_WARNING(stderr,
+ "This UTF-8 encoding can't convert to UTF-16:\n%s\n",
+ src.c_str());
+ u2.h = 0xff;
+ u2.l = 0xfd;
+ dest.push_back(u2);
+ return -1;
+ }
+ }
+ dest.push_back(u2);
+ ++u8;
+ }
+
+ return dest.size();
+}
+
+namespace {
+class is_any_of {
+ public:
+ explicit is_any_of(const std::string& in) : chars(in) {}
+
+ bool operator()(char c) { return chars.find(c) != std::string::npos; }
+
+ private:
+ std::string chars;
+};
+}
+
+std::string::const_iterator mystrsep(const std::string &str,
+ std::string::const_iterator& start) {
+ std::string::const_iterator end = str.end();
+
+ is_any_of op(" \t");
+ // don't use isspace() here, the string can be in some random charset
+ // that's way different than the locale's
+ std::string::const_iterator sp = start;
+ while (sp != end && op(*sp))
+ ++sp;
+
+ std::string::const_iterator dp = sp;
+ while (dp != end && !op(*dp))
+ ++dp;
+
+ start = dp;
+ return sp;
+}
+
+// replaces strdup with ansi version
+char* mystrdup(const char* s) {
+ char* d = NULL;
+ if (s) {
+ size_t sl = strlen(s) + 1;
+ d = (char*)malloc(sl);
+ if (d) {
+ memcpy(d, s, sl);
+ } else {
+ HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
+ }
+ }
+ return d;
+}
+
+// remove cross-platform text line end characters
+void mychomp(std::string& s) {
+ size_t k = s.size();
+ size_t newsize = k;
+ if ((k > 0) && ((s[k - 1] == '\r') || (s[k - 1] == '\n')))
+ --newsize;
+ if ((k > 1) && (s[k - 2] == '\r'))
+ --newsize;
+ s.resize(newsize);
+}
+
+// break text to lines
+std::vector<std::string> line_tok(const std::string& text, char breakchar) {
+ std::vector<std::string> ret;
+ if (text.empty()) {
+ return ret;
+ }
+
+ std::stringstream ss(text);
+ std::string tok;
+ while(std::getline(ss, tok, breakchar)) {
+ if (!tok.empty()) {
+ ret.push_back(tok);
+ }
+ }
+
+ return ret;
+}
+
+// uniq line in place
+void line_uniq(std::string& text, char breakchar)
+{
+ std::vector<std::string> lines = line_tok(text, breakchar);
+ text.clear();
+ if (lines.empty()) {
+ return;
+ }
+ text = lines[0];
+ for (size_t i = 1; i < lines.size(); ++i) {
+ bool dup = false;
+ for (size_t j = 0; j < i; ++j) {
+ if (lines[i] == lines[j]) {
+ dup = true;
+ break;
+ }
+ }
+ if (!dup) {
+ if (!text.empty())
+ text.push_back(breakchar);
+ text.append(lines[i]);
+ }
+ }
+}
+
+// uniq and boundary for compound analysis: "1\n\2\n\1" -> " ( \1 | \2 ) "
+void line_uniq_app(std::string& text, char breakchar) {
+ if (text.find(breakchar) == std::string::npos) {
+ return;
+ }
+
+ std::vector<std::string> lines = line_tok(text, breakchar);
+ text.clear();
+ if (lines.empty()) {
+ return;
+ }
+ text = lines[0];
+ for (size_t i = 1; i < lines.size(); ++i) {
+ bool dup = false;
+ for (size_t j = 0; j < i; ++j) {
+ if (lines[i] == lines[j]) {
+ dup = true;
+ break;
+ }
+ }
+ if (!dup) {
+ if (!text.empty())
+ text.push_back(breakchar);
+ text.append(lines[i]);
+ }
+ }
+
+ if (lines.size() == 1) {
+ text = lines[0];
+ return;
+ }
+
+ text.assign(" ( ");
+ for (size_t i = 0; i < lines.size(); ++i) {
+ text.append(lines[i]);
+ text.append(" | ");
+ }
+ text[text.size() - 2] = ')'; // " ) "
+}
+
+// append s to ends of every lines in text
+std::string& strlinecat(std::string& str, const std::string& apd) {
+ size_t pos = 0;
+ while ((pos = str.find('\n', pos)) != std::string::npos) {
+ str.insert(pos, apd);
+ pos += apd.length() + 1;
+ }
+ str.append(apd);
+ return str;
+}
+
+int fieldlen(const char* r) {
+ int n = 0;
+ while (r && *r != ' ' && *r != '\t' && *r != '\0' && *r != '\n') {
+ r++;
+ n++;
+ }
+ return n;
+}
+
+bool copy_field(std::string& dest,
+ const std::string& morph,
+ const std::string& var) {
+ if (morph.empty())
+ return false;
+ size_t pos = morph.find(var);
+ if (pos == std::string::npos)
+ return false;
+ dest.clear();
+ std::string beg(morph.substr(pos + MORPH_TAG_LEN, std::string::npos));
+
+ for (size_t i = 0; i < beg.size(); ++i) {
+ const char c(beg[i]);
+ if (c == ' ' || c == '\t' || c == '\n')
+ break;
+ dest.push_back(c);
+ }
+
+ return true;
+}
+
+std::string& mystrrep(std::string& str,
+ const std::string& search,
+ const std::string& replace) {
+ size_t pos = 0;
+ while ((pos = str.find(search, pos)) != std::string::npos) {
+ str.replace(pos, search.length(), replace);
+ pos += replace.length();
+ }
+ return str;
+}
+
+// reverse word
+size_t reverseword(std::string& word) {
+ std::reverse(word.begin(), word.end());
+ return word.size();
+}
+
+// reverse word
+size_t reverseword_utf(std::string& word) {
+ std::vector<w_char> w;
+ u8_u16(w, word);
+ std::reverse(w.begin(), w.end());
+ u16_u8(word, w);
+ return w.size();
+}
+
+void uniqlist(std::vector<std::string>& list) {
+ if (list.size() < 2)
+ return;
+
+ std::vector<std::string> ret;
+ ret.push_back(list[0]);
+
+ for (size_t i = 1; i < list.size(); ++i) {
+ if (std::find(ret.begin(), ret.end(), list[i]) == ret.end())
+ ret.push_back(list[i]);
+ }
+
+ list.swap(ret);
+}
+
+namespace {
+unsigned char cupper(const struct cs_info* csconv, int nIndex) {
+ assert(nIndex >= 0 && nIndex <= 255);
+ return csconv[nIndex].cupper;
+}
+
+unsigned char clower(const struct cs_info* csconv, int nIndex) {
+ assert(nIndex >= 0 && nIndex <= 255);
+ return csconv[nIndex].clower;
+}
+
+unsigned char ccase(const struct cs_info* csconv, int nIndex) {
+ assert(nIndex >= 0 && nIndex <= 255);
+ return csconv[nIndex].ccase;
+}
+}
+
+w_char upper_utf(w_char u, int langnum) {
+ unsigned short idx = (u.h << 8) + u.l;
+ unsigned short upridx = unicodetoupper(idx, langnum);
+ if (idx != upridx) {
+ u.h = (unsigned char)(upridx >> 8);
+ u.l = (unsigned char)(upridx & 0x00FF);
+ }
+ return u;
+}
+
+w_char lower_utf(w_char u, int langnum) {
+ unsigned short idx = (u.h << 8) + u.l;
+ unsigned short lwridx = unicodetolower(idx, langnum);
+ if (idx != lwridx) {
+ u.h = (unsigned char)(lwridx >> 8);
+ u.l = (unsigned char)(lwridx & 0x00FF);
+ }
+ return u;
+}
+
+// convert std::string to all caps
+std::string& mkallcap(std::string& s, const struct cs_info* csconv) {
+ for (std::string::iterator aI = s.begin(), aEnd = s.end(); aI != aEnd; ++aI) {
+ *aI = cupper(csconv, static_cast<unsigned char>(*aI));
+ }
+ return s;
+}
+
+// convert std::string to all little
+std::string& mkallsmall(std::string& s, const struct cs_info* csconv) {
+ for (std::string::iterator aI = s.begin(), aEnd = s.end(); aI != aEnd; ++aI) {
+ *aI = clower(csconv, static_cast<unsigned char>(*aI));
+ }
+ return s;
+}
+
+std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
+ int langnum) {
+ for (size_t i = 0; i < u.size(); ++i) {
+ unsigned short idx = (u[i].h << 8) + u[i].l;
+ unsigned short lwridx = unicodetolower(idx, langnum);
+ if (idx != lwridx) {
+ u[i].h = (unsigned char)(lwridx >> 8);
+ u[i].l = (unsigned char)(lwridx & 0x00FF);
+ }
+ }
+ return u;
+}
+
+std::vector<w_char>& mkallcap_utf(std::vector<w_char>& u, int langnum) {
+ for (size_t i = 0; i < u.size(); i++) {
+ unsigned short idx = (u[i].h << 8) + u[i].l;
+ unsigned short upridx = unicodetoupper(idx, langnum);
+ if (idx != upridx) {
+ u[i].h = (unsigned char)(upridx >> 8);
+ u[i].l = (unsigned char)(upridx & 0x00FF);
+ }
+ }
+ return u;
+}
+
+std::string& mkinitcap(std::string& s, const struct cs_info* csconv) {
+ if (!s.empty()) {
+ s[0] = cupper(csconv, static_cast<unsigned char>(s[0]));
+ }
+ return s;
+}
+
+std::vector<w_char>& mkinitcap_utf(std::vector<w_char>& u, int langnum) {
+ if (!u.empty()) {
+ unsigned short idx = (u[0].h << 8) + u[0].l;
+ unsigned short upridx = unicodetoupper(idx, langnum);
+ if (idx != upridx) {
+ u[0].h = (unsigned char)(upridx >> 8);
+ u[0].l = (unsigned char)(upridx & 0x00FF);
+ }
+ }
+ return u;
+}
+
+std::string& mkinitsmall(std::string& s, const struct cs_info* csconv) {
+ if (!s.empty()) {
+ s[0] = clower(csconv, static_cast<unsigned char>(s[0]));
+ }
+ return s;
+}
+
+std::vector<w_char>& mkinitsmall_utf(std::vector<w_char>& u, int langnum) {
+ if (!u.empty()) {
+ unsigned short idx = (u[0].h << 8) + u[0].l;
+ unsigned short lwridx = unicodetolower(idx, langnum);
+ if (idx != lwridx) {
+ u[0].h = (unsigned char)(lwridx >> 8);
+ u[0].l = (unsigned char)(lwridx & 0x00FF);
+ }
+ }
+ return u;
+}
+
+// conversion function for protected memory
+void store_pointer(char* dest, char* source) {
+ memcpy(dest, &source, sizeof(char*));
+}
+
+// conversion function for protected memory
+char* get_stored_pointer(const char* s) {
+ char* p;
+ memcpy(&p, s, sizeof(char*));
+ return p;
+}
+
+#ifndef MOZILLA_CLIENT
+
+// these are simple character mappings for the
+// encodings supported
+// supplying isupper, tolower, and toupper
+
+static struct cs_info iso1_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf},
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1},
+ {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd},
+ {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2},
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8},
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info iso2_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xb1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x01, 0xb3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x01, 0xb5, 0xa5}, {0x01, 0xb6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x01, 0xb9, 0xa9}, {0x01, 0xba, 0xaa},
+ {0x01, 0xbb, 0xab}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad},
+ {0x01, 0xbe, 0xae}, {0x01, 0xbf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xa1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xa3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xa5}, {0x00, 0xb6, 0xa6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xa9},
+ {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xab}, {0x00, 0xbc, 0xac},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xae}, {0x00, 0xbf, 0xaf},
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1},
+ {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd},
+ {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2},
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8},
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info iso3_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xb1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x01, 0xb6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x01, 0x69, 0xa9}, {0x01, 0xba, 0xaa},
+ {0x01, 0xbb, 0xab}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x01, 0xbf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xa1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xa6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0x49},
+ {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xab}, {0x00, 0xbc, 0xac},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xaf},
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},
+ {0x00, 0xc3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x00, 0xd0, 0xd0}, {0x01, 0xf1, 0xd1},
+ {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd},
+ {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xe3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2},
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8},
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info iso4_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xb1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x01, 0xb3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x01, 0xb5, 0xa5}, {0x01, 0xb6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x01, 0xb9, 0xa9}, {0x01, 0xba, 0xaa},
+ {0x01, 0xbb, 0xab}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad},
+ {0x01, 0xbe, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xa1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xa3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xa5}, {0x00, 0xb6, 0xa6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xa9},
+ {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xab}, {0x00, 0xbc, 0xac},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xae}, {0x00, 0xbf, 0xbf},
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1},
+ {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd},
+ {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2},
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8},
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info iso5_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xf1, 0xa1},
+ {0x01, 0xf2, 0xa2}, {0x01, 0xf3, 0xa3}, {0x01, 0xf4, 0xa4},
+ {0x01, 0xf5, 0xa5}, {0x01, 0xf6, 0xa6}, {0x01, 0xf7, 0xa7},
+ {0x01, 0xf8, 0xa8}, {0x01, 0xf9, 0xa9}, {0x01, 0xfa, 0xaa},
+ {0x01, 0xfb, 0xab}, {0x01, 0xfc, 0xac}, {0x00, 0xad, 0xad},
+ {0x01, 0xfe, 0xae}, {0x01, 0xff, 0xaf}, {0x01, 0xd0, 0xb0},
+ {0x01, 0xd1, 0xb1}, {0x01, 0xd2, 0xb2}, {0x01, 0xd3, 0xb3},
+ {0x01, 0xd4, 0xb4}, {0x01, 0xd5, 0xb5}, {0x01, 0xd6, 0xb6},
+ {0x01, 0xd7, 0xb7}, {0x01, 0xd8, 0xb8}, {0x01, 0xd9, 0xb9},
+ {0x01, 0xda, 0xba}, {0x01, 0xdb, 0xbb}, {0x01, 0xdc, 0xbc},
+ {0x01, 0xdd, 0xbd}, {0x01, 0xde, 0xbe}, {0x01, 0xdf, 0xbf},
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x00, 0xd0, 0xb0}, {0x00, 0xd1, 0xb1},
+ {0x00, 0xd2, 0xb2}, {0x00, 0xd3, 0xb3}, {0x00, 0xd4, 0xb4},
+ {0x00, 0xd5, 0xb5}, {0x00, 0xd6, 0xb6}, {0x00, 0xd7, 0xb7},
+ {0x00, 0xd8, 0xb8}, {0x00, 0xd9, 0xb9}, {0x00, 0xda, 0xba},
+ {0x00, 0xdb, 0xbb}, {0x00, 0xdc, 0xbc}, {0x00, 0xdd, 0xbd},
+ {0x00, 0xde, 0xbe}, {0x00, 0xdf, 0xbf}, {0x00, 0xe0, 0xc0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xa1}, {0x00, 0xf2, 0xa2},
+ {0x00, 0xf3, 0xa3}, {0x00, 0xf4, 0xa4}, {0x00, 0xf5, 0xa5},
+ {0x00, 0xf6, 0xa6}, {0x00, 0xf7, 0xa7}, {0x00, 0xf8, 0xa8},
+ {0x00, 0xf9, 0xa9}, {0x00, 0xfa, 0xaa}, {0x00, 0xfb, 0xab},
+ {0x00, 0xfc, 0xac}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xae},
+ {0x00, 0xff, 0xaf}};
+
+static struct cs_info iso6_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf},
+ {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2},
+ {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5},
+ {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8},
+ {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb},
+ {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce},
+ {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1},
+ {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4},
+ {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda},
+ {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd},
+ {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0},
+ {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3},
+ {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6},
+ {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9},
+ {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec},
+ {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef},
+ {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2},
+ {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5},
+ {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8},
+ {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb},
+ {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info iso7_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x01, 0xdc, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x01, 0xdd, 0xb8}, {0x01, 0xde, 0xb9},
+ {0x01, 0xdf, 0xba}, {0x00, 0xbb, 0xbb}, {0x01, 0xfc, 0xbc},
+ {0x00, 0xbd, 0xbd}, {0x01, 0xfd, 0xbe}, {0x01, 0xfe, 0xbf},
+ {0x00, 0xc0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1},
+ {0x00, 0xd2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x01, 0xf7, 0xd7},
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x00, 0xdc, 0xb6}, {0x00, 0xdd, 0xb8},
+ {0x00, 0xde, 0xb9}, {0x00, 0xdf, 0xba}, {0x00, 0xe0, 0xe0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd3},
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xd7}, {0x00, 0xf8, 0xd8},
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xbc}, {0x00, 0xfd, 0xbe}, {0x00, 0xfe, 0xbf},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info iso8_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf},
+ {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2},
+ {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5},
+ {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8},
+ {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb},
+ {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce},
+ {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1},
+ {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4},
+ {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda},
+ {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd},
+ {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0},
+ {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3},
+ {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6},
+ {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9},
+ {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec},
+ {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef},
+ {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2},
+ {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5},
+ {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8},
+ {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb},
+ {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info iso9_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0xfd, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0xdd}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf},
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1},
+ {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0x69, 0xdd},
+ {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2},
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8},
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0x49}, {0x00, 0xfe, 0xde},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info iso10_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf},
+ {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2},
+ {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5},
+ {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8},
+ {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb},
+ {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce},
+ {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1},
+ {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4},
+ {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda},
+ {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd},
+ {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0},
+ {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3},
+ {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6},
+ {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9},
+ {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec},
+ {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef},
+ {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2},
+ {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5},
+ {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8},
+ {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb},
+ {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info koi8r_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xb3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x01, 0xa3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf},
+ {0x00, 0xc0, 0xe0}, {0x00, 0xc1, 0xe1}, {0x00, 0xc2, 0xe2},
+ {0x00, 0xc3, 0xe3}, {0x00, 0xc4, 0xe4}, {0x00, 0xc5, 0xe5},
+ {0x00, 0xc6, 0xe6}, {0x00, 0xc7, 0xe7}, {0x00, 0xc8, 0xe8},
+ {0x00, 0xc9, 0xe9}, {0x00, 0xca, 0xea}, {0x00, 0xcb, 0xeb},
+ {0x00, 0xcc, 0xec}, {0x00, 0xcd, 0xed}, {0x00, 0xce, 0xee},
+ {0x00, 0xcf, 0xef}, {0x00, 0xd0, 0xf0}, {0x00, 0xd1, 0xf1},
+ {0x00, 0xd2, 0xf2}, {0x00, 0xd3, 0xf3}, {0x00, 0xd4, 0xf4},
+ {0x00, 0xd5, 0xf5}, {0x00, 0xd6, 0xf6}, {0x00, 0xd7, 0xf7},
+ {0x00, 0xd8, 0xf8}, {0x00, 0xd9, 0xf9}, {0x00, 0xda, 0xfa},
+ {0x00, 0xdb, 0xfb}, {0x00, 0xdc, 0xfc}, {0x00, 0xdd, 0xfd},
+ {0x00, 0xde, 0xfe}, {0x00, 0xdf, 0xff}, {0x01, 0xc0, 0xe0},
+ {0x01, 0xc1, 0xe1}, {0x01, 0xc2, 0xe2}, {0x01, 0xc3, 0xe3},
+ {0x01, 0xc4, 0xe4}, {0x01, 0xc5, 0xe5}, {0x01, 0xc6, 0xe6},
+ {0x01, 0xc7, 0xe7}, {0x01, 0xc8, 0xe8}, {0x01, 0xc9, 0xe9},
+ {0x01, 0xca, 0xea}, {0x01, 0xcb, 0xeb}, {0x01, 0xcc, 0xec},
+ {0x01, 0xcd, 0xed}, {0x01, 0xce, 0xee}, {0x01, 0xcf, 0xef},
+ {0x01, 0xd0, 0xf0}, {0x01, 0xd1, 0xf1}, {0x01, 0xd2, 0xf2},
+ {0x01, 0xd3, 0xf3}, {0x01, 0xd4, 0xf4}, {0x01, 0xd5, 0xf5},
+ {0x01, 0xd6, 0xf6}, {0x01, 0xd7, 0xf7}, {0x01, 0xd8, 0xf8},
+ {0x01, 0xd9, 0xf9}, {0x01, 0xda, 0xfa}, {0x01, 0xdb, 0xfb},
+ {0x01, 0xdc, 0xfc}, {0x01, 0xdd, 0xfd}, {0x01, 0xde, 0xfe},
+ {0x01, 0xdf, 0xff}};
+
+static struct cs_info koi8u_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xb3}, {0x00, 0xa4, 0xb4}, /* ie */
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xb6}, /* i */
+ {0x00, 0xa7, 0xb7}, /* ii */
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xbd}, /* g'' */
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x01, 0xa3, 0xb3},
+ {0x00, 0xb4, 0xb4}, /* IE */
+ {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, /* I */
+ {0x00, 0xb7, 0xb7}, /* II */
+ {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, {0x00, 0xba, 0xba},
+ {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, {0x00, 0xbd, 0xbd},
+ {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, {0x00, 0xc0, 0xe0},
+ {0x00, 0xc1, 0xe1}, {0x00, 0xc2, 0xe2}, {0x00, 0xc3, 0xe3},
+ {0x00, 0xc4, 0xe4}, {0x00, 0xc5, 0xe5}, {0x00, 0xc6, 0xe6},
+ {0x00, 0xc7, 0xe7}, {0x00, 0xc8, 0xe8}, {0x00, 0xc9, 0xe9},
+ {0x00, 0xca, 0xea}, {0x00, 0xcb, 0xeb}, {0x00, 0xcc, 0xec},
+ {0x00, 0xcd, 0xed}, {0x00, 0xce, 0xee}, {0x00, 0xcf, 0xef},
+ {0x00, 0xd0, 0xf0}, {0x00, 0xd1, 0xf1}, {0x00, 0xd2, 0xf2},
+ {0x00, 0xd3, 0xf3}, {0x00, 0xd4, 0xf4}, {0x00, 0xd5, 0xf5},
+ {0x00, 0xd6, 0xf6}, {0x00, 0xd7, 0xf7}, {0x00, 0xd8, 0xf8},
+ {0x00, 0xd9, 0xf9}, {0x00, 0xda, 0xfa}, {0x00, 0xdb, 0xfb},
+ {0x00, 0xdc, 0xfc}, {0x00, 0xdd, 0xfd}, {0x00, 0xde, 0xfe},
+ {0x00, 0xdf, 0xff}, {0x01, 0xc0, 0xe0}, {0x01, 0xc1, 0xe1},
+ {0x01, 0xc2, 0xe2}, {0x01, 0xc3, 0xe3}, {0x01, 0xc4, 0xe4},
+ {0x01, 0xc5, 0xe5}, {0x01, 0xc6, 0xe6}, {0x01, 0xc7, 0xe7},
+ {0x01, 0xc8, 0xe8}, {0x01, 0xc9, 0xe9}, {0x01, 0xca, 0xea},
+ {0x01, 0xcb, 0xeb}, {0x01, 0xcc, 0xec}, {0x01, 0xcd, 0xed},
+ {0x01, 0xce, 0xee}, {0x01, 0xcf, 0xef}, {0x01, 0xd0, 0xf0},
+ {0x01, 0xd1, 0xf1}, {0x01, 0xd2, 0xf2}, {0x01, 0xd3, 0xf3},
+ {0x01, 0xd4, 0xf4}, {0x01, 0xd5, 0xf5}, {0x01, 0xd6, 0xf6},
+ {0x01, 0xd7, 0xf7}, {0x01, 0xd8, 0xf8}, {0x01, 0xd9, 0xf9},
+ {0x01, 0xda, 0xfa}, {0x01, 0xdb, 0xfb}, {0x01, 0xdc, 0xfc},
+ {0x01, 0xdd, 0xfd}, {0x01, 0xde, 0xfe}, {0x01, 0xdf, 0xff}};
+
+static struct cs_info cp1251_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x01, 0x90, 0x80},
+ {0x01, 0x83, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x81},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x01, 0x9a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x01, 0x9c, 0x8c},
+ {0x01, 0x9d, 0x8d}, {0x01, 0x9e, 0x8e}, {0x01, 0x9f, 0x8f},
+ {0x00, 0x90, 0x80}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x8a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x8c}, {0x00, 0x9d, 0x8d}, {0x00, 0x9e, 0x8e},
+ {0x00, 0x9f, 0x8f}, {0x00, 0xa0, 0xa0}, {0x01, 0xa2, 0xa1},
+ {0x00, 0xa2, 0xa1}, {0x01, 0xbc, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x01, 0xb4, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x01, 0xb8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x01, 0xba, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x01, 0xbf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x01, 0xb3, 0xb2}, {0x00, 0xb3, 0xb2},
+ {0x00, 0xb4, 0xa5}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xa8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xa3},
+ {0x01, 0xbe, 0xbd}, {0x00, 0xbe, 0xbd}, {0x00, 0xbf, 0xaf},
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1},
+ {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x01, 0xf7, 0xd7},
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd},
+ {0x01, 0xfe, 0xde}, {0x01, 0xff, 0xdf}, {0x00, 0xe0, 0xc0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2},
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xd7}, {0x00, 0xf8, 0xd8},
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
+ {0x00, 0xff, 0xdf}};
+
+static struct cs_info iso13_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0A, 0x0A}, {0x00, 0x0B, 0x0B},
+ {0x00, 0x0C, 0x0C}, {0x00, 0x0D, 0x0D}, {0x00, 0x0E, 0x0E},
+ {0x00, 0x0F, 0x0F}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1A, 0x1A},
+ {0x00, 0x1B, 0x1B}, {0x00, 0x1C, 0x1C}, {0x00, 0x1D, 0x1D},
+ {0x00, 0x1E, 0x1E}, {0x00, 0x1F, 0x1F}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2A, 0x2A}, {0x00, 0x2B, 0x2B}, {0x00, 0x2C, 0x2C},
+ {0x00, 0x2D, 0x2D}, {0x00, 0x2E, 0x2E}, {0x00, 0x2F, 0x2F},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3A, 0x3A}, {0x00, 0x3B, 0x3B},
+ {0x00, 0x3C, 0x3C}, {0x00, 0x3D, 0x3D}, {0x00, 0x3E, 0x3E},
+ {0x00, 0x3F, 0x3F}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6A, 0x4A},
+ {0x01, 0x6B, 0x4B}, {0x01, 0x6C, 0x4C}, {0x01, 0x6D, 0x4D},
+ {0x01, 0x6E, 0x4E}, {0x01, 0x6F, 0x4F}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7A, 0x5A}, {0x00, 0x5B, 0x5B}, {0x00, 0x5C, 0x5C},
+ {0x00, 0x5D, 0x5D}, {0x00, 0x5E, 0x5E}, {0x00, 0x5F, 0x5F},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6A, 0x4A}, {0x00, 0x6B, 0x4B},
+ {0x00, 0x6C, 0x4C}, {0x00, 0x6D, 0x4D}, {0x00, 0x6E, 0x4E},
+ {0x00, 0x6F, 0x4F}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7A, 0x5A},
+ {0x00, 0x7B, 0x7B}, {0x00, 0x7C, 0x7C}, {0x00, 0x7D, 0x7D},
+ {0x00, 0x7E, 0x7E}, {0x00, 0x7F, 0x7F}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8A, 0x8A}, {0x00, 0x8B, 0x8B}, {0x00, 0x8C, 0x8C},
+ {0x00, 0x8D, 0x8D}, {0x00, 0x8E, 0x8E}, {0x00, 0x8F, 0x8F},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9A, 0x9A}, {0x00, 0x9B, 0x9B},
+ {0x00, 0x9C, 0x9C}, {0x00, 0x9D, 0x9D}, {0x00, 0x9E, 0x9E},
+ {0x00, 0x9F, 0x9F}, {0x00, 0xA0, 0xA0}, {0x00, 0xA1, 0xA1},
+ {0x00, 0xA2, 0xA2}, {0x00, 0xA3, 0xA3}, {0x00, 0xA4, 0xA4},
+ {0x00, 0xA5, 0xA5}, {0x00, 0xA6, 0xA6}, {0x00, 0xA7, 0xA7},
+ {0x01, 0xB8, 0xA8}, {0x00, 0xA9, 0xA9}, {0x01, 0xBA, 0xAA},
+ {0x00, 0xAB, 0xAB}, {0x00, 0xAC, 0xAC}, {0x00, 0xAD, 0xAD},
+ {0x00, 0xAE, 0xAE}, {0x01, 0xBF, 0xAF}, {0x00, 0xB0, 0xB0},
+ {0x00, 0xB1, 0xB1}, {0x00, 0xB2, 0xB2}, {0x00, 0xB3, 0xB3},
+ {0x00, 0xB4, 0xB4}, {0x00, 0xB5, 0xB5}, {0x00, 0xB6, 0xB6},
+ {0x00, 0xB7, 0xB7}, {0x00, 0xB8, 0xA8}, {0x00, 0xB9, 0xB9},
+ {0x00, 0xBA, 0xAA}, {0x00, 0xBB, 0xBB}, {0x00, 0xBC, 0xBC},
+ {0x00, 0xBD, 0xBD}, {0x00, 0xBE, 0xBE}, {0x00, 0xBF, 0xAF},
+ {0x01, 0xE0, 0xC0}, {0x01, 0xE1, 0xC1}, {0x01, 0xE2, 0xC2},
+ {0x01, 0xE3, 0xC3}, {0x01, 0xE4, 0xC4}, {0x01, 0xE5, 0xC5},
+ {0x01, 0xE6, 0xC6}, {0x01, 0xE7, 0xC7}, {0x01, 0xE8, 0xC8},
+ {0x01, 0xE9, 0xC9}, {0x01, 0xEA, 0xCA}, {0x01, 0xEB, 0xCB},
+ {0x01, 0xEC, 0xCC}, {0x01, 0xED, 0xCD}, {0x01, 0xEE, 0xCE},
+ {0x01, 0xEF, 0xCF}, {0x01, 0xF0, 0xD0}, {0x01, 0xF1, 0xD1},
+ {0x01, 0xF2, 0xD2}, {0x01, 0xF3, 0xD3}, {0x01, 0xF4, 0xD4},
+ {0x01, 0xF5, 0xD5}, {0x01, 0xF6, 0xD6}, {0x00, 0xD7, 0xD7},
+ {0x01, 0xF8, 0xD8}, {0x01, 0xF9, 0xD9}, {0x01, 0xFA, 0xDA},
+ {0x01, 0xFB, 0xDB}, {0x01, 0xFC, 0xDC}, {0x01, 0xFD, 0xDD},
+ {0x01, 0xFE, 0xDE}, {0x00, 0xDF, 0xDF}, {0x00, 0xE0, 0xC0},
+ {0x00, 0xE1, 0xC1}, {0x00, 0xE2, 0xC2}, {0x00, 0xE3, 0xC3},
+ {0x00, 0xE4, 0xC4}, {0x00, 0xE5, 0xC5}, {0x00, 0xE6, 0xC6},
+ {0x00, 0xE7, 0xC7}, {0x00, 0xE8, 0xC8}, {0x00, 0xE9, 0xC9},
+ {0x00, 0xEA, 0xCA}, {0x00, 0xEB, 0xCB}, {0x00, 0xEC, 0xCC},
+ {0x00, 0xED, 0xCD}, {0x00, 0xEE, 0xCE}, {0x00, 0xEF, 0xCF},
+ {0x00, 0xF0, 0xD0}, {0x00, 0xF1, 0xD1}, {0x00, 0xF2, 0xD2},
+ {0x00, 0xF3, 0xD3}, {0x00, 0xF4, 0xD4}, {0x00, 0xF5, 0xD5},
+ {0x00, 0xF6, 0xD6}, {0x00, 0xF7, 0xF7}, {0x00, 0xF8, 0xD8},
+ {0x00, 0xF9, 0xD9}, {0x00, 0xFA, 0xDA}, {0x00, 0xFB, 0xDB},
+ {0x00, 0xFC, 0xDC}, {0x00, 0xFD, 0xDD}, {0x00, 0xFE, 0xDE},
+ {0x00, 0xFF, 0xFF}};
+
+static struct cs_info iso14_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xa2, 0xa1},
+ {0x00, 0xa2, 0xa1}, {0x00, 0xa3, 0xa3}, {0x01, 0xa5, 0xa4},
+ {0x00, 0xa5, 0xa4}, {0x01, 0xa6, 0xab}, {0x00, 0xa7, 0xa7},
+ {0x01, 0xb8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x01, 0xba, 0xaa},
+ {0x00, 0xab, 0xa6}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x01, 0xff, 0xaf}, {0x01, 0xb1, 0xb0},
+ {0x00, 0xb1, 0xb0}, {0x01, 0xb3, 0xb2}, {0x00, 0xb3, 0xb2},
+ {0x01, 0xb5, 0xb4}, {0x00, 0xb5, 0xb4}, {0x00, 0xb6, 0xb6},
+ {0x01, 0xb9, 0xb7}, {0x00, 0xb8, 0xa8}, {0x00, 0xb9, 0xb6},
+ {0x00, 0xba, 0xaa}, {0x01, 0xbf, 0xbb}, {0x00, 0xbc, 0xac},
+ {0x01, 0xbe, 0xbd}, {0x00, 0xbe, 0xbd}, {0x00, 0xbf, 0xbb},
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1},
+ {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x01, 0xf7, 0xd7},
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd},
+ {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2},
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xd7}, {0x00, 0xf8, 0xd8},
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info iso15_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x01, 0xa8, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa6}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x01, 0xb8, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb4}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x01, 0xbd, 0xbc},
+ {0x00, 0xbd, 0xbc}, {0x01, 0xff, 0xbe}, {0x00, 0xbf, 0xbf},
+ {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2},
+ {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5},
+ {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8},
+ {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb},
+ {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce},
+ {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1},
+ {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4},
+ {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda},
+ {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd},
+ {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0},
+ {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3},
+ {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6},
+ {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9},
+ {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc},
+ {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf},
+ {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2},
+ {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5},
+ {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8},
+ {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb},
+ {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
+ {0x00, 0xff, 0xbe}};
+
+static struct cs_info iscii_devanagari_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf},
+ {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2},
+ {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5},
+ {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8},
+ {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb},
+ {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce},
+ {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1},
+ {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4},
+ {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda},
+ {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd},
+ {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0},
+ {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3},
+ {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6},
+ {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9},
+ {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec},
+ {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef},
+ {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2},
+ {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5},
+ {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8},
+ {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb},
+ {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe},
+ {0x00, 0xff, 0xff}};
+
+static struct cs_info tis620_tbl[] = {
+ {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
+ {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
+ {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b},
+ {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e},
+ {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11},
+ {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14},
+ {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17},
+ {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a},
+ {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d},
+ {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20},
+ {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23},
+ {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26},
+ {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29},
+ {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c},
+ {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f},
+ {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32},
+ {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35},
+ {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38},
+ {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b},
+ {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e},
+ {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41},
+ {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44},
+ {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47},
+ {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a},
+ {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d},
+ {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50},
+ {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53},
+ {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56},
+ {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59},
+ {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c},
+ {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f},
+ {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42},
+ {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45},
+ {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48},
+ {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b},
+ {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e},
+ {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51},
+ {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54},
+ {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57},
+ {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a},
+ {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d},
+ {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80},
+ {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83},
+ {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86},
+ {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89},
+ {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c},
+ {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f},
+ {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92},
+ {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95},
+ {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98},
+ {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b},
+ {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e},
+ {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1},
+ {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4},
+ {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7},
+ {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa},
+ {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad},
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0},
+ {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3},
+ {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6},
+ {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9},
+ {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc},
+ {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf},
+ {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2},
+ {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5},
+ {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8},
+ {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb},
+ {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce},
+ {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1},
+ {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4},
+ {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7},
+ {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda},
+ {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd},
+ {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0},
+ {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3},
+ {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6},
+ {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9},
+ {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec},
+ {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef},
+ {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2},
+ {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5},
+ {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8},
+ {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb},
+ {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe},
+ {0x00, 0xff, 0xff}};
+
+struct enc_entry {
+ const char* enc_name;
+ struct cs_info* cs_table;
+};
+
+static struct enc_entry encds[] = {
+ {"iso88591", iso1_tbl}, // ISO-8859-1
+ {"iso88592", iso2_tbl}, // ISO-8859-2
+ {"iso88593", iso3_tbl}, // ISO-8859-3
+ {"iso88594", iso4_tbl}, // ISO-8859-4
+ {"iso88595", iso5_tbl}, // ISO-8859-5
+ {"iso88596", iso6_tbl}, // ISO-8859-6
+ {"iso88597", iso7_tbl}, // ISO-8859-7
+ {"iso88598", iso8_tbl}, // ISO-8859-8
+ {"iso88599", iso9_tbl}, // ISO-8859-9
+ {"iso885910", iso10_tbl}, // ISO-8859-10
+ {"tis620", tis620_tbl}, // TIS-620/ISO-8859-11
+ {"tis6202533", tis620_tbl}, // TIS-620/ISO-8859-11
+ {"iso885911", tis620_tbl}, // TIS-620/ISO-8859-11
+ {"iso885913", iso13_tbl}, // ISO-8859-13
+ {"iso885914", iso14_tbl}, // ISO-8859-14
+ {"iso885915", iso15_tbl}, // ISO-8859-15
+ {"koi8r", koi8r_tbl}, // KOI8-R
+ {"koi8u", koi8u_tbl}, // KOI8-U
+ {"cp1251", cp1251_tbl}, // CP-1251
+ {"microsoftcp1251", cp1251_tbl}, // microsoft-cp1251
+ {"xisciias", iscii_devanagari_tbl}, // x-iscii-as
+ {"isciidevanagari", iscii_devanagari_tbl} // ISCII-DEVANAGARI
+};
+
+/* map to lower case and remove non alphanumeric chars */
+static void toAsciiLowerAndRemoveNonAlphanumeric(const char* pName,
+ char* pBuf) {
+ while (*pName) {
+ /* A-Z */
+ if ((*pName >= 0x41) && (*pName <= 0x5A)) {
+ *pBuf = (*pName) + 0x20; /* toAsciiLower */
+ pBuf++;
+ }
+ /* a-z, 0-9 */
+ else if (((*pName >= 0x61) && (*pName <= 0x7A)) ||
+ ((*pName >= 0x30) && (*pName <= 0x39))) {
+ *pBuf = *pName;
+ pBuf++;
+ }
+
+ pName++;
+ }
+
+ *pBuf = '\0';
+}
+
+struct cs_info* get_current_cs(const std::string& es) {
+ char* normalized_encoding = new char[es.size() + 1];
+ toAsciiLowerAndRemoveNonAlphanumeric(es.c_str(), normalized_encoding);
+
+ struct cs_info* ccs = NULL;
+ int n = sizeof(encds) / sizeof(encds[0]);
+ for (int i = 0; i < n; i++) {
+ if (strcmp(normalized_encoding, encds[i].enc_name) == 0) {
+ ccs = encds[i].cs_table;
+ break;
+ }
+ }
+
+ delete[] normalized_encoding;
+
+ if (!ccs) {
+ HUNSPELL_WARNING(stderr,
+ "error: unknown encoding %s: using %s as fallback\n", es.c_str(),
+ encds[0].enc_name);
+ ccs = encds[0].cs_table;
+ }
+
+ return ccs;
+}
+#else
+struct cs_info* get_current_cs(const std::string& es) {
+ return moz_hunspell_GetCurrentCS(es.c_str());
+}
+#endif
+
+// primitive isalpha() replacement for tokenization
+std::string get_casechars(const char* enc) {
+ struct cs_info* csconv = get_current_cs(enc);
+ std::string expw;
+ for (int i = 0; i <= 255; ++i) {
+ if (cupper(csconv, i) != clower(csconv, i)) {
+ expw.push_back(static_cast<char>(i));
+ }
+ }
+#ifdef MOZILLA_CLIENT
+ delete[] csconv;
+#endif
+ return expw;
+}
+
+// language to encoding default map
+
+struct lang_map {
+ const char* lang;
+ int num;
+};
+
+static struct lang_map lang2enc[] =
+ {{"ar", LANG_ar}, {"az", LANG_az},
+ {"az_AZ", LANG_az}, // for back-compatibility
+ {"bg", LANG_bg}, {"ca", LANG_ca},
+ {"crh", LANG_crh},
+ {"cs", LANG_cs}, {"da", LANG_da},
+ {"de", LANG_de}, {"el", LANG_el},
+ {"en", LANG_en}, {"es", LANG_es},
+ {"eu", LANG_eu}, {"gl", LANG_gl},
+ {"fr", LANG_fr}, {"hr", LANG_hr},
+ {"hu", LANG_hu}, {"hu_HU", LANG_hu}, // for back-compatibility
+ {"it", LANG_it}, {"la", LANG_la},
+ {"lv", LANG_lv}, {"nl", LANG_nl},
+ {"pl", LANG_pl}, {"pt", LANG_pt},
+ {"sv", LANG_sv}, {"tr", LANG_tr},
+ {"tr_TR", LANG_tr}, // for back-compatibility
+ {"ru", LANG_ru}, {"uk", LANG_uk}};
+
+int get_lang_num(const std::string& lang) {
+ int n = sizeof(lang2enc) / sizeof(lang2enc[0]);
+ for (int i = 0; i < n; i++) {
+ if (strcmp(lang.c_str(), lang2enc[i].lang) == 0) {
+ return lang2enc[i].num;
+ }
+ }
+ return LANG_xx;
+}
+
+#ifndef OPENOFFICEORG
+#ifndef MOZILLA_CLIENT
+void initialize_utf_tbl() {
+ utf_tbl_count++;
+ if (utf_tbl)
+ return;
+ utf_tbl = new unicode_info2[CONTSIZE];
+ for (size_t j = 0; j < CONTSIZE; ++j) {
+ utf_tbl[j].cletter = 0;
+ utf_tbl[j].clower = (unsigned short)j;
+ utf_tbl[j].cupper = (unsigned short)j;
+ }
+ for (size_t j = 0; j < UTF_LST_LEN; ++j) {
+ utf_tbl[utf_lst[j].c].cletter = 1;
+ utf_tbl[utf_lst[j].c].clower = utf_lst[j].clower;
+ utf_tbl[utf_lst[j].c].cupper = utf_lst[j].cupper;
+ }
+}
+#endif
+#endif
+
+void free_utf_tbl() {
+ if (utf_tbl_count > 0)
+ utf_tbl_count--;
+ if (utf_tbl && (utf_tbl_count == 0)) {
+ delete[] utf_tbl;
+ utf_tbl = NULL;
+ }
+}
+
+unsigned short unicodetoupper(unsigned short c, int langnum) {
+ // In Azeri and Turkish, I and i dictinct letters:
+ // There are a dotless lower case i pair of upper `I',
+ // and an upper I with dot pair of lower `i'.
+ if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh)))
+ return 0x0130;
+#ifdef OPENOFFICEORG
+ return static_cast<unsigned short>(u_toupper(c));
+#else
+#ifdef MOZILLA_CLIENT
+ return moz_hunspell_ToUpperCase((char16_t)c);
+#else
+ return (utf_tbl) ? utf_tbl[c].cupper : c;
+#endif
+#endif
+}
+
+unsigned short unicodetolower(unsigned short c, int langnum) {
+ // In Azeri and Turkish, I and i dictinct letters:
+ // There are a dotless lower case i pair of upper `I',
+ // and an upper I with dot pair of lower `i'.
+ if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh)))
+ return 0x0131;
+#ifdef OPENOFFICEORG
+ return static_cast<unsigned short>(u_tolower(c));
+#else
+#ifdef MOZILLA_CLIENT
+ return moz_hunspell_ToLowerCase((char16_t)c);
+#else
+ return (utf_tbl) ? utf_tbl[c].clower : c;
+#endif
+#endif
+}
+
+int unicodeisalpha(unsigned short c) {
+#ifdef OPENOFFICEORG
+ return u_isalpha(c);
+#else
+ return (utf_tbl) ? utf_tbl[c].cletter : 0;
+#endif
+}
+
+/* get type of capitalization */
+int get_captype(const std::string& word, cs_info* csconv) {
+ // now determine the capitalization type of the first nl letters
+ size_t ncap = 0;
+ size_t nneutral = 0;
+ size_t firstcap = 0;
+ if (csconv == NULL)
+ return NOCAP;
+ for (std::string::const_iterator q = word.begin(); q != word.end(); ++q) {
+ unsigned char nIndex = static_cast<unsigned char>(*q);
+ if (ccase(csconv, nIndex))
+ ncap++;
+ if (cupper(csconv, nIndex) == clower(csconv, nIndex))
+ nneutral++;
+ }
+ if (ncap) {
+ unsigned char nIndex = static_cast<unsigned char>(word[0]);
+ firstcap = csconv[nIndex].ccase;
+ }
+
+ // now finally set the captype
+ if (ncap == 0) {
+ return NOCAP;
+ } else if ((ncap == 1) && firstcap) {
+ return INITCAP;
+ } else if ((ncap == word.size()) || ((ncap + nneutral) == word.size())) {
+ return ALLCAP;
+ } else if ((ncap > 1) && firstcap) {
+ return HUHINITCAP;
+ }
+ return HUHCAP;
+}
+
+int get_captype_utf8(const std::vector<w_char>& word, int langnum) {
+ // now determine the capitalization type of the first nl letters
+ size_t ncap = 0;
+ size_t nneutral = 0;
+ size_t firstcap = 0;
+
+ std::vector<w_char>::const_iterator it = word.begin();
+ std::vector<w_char>::const_iterator it_end = word.end();
+ while (it != it_end) {
+ unsigned short idx = (it->h << 8) + it->l;
+ unsigned short lwridx = unicodetolower(idx, langnum);
+ if (idx != lwridx)
+ ncap++;
+ if (unicodetoupper(idx, langnum) == lwridx)
+ nneutral++;
+ ++it;
+ }
+ if (ncap) {
+ unsigned short idx = (word[0].h << 8) + word[0].l;
+ firstcap = (idx != unicodetolower(idx, langnum));
+ }
+
+ // now finally set the captype
+ if (ncap == 0) {
+ return NOCAP;
+ } else if ((ncap == 1) && firstcap) {
+ return INITCAP;
+ } else if ((ncap == word.size()) || ((ncap + nneutral) == word.size())) {
+ return ALLCAP;
+ } else if ((ncap > 1) && firstcap) {
+ return HUHINITCAP;
+ }
+ return HUHCAP;
+}
+
+// strip all ignored characters in the string
+size_t remove_ignored_chars_utf(std::string& word,
+ const std::vector<w_char>& ignored_chars) {
+ std::vector<w_char> w;
+ std::vector<w_char> w2;
+ u8_u16(w, word);
+
+ for (size_t i = 0; i < w.size(); ++i) {
+ if (!std::binary_search(ignored_chars.begin(),
+ ignored_chars.end(),
+ w[i])) {
+ w2.push_back(w[i]);
+ }
+ }
+
+ u16_u8(word, w2);
+ return w2.size();
+}
+
+// strip all ignored characters in the string
+size_t remove_ignored_chars(std::string& word,
+ const std::string& ignored_chars) {
+ word.erase(
+ std::remove_if(word.begin(), word.end(), is_any_of(ignored_chars)),
+ word.end());
+ return word.size();
+}
+
+bool parse_string(const std::string& line, std::string& out, int ln) {
+ if (!out.empty()) {
+ HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions\n", ln);
+ return false;
+ }
+ int i = 0;
+ int np = 0;
+ std::string::const_iterator iter = line.begin();
+ std::string::const_iterator start_piece = mystrsep(line, iter);
+ while (start_piece != line.end()) {
+ switch (i) {
+ case 0: {
+ np++;
+ break;
+ }
+ case 1: {
+ out.assign(start_piece, iter);
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(line, iter);
+ }
+ if (np != 2) {
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", ln);
+ return false;
+ }
+ return true;
+}
+
+bool parse_array(const std::string& line,
+ std::string& out,
+ std::vector<w_char>& out_utf16,
+ int utf8,
+ int ln) {
+ if (!parse_string(line, out, ln))
+ return false;
+ if (utf8) {
+ u8_u16(out_utf16, out);
+ std::sort(out_utf16.begin(), out_utf16.end());
+ }
+ return true;
+}
diff --git a/extensions/spellcheck/hunspell/src/csutil.hxx b/extensions/spellcheck/hunspell/src/csutil.hxx
new file mode 100644
index 0000000000..96f15c1469
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/csutil.hxx
@@ -0,0 +1,323 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef CSUTIL_HXX_
+#define CSUTIL_HXX_
+
+#include "hunvisapi.h"
+
+// First some base level utility routines
+
+#include <fstream>
+#include <string>
+#include <vector>
+#include <string.h>
+#include "w_char.hxx"
+#include "htypes.hxx"
+
+// casing
+#define NOCAP 0
+#define INITCAP 1
+#define ALLCAP 2
+#define HUHCAP 3
+#define HUHINITCAP 4
+
+// default encoding and keystring
+#define SPELL_ENCODING "ISO8859-1"
+#define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
+
+// default morphological fields
+#define MORPH_STEM "st:"
+#define MORPH_ALLOMORPH "al:"
+#define MORPH_POS "po:"
+#define MORPH_DERI_PFX "dp:"
+#define MORPH_INFL_PFX "ip:"
+#define MORPH_TERM_PFX "tp:"
+#define MORPH_DERI_SFX "ds:"
+#define MORPH_INFL_SFX "is:"
+#define MORPH_TERM_SFX "ts:"
+#define MORPH_SURF_PFX "sp:"
+#define MORPH_FREQ "fr:"
+#define MORPH_PHON "ph:"
+#define MORPH_HYPH "hy:"
+#define MORPH_PART "pa:"
+#define MORPH_FLAG "fl:"
+#define MORPH_HENTRY "_H:"
+#define MORPH_TAG_LEN strlen(MORPH_STEM)
+
+#define MSEP_FLD ' '
+#define MSEP_REC '\n'
+#define MSEP_ALT '\v'
+
+// default flags
+#define DEFAULTFLAGS 65510
+#define FORBIDDENWORD 65510
+#define ONLYUPCASEFLAG 65511
+
+// fix long pathname problem of WIN32 by using w_char std::fstream::open override
+LIBHUNSPELL_DLL_EXPORTED void myopen(std::ifstream& stream, const char* path,
+ std::ios_base::openmode mode);
+
+// convert UTF-16 characters to UTF-8
+LIBHUNSPELL_DLL_EXPORTED std::string& u16_u8(std::string& dest,
+ const std::vector<w_char>& src);
+
+// convert UTF-8 characters to UTF-16
+LIBHUNSPELL_DLL_EXPORTED int u8_u16(std::vector<w_char>& dest,
+ const std::string& src);
+
+// remove end of line char(s)
+LIBHUNSPELL_DLL_EXPORTED void mychomp(std::string& s);
+
+// duplicate string
+LIBHUNSPELL_DLL_EXPORTED char* mystrdup(const char* s);
+
+// parse into tokens with char delimiter
+LIBHUNSPELL_DLL_EXPORTED std::string::const_iterator mystrsep(const std::string &str,
+ std::string::const_iterator& start);
+
+// replace pat by rep in word and return word
+LIBHUNSPELL_DLL_EXPORTED std::string& mystrrep(std::string& str,
+ const std::string& search,
+ const std::string& replace);
+
+// append s to ends of every lines in text
+LIBHUNSPELL_DLL_EXPORTED std::string& strlinecat(std::string& str,
+ const std::string& apd);
+
+// tokenize into lines with new line
+LIBHUNSPELL_DLL_EXPORTED std::vector<std::string> line_tok(const std::string& text,
+ char breakchar);
+
+// tokenize into lines with new line and uniq in place
+LIBHUNSPELL_DLL_EXPORTED void line_uniq(std::string& text, char breakchar);
+
+LIBHUNSPELL_DLL_EXPORTED void line_uniq_app(std::string& text, char breakchar);
+
+// reverse word
+LIBHUNSPELL_DLL_EXPORTED size_t reverseword(std::string& word);
+
+// reverse word
+LIBHUNSPELL_DLL_EXPORTED size_t reverseword_utf(std::string&);
+
+// remove duplicates
+LIBHUNSPELL_DLL_EXPORTED void uniqlist(std::vector<std::string>& list);
+
+// character encoding information
+struct cs_info {
+ unsigned char ccase;
+ unsigned char clower;
+ unsigned char cupper;
+};
+
+LIBHUNSPELL_DLL_EXPORTED void initialize_utf_tbl();
+LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
+LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c,
+ int langnum);
+LIBHUNSPELL_DLL_EXPORTED w_char upper_utf(w_char u, int langnum);
+LIBHUNSPELL_DLL_EXPORTED w_char lower_utf(w_char u, int langnum);
+LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c,
+ int langnum);
+LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
+
+LIBHUNSPELL_DLL_EXPORTED struct cs_info* get_current_cs(const std::string& es);
+
+// get language identifiers of language codes
+LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const std::string& lang);
+
+// get characters of the given 8bit encoding with lower- and uppercase forms
+LIBHUNSPELL_DLL_EXPORTED std::string get_casechars(const char* enc);
+
+// convert std::string to all caps
+LIBHUNSPELL_DLL_EXPORTED std::string& mkallcap(std::string& s,
+ const struct cs_info* csconv);
+
+// convert null terminated string to all little
+LIBHUNSPELL_DLL_EXPORTED std::string& mkallsmall(std::string& s,
+ const struct cs_info* csconv);
+
+// convert first letter of string to little
+LIBHUNSPELL_DLL_EXPORTED std::string& mkinitsmall(std::string& s,
+ const struct cs_info* csconv);
+
+// convert first letter of string to capital
+LIBHUNSPELL_DLL_EXPORTED std::string& mkinitcap(std::string& s,
+ const struct cs_info* csconv);
+
+// convert first letter of UTF-8 string to capital
+LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
+mkinitcap_utf(std::vector<w_char>& u, int langnum);
+
+// convert UTF-8 string to little
+LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
+mkallsmall_utf(std::vector<w_char>& u, int langnum);
+
+// convert first letter of UTF-8 string to little
+LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
+mkinitsmall_utf(std::vector<w_char>& u, int langnum);
+
+// convert UTF-8 string to capital
+LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
+mkallcap_utf(std::vector<w_char>& u, int langnum);
+
+// get type of capitalization
+LIBHUNSPELL_DLL_EXPORTED int get_captype(const std::string& q, cs_info*);
+
+// get type of capitalization (UTF-8)
+LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(const std::vector<w_char>& q, int langnum);
+
+// strip all ignored characters in the string
+LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars_utf(
+ std::string& word,
+ const std::vector<w_char>& ignored_chars);
+
+// strip all ignored characters in the string
+LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars(
+ std::string& word,
+ const std::string& ignored_chars);
+
+LIBHUNSPELL_DLL_EXPORTED bool parse_string(const std::string& line,
+ std::string& out,
+ int ln);
+
+LIBHUNSPELL_DLL_EXPORTED bool parse_array(const std::string& line,
+ std::string& out,
+ std::vector<w_char>& out_utf16,
+ int utf8,
+ int ln);
+
+LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char* r);
+
+LIBHUNSPELL_DLL_EXPORTED bool copy_field(std::string& dest,
+ const std::string& morph,
+ const std::string& var);
+
+// conversion function for protected memory
+LIBHUNSPELL_DLL_EXPORTED void store_pointer(char* dest, char* source);
+
+// conversion function for protected memory
+LIBHUNSPELL_DLL_EXPORTED char* get_stored_pointer(const char* s);
+
+
+// to avoid unnecessary string copies and Unicode conversions
+// we simply check the ignored_chars characters in the word
+// (in the case of UTF-8 encoded strings, "false" means
+// "likely false", if ignored_chars characters are not ASCII)
+inline bool has_no_ignored_chars(const std::string& word,
+ const std::string& ignored_chars) {
+ for (std::string::const_iterator it = ignored_chars.begin(), end = ignored_chars.end(); it != end; ++it)
+ if (word.find(*it) != std::string::npos)
+ return false;
+ return true;
+}
+
+// hash entry macros
+inline char* HENTRY_DATA(struct hentry* h) {
+ char* ret;
+ if (!(h->var & H_OPT))
+ ret = NULL;
+ else if (h->var & H_OPT_ALIASM)
+ ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
+ else
+ ret = HENTRY_WORD(h) + h->blen + 1;
+ return ret;
+}
+
+inline const char* HENTRY_DATA(
+ const struct hentry* h) {
+ const char* ret;
+ if (!(h->var & H_OPT))
+ ret = NULL;
+ else if (h->var & H_OPT_ALIASM)
+ ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
+ else
+ ret = HENTRY_WORD(h) + h->blen + 1;
+ return ret;
+}
+
+// NULL-free version for warning-free OOo build
+inline const char* HENTRY_DATA2(
+ const struct hentry* h) {
+ const char* ret;
+ if (!(h->var & H_OPT))
+ ret = "";
+ else if (h->var & H_OPT_ALIASM)
+ ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
+ else
+ ret = HENTRY_WORD(h) + h->blen + 1;
+ return ret;
+}
+
+inline char* HENTRY_FIND(struct hentry* h,
+ const char* p) {
+ return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL);
+}
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/filemgr.hxx b/extensions/spellcheck/hunspell/src/filemgr.hxx
new file mode 100644
index 0000000000..7773a321a6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/filemgr.hxx
@@ -0,0 +1,77 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* file manager class - read lines of files [filename] OR [filename.hz] */
+#ifndef FILEMGR_HXX_
+#define FILEMGR_HXX_
+
+#include "mozHunspellRLBoxSandbox.h"
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/hashmgr.cxx b/extensions/spellcheck/hunspell/src/hashmgr.cxx
new file mode 100644
index 0000000000..d22f2e7b7d
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/hashmgr.cxx
@@ -0,0 +1,1415 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <limits>
+#include <sstream>
+
+#include "hashmgr.hxx"
+#include "csutil.hxx"
+#include "atypes.hxx"
+#include "langnum.hxx"
+
+// build a hash table from a munched word list
+
+HashMgr::HashMgr(const char* tpath, const char* apath, const char* key)
+ : tablesize(0),
+ tableptr(NULL),
+ flag_mode(FLAG_CHAR),
+ complexprefixes(0),
+ utf8(0),
+ forbiddenword(FORBIDDENWORD) // forbidden word signing flag
+ ,
+ numaliasf(0),
+ aliasf(NULL),
+ aliasflen(0),
+ numaliasm(0),
+ aliasm(NULL) {
+ langnum = 0;
+ csconv = 0;
+ load_config(apath, key);
+ int ec = load_tables(tpath, key);
+ if (ec) {
+ /* error condition - what should we do here */
+ HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n", ec);
+ free(tableptr);
+ //keep tablesize to 1 to fix possible division with zero
+ tablesize = 1;
+ tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*));
+ if (!tableptr) {
+ tablesize = 0;
+ }
+ }
+}
+
+HashMgr::~HashMgr() {
+ if (tableptr) {
+ // now pass through hash table freeing up everything
+ // go through column by column of the table
+ for (int i = 0; i < tablesize; i++) {
+ struct hentry* pt = tableptr[i];
+ struct hentry* nt = NULL;
+ while (pt) {
+ nt = pt->next;
+ if (pt->astr &&
+ (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen)))
+ arena_free(pt->astr);
+ arena_free(pt);
+ pt = nt;
+ }
+ }
+ free(tableptr);
+ }
+ tablesize = 0;
+
+ if (aliasf) {
+ for (int j = 0; j < (numaliasf); j++)
+ arena_free(aliasf[j]);
+ arena_free(aliasf);
+ aliasf = NULL;
+ if (aliasflen) {
+ arena_free(aliasflen);
+ aliasflen = NULL;
+ }
+ }
+ if (aliasm) {
+ for (int j = 0; j < (numaliasm); j++)
+ arena_free(aliasm[j]);
+ arena_free(aliasm);
+ aliasm = NULL;
+ }
+
+#ifndef OPENOFFICEORG
+#ifndef MOZILLA_CLIENT
+ if (utf8)
+ free_utf_tbl();
+#endif
+#endif
+
+#ifdef MOZILLA_CLIENT
+ delete[] csconv;
+#endif
+
+ assert(outstanding_arena_allocations == 0);
+}
+
+// lookup a root word in the hashtable
+
+struct hentry* HashMgr::lookup(const char* word) const {
+ struct hentry* dp;
+ if (tableptr) {
+ dp = tableptr[hash(word)];
+ if (!dp)
+ return NULL;
+ for (; dp != NULL; dp = dp->next) {
+ if (strcmp(word, dp->word) == 0)
+ return dp;
+ }
+ }
+ return NULL;
+}
+
+// add a word to the hash table (private)
+int HashMgr::add_word(const std::string& in_word,
+ int wcl,
+ unsigned short* aff,
+ int al,
+ const std::string* in_desc,
+ bool onlyupcase,
+ int captype) {
+ const std::string* word = &in_word;
+ const std::string* desc = in_desc;
+
+ std::string *word_copy = NULL;
+ std::string *desc_copy = NULL;
+ if ((!ignorechars.empty() && !has_no_ignored_chars(in_word, ignorechars)) || complexprefixes) {
+ word_copy = new std::string(in_word);
+
+ if (!ignorechars.empty()) {
+ if (utf8) {
+ wcl = remove_ignored_chars_utf(*word_copy, ignorechars_utf16);
+ } else {
+ remove_ignored_chars(*word_copy, ignorechars);
+ }
+ }
+
+ if (complexprefixes) {
+ if (utf8)
+ wcl = reverseword_utf(*word_copy);
+ else
+ reverseword(*word_copy);
+
+ if (in_desc && !aliasm) {
+ desc_copy = new std::string(*in_desc);
+
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(*desc_copy);
+ else
+ reverseword(*desc_copy);
+ }
+ desc = desc_copy;
+ }
+ }
+
+ word = word_copy;
+ }
+
+ bool upcasehomonym = false;
+ int descl = desc ? (aliasm ? sizeof(char*) : desc->size() + 1) : 0;
+ // variable-length hash record with word and optional fields
+ struct hentry* hp =
+ (struct hentry*)arena_alloc(sizeof(struct hentry) + word->size() + descl);
+ if (!hp) {
+ delete desc_copy;
+ delete word_copy;
+ return 1;
+ }
+
+ char* hpw = hp->word;
+ strcpy(hpw, word->c_str());
+
+ int i = hash(hpw);
+
+ hp->blen = (unsigned char)word->size();
+ hp->clen = (unsigned char)wcl;
+ hp->alen = (short)al;
+ hp->astr = aff;
+ hp->next = NULL;
+ hp->next_homonym = NULL;
+ hp->var = (captype == INITCAP) ? H_OPT_INITCAP : 0;
+
+ // store the description string or its pointer
+ if (desc) {
+ hp->var |= H_OPT;
+ if (aliasm) {
+ hp->var |= H_OPT_ALIASM;
+ store_pointer(hpw + word->size() + 1, get_aliasm(atoi(desc->c_str())));
+ } else {
+ strcpy(hpw + word->size() + 1, desc->c_str());
+ }
+ if (strstr(HENTRY_DATA(hp), MORPH_PHON)) {
+ hp->var |= H_OPT_PHON;
+ // store ph: fields (pronounciation, misspellings, old orthography etc.)
+ // of a morphological description in reptable to use in REP replacements.
+ if (reptable.capacity() < (unsigned int)(tablesize/MORPH_PHON_RATIO))
+ reptable.reserve(tablesize/MORPH_PHON_RATIO);
+ std::string fields = HENTRY_DATA(hp);
+ std::string::const_iterator iter = fields.begin();
+ std::string::const_iterator start_piece = mystrsep(fields, iter);
+ while (start_piece != fields.end()) {
+ if (std::string(start_piece, iter).find(MORPH_PHON) == 0) {
+ std::string ph = std::string(start_piece, iter).substr(sizeof MORPH_PHON - 1);
+ if (ph.size() > 0) {
+ std::vector<w_char> w;
+ size_t strippatt;
+ std::string wordpart;
+ // dictionary based REP replacement, separated by "->"
+ // for example "pretty ph:prity ph:priti->pretti" to handle
+ // both prity -> pretty and pritier -> prettiest suggestions.
+ if (((strippatt = ph.find("->")) != std::string::npos) &&
+ (strippatt > 0) && (strippatt < ph.size() - 2)) {
+ wordpart = ph.substr(strippatt + 2);
+ ph.erase(ph.begin() + strippatt, ph.end());
+ } else
+ wordpart = in_word;
+ // when the ph: field ends with the character *,
+ // strip last character of the pattern and the replacement
+ // to match in REP suggestions also at character changes,
+ // for example, "pretty ph:prity*" results "prit->prett"
+ // REP replacement instead of "prity->pretty", to get
+ // prity->pretty and pritiest->prettiest suggestions.
+ if (ph.at(ph.size()-1) == '*') {
+ strippatt = 1;
+ size_t stripword = 0;
+ if (utf8) {
+ while ((strippatt < ph.size()) &&
+ ((ph.at(ph.size()-strippatt-1) & 0xc0) == 0x80))
+ ++strippatt;
+ while ((stripword < wordpart.size()) &&
+ ((wordpart.at(wordpart.size()-stripword-1) & 0xc0) == 0x80))
+ ++stripword;
+ }
+ ++strippatt;
+ ++stripword;
+ if ((ph.size() > strippatt) && (wordpart.size() > stripword)) {
+ ph.erase(ph.size()-strippatt, strippatt);
+ wordpart.erase(in_word.size()-stripword, stripword);
+ }
+ }
+ // capitalize lowercase pattern for capitalized words to support
+ // good suggestions also for capitalized misspellings, eg.
+ // Wednesday ph:wendsay
+ // results wendsay -> Wednesday and Wendsay -> Wednesday, too.
+ if (captype==INITCAP) {
+ std::string ph_capitalized;
+ if (utf8) {
+ u8_u16(w, ph);
+ if (get_captype_utf8(w, langnum) == NOCAP) {
+ mkinitcap_utf(w, langnum);
+ u16_u8(ph_capitalized, w);
+ }
+ } else if (get_captype(ph, csconv) == NOCAP)
+ mkinitcap(ph_capitalized, csconv);
+
+ if (ph_capitalized.size() > 0) {
+ // add also lowercase word in the case of German or
+ // Hungarian to support lowercase suggestions lowercased by
+ // compound word generation or derivational suffixes
+ // (for example by adjectival suffix "-i" of geographical
+ // names in Hungarian:
+ // Massachusetts ph:messzecsuzec
+ // messzecsuzeci -> massachusettsi (adjective)
+ // For lowercasing by conditional PFX rules, see
+ // tests/germancompounding test example or the
+ // Hungarian dictionary.)
+ if (langnum == LANG_de || langnum == LANG_hu) {
+ std::string wordpart_lower(wordpart);
+ if (utf8) {
+ u8_u16(w, wordpart_lower);
+ mkallsmall_utf(w, langnum);
+ u16_u8(wordpart_lower, w);
+ } else {
+ mkallsmall(wordpart_lower, csconv);
+ }
+ reptable.push_back(replentry());
+ reptable.back().pattern.assign(ph);
+ reptable.back().outstrings[0].assign(wordpart_lower);
+ }
+ reptable.push_back(replentry());
+ reptable.back().pattern.assign(ph_capitalized);
+ reptable.back().outstrings[0].assign(wordpart);
+ }
+ }
+ reptable.push_back(replentry());
+ reptable.back().pattern.assign(ph);
+ reptable.back().outstrings[0].assign(wordpart);
+ }
+ }
+ start_piece = mystrsep(fields, iter);
+ }
+ }
+ }
+
+ struct hentry* dp = tableptr[i];
+ if (!dp) {
+ tableptr[i] = hp;
+ delete desc_copy;
+ delete word_copy;
+ return 0;
+ }
+ while (dp->next != NULL) {
+ if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) {
+ // remove hidden onlyupcase homonym
+ if (!onlyupcase) {
+ if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
+ arena_free(dp->astr);
+ dp->astr = hp->astr;
+ dp->alen = hp->alen;
+ arena_free(hp);
+ delete desc_copy;
+ delete word_copy;
+ return 0;
+ } else {
+ dp->next_homonym = hp;
+ }
+ } else {
+ upcasehomonym = true;
+ }
+ }
+ dp = dp->next;
+ }
+ if (strcmp(hp->word, dp->word) == 0) {
+ // remove hidden onlyupcase homonym
+ if (!onlyupcase) {
+ if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
+ arena_free(dp->astr);
+ dp->astr = hp->astr;
+ dp->alen = hp->alen;
+ arena_free(hp);
+ delete desc_copy;
+ delete word_copy;
+ return 0;
+ } else {
+ dp->next_homonym = hp;
+ }
+ } else {
+ upcasehomonym = true;
+ }
+ }
+ if (!upcasehomonym) {
+ dp->next = hp;
+ } else {
+ // remove hidden onlyupcase homonym
+ if (hp->astr)
+ arena_free(hp->astr);
+ arena_free(hp);
+ }
+
+ delete desc_copy;
+ delete word_copy;
+ return 0;
+}
+
+int HashMgr::add_hidden_capitalized_word(const std::string& word,
+ int wcl,
+ unsigned short* flags,
+ int flagslen,
+ const std::string* dp,
+ int captype) {
+ if (flags == NULL)
+ flagslen = 0;
+
+ // add inner capitalized forms to handle the following allcap forms:
+ // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
+ // Allcaps with suffixes: CIA's -> CIA'S
+ if (((captype == HUHCAP) || (captype == HUHINITCAP) ||
+ ((captype == ALLCAP) && (flagslen != 0))) &&
+ !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) {
+ unsigned short* flags2 =
+ (unsigned short*)arena_alloc(sizeof(unsigned short) * (flagslen + 1));
+ if (!flags2)
+ return 1;
+ if (flagslen)
+ memcpy(flags2, flags, flagslen * sizeof(unsigned short));
+ flags2[flagslen] = ONLYUPCASEFLAG;
+ if (utf8) {
+ std::string st;
+ std::vector<w_char> w;
+ u8_u16(w, word);
+ mkallsmall_utf(w, langnum);
+ mkinitcap_utf(w, langnum);
+ u16_u8(st, w);
+ return add_word(st, wcl, flags2, flagslen + 1, dp, true, INITCAP);
+ } else {
+ std::string new_word(word);
+ mkallsmall(new_word, csconv);
+ mkinitcap(new_word, csconv);
+ int ret = add_word(new_word, wcl, flags2, flagslen + 1, dp, true, INITCAP);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+// detect captype and modify word length for UTF-8 encoding
+int HashMgr::get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf) {
+ int len;
+ if (utf8) {
+ len = u8_u16(workbuf, word);
+ *captype = get_captype_utf8(workbuf, langnum);
+ } else {
+ len = word.size();
+ *captype = get_captype(word, csconv);
+ }
+ return len;
+}
+
+int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
+ std::vector<w_char> workbuf;
+ return get_clen_and_captype(word, captype, workbuf);
+}
+
+// remove word (personal dictionary function for standalone applications)
+int HashMgr::remove(const std::string& word) {
+ struct hentry* dp = lookup(word.c_str());
+ while (dp) {
+ if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
+ unsigned short* flags =
+ (unsigned short*)arena_alloc(sizeof(unsigned short) * (dp->alen + 1));
+ if (!flags)
+ return 1;
+ for (int i = 0; i < dp->alen; i++)
+ flags[i] = dp->astr[i];
+ flags[dp->alen] = forbiddenword;
+ arena_free(dp->astr);
+ dp->astr = flags;
+ dp->alen++;
+ std::sort(flags, flags + dp->alen);
+ }
+ dp = dp->next_homonym;
+ }
+ return 0;
+}
+
+/* remove forbidden flag to add a personal word to the hash */
+int HashMgr::remove_forbidden_flag(const std::string& word) {
+ struct hentry* dp = lookup(word.c_str());
+ if (!dp)
+ return 1;
+ while (dp) {
+ if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen))
+ dp->alen = 0; // XXX forbidden words of personal dic.
+ dp = dp->next_homonym;
+ }
+ return 0;
+}
+
+// add a custom dic. word to the hash table (public)
+int HashMgr::add(const std::string& word) {
+ if (remove_forbidden_flag(word)) {
+ int captype;
+ int al = 0;
+ unsigned short* flags = NULL;
+ int wcl = get_clen_and_captype(word, &captype);
+ add_word(word, wcl, flags, al, NULL, false, captype);
+ return add_hidden_capitalized_word(word, wcl, flags, al, NULL,
+ captype);
+ }
+ return 0;
+}
+
+int HashMgr::add_with_affix(const std::string& word, const std::string& example) {
+ // detect captype and modify word length for UTF-8 encoding
+ struct hentry* dp = lookup(example.c_str());
+ remove_forbidden_flag(word);
+ if (dp && dp->astr) {
+ int captype;
+ int wcl = get_clen_and_captype(word, &captype);
+ if (aliasf) {
+ add_word(word, wcl, dp->astr, dp->alen, NULL, false, captype);
+ } else {
+ unsigned short* flags =
+ (unsigned short*) arena_alloc(dp->alen * sizeof(unsigned short));
+ if (flags) {
+ memcpy((void*)flags, (void*)dp->astr,
+ dp->alen * sizeof(unsigned short));
+ add_word(word, wcl, flags, dp->alen, NULL, false, captype);
+ } else
+ return 1;
+ }
+ return add_hidden_capitalized_word(word, wcl, dp->astr,
+ dp->alen, NULL, captype);
+ }
+ return 1;
+}
+
+// walk the hash table entry by entry - null at end
+// initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp);
+struct hentry* HashMgr::walk_hashtable(int& col, struct hentry* hp) const {
+ if (hp && hp->next != NULL)
+ return hp->next;
+ for (col++; col < tablesize; col++) {
+ if (tableptr[col])
+ return tableptr[col];
+ }
+ // null at end and reset to start
+ col = -1;
+ return NULL;
+}
+
+// load a munched word list and build a hash table on the fly
+int HashMgr::load_tables(const char* tpath, const char* key) {
+ // open dictionary file
+ FileMgr* dict = new FileMgr(tpath, key);
+ if (dict == NULL)
+ return 1;
+
+ // first read the first line of file to get hash table size */
+ std::string ts;
+ if (!dict->getline(ts)) {
+ HUNSPELL_WARNING(stderr, "error: empty dic file %s\n", tpath);
+ delete dict;
+ return 2;
+ }
+ mychomp(ts);
+
+ /* remove byte order mark */
+ if (ts.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) {
+ ts.erase(0, 3);
+ }
+
+ tablesize = atoi(ts.c_str());
+
+ int nExtra = 5 + USERWORD;
+
+ if (tablesize <= 0 ||
+ (tablesize >= (std::numeric_limits<int>::max() - 1 - nExtra) /
+ int(sizeof(struct hentry*)))) {
+ HUNSPELL_WARNING(
+ stderr, "error: line 1: missing or bad word count in the dic file\n");
+ delete dict;
+ return 4;
+ }
+ tablesize += nExtra;
+ if ((tablesize % 2) == 0)
+ tablesize++;
+
+ // allocate the hash table
+ tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*));
+ if (!tableptr) {
+ delete dict;
+ return 3;
+ }
+
+ // loop through all words on much list and add to hash
+ // table and create word and affix strings
+
+ std::vector<w_char> workbuf;
+
+ while (dict->getline(ts)) {
+ mychomp(ts);
+ // split each line into word and morphological description
+ size_t dp_pos = 0;
+ while ((dp_pos = ts.find(':', dp_pos)) != std::string::npos) {
+ if ((dp_pos > 3) && (ts[dp_pos - 3] == ' ' || ts[dp_pos - 3] == '\t')) {
+ for (dp_pos -= 3; dp_pos > 0 && (ts[dp_pos-1] == ' ' || ts[dp_pos-1] == '\t'); --dp_pos)
+ ;
+ if (dp_pos == 0) { // missing word
+ dp_pos = std::string::npos;
+ } else {
+ ++dp_pos;
+ }
+ break;
+ }
+ ++dp_pos;
+ }
+
+ // tabulator is the old morphological field separator
+ size_t dp2_pos = ts.find('\t');
+ if (dp2_pos != std::string::npos && (dp_pos == std::string::npos || dp2_pos < dp_pos)) {
+ dp_pos = dp2_pos + 1;
+ }
+
+ std::string dp;
+ if (dp_pos != std::string::npos) {
+ dp.assign(ts.substr(dp_pos));
+ ts.resize(dp_pos - 1);
+ }
+
+ // split each line into word and affix char strings
+ // "\/" signs slash in words (not affix separator)
+ // "/" at beginning of the line is word character (not affix separator)
+ size_t ap_pos = ts.find('/');
+ while (ap_pos != std::string::npos) {
+ if (ap_pos == 0) {
+ ++ap_pos;
+ continue;
+ } else if (ts[ap_pos - 1] != '\\')
+ break;
+ // replace "\/" with "/"
+ ts.erase(ap_pos - 1, 1);
+ ap_pos = ts.find('/', ap_pos);
+ }
+
+ unsigned short* flags;
+ int al;
+ if (ap_pos != std::string::npos && ap_pos != ts.size()) {
+ std::string ap(ts.substr(ap_pos + 1));
+ ts.resize(ap_pos);
+ if (aliasf) {
+ int index = atoi(ap.c_str());
+ al = get_aliasf(index, &flags, dict);
+ if (!al) {
+ HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n",
+ dict->getlinenum());
+ }
+ } else {
+ al = decode_flags(&flags, ap.c_str(), dict, /* arena = */ true);
+ if (al == -1) {
+ HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
+ delete dict;
+ return 6;
+ }
+ std::sort(flags, flags + al);
+ }
+ } else {
+ al = 0;
+ flags = NULL;
+ }
+
+ int captype;
+ int wcl = get_clen_and_captype(ts, &captype, workbuf);
+ const std::string *dp_str = dp.empty() ? NULL : &dp;
+ // add the word and its index plus its capitalized form optionally
+ if (add_word(ts, wcl, flags, al, dp_str, false, captype) ||
+ add_hidden_capitalized_word(ts, wcl, flags, al, dp_str, captype)) {
+ delete dict;
+ return 5;
+ }
+ }
+
+ delete dict;
+ return 0;
+}
+
+// the hash function is a simple load and rotate
+// algorithm borrowed
+int HashMgr::hash(const char* word) const {
+ unsigned long hv = 0;
+ for (int i = 0; i < 4 && *word != 0; i++)
+ hv = (hv << 8) | (*word++);
+ while (*word != 0) {
+ ROTATE(hv, ROTATE_LEN);
+ hv ^= (*word++);
+ }
+ return (unsigned long)hv % tablesize;
+}
+
+int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const {
+ auto alloc = [arena, this](int n) { return arena ? this->arena_alloc(n) : malloc(n); };
+ int len;
+ if (flags.empty()) {
+ *result = NULL;
+ return 0;
+ }
+ switch (flag_mode) {
+ case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
+ len = flags.size();
+ if (len % 2 == 1)
+ HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n",
+ af->getlinenum());
+ len /= 2;
+ *result = (unsigned short*)alloc(len * sizeof(unsigned short));
+ if (!*result)
+ return -1;
+ for (int i = 0; i < len; i++) {
+ (*result)[i] = ((unsigned short)((unsigned char)flags[i * 2]) << 8) +
+ (unsigned char)flags[i * 2 + 1];
+ }
+ break;
+ }
+ case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521
+ // 23 233)
+ len = 1;
+ unsigned short* dest;
+ for (size_t i = 0; i < flags.size(); ++i) {
+ if (flags[i] == ',')
+ len++;
+ }
+ *result = (unsigned short*)alloc(len * sizeof(unsigned short));
+ if (!*result)
+ return -1;
+ dest = *result;
+ const char* src = flags.c_str();
+ for (const char* p = src; *p; p++) {
+ if (*p == ',') {
+ int i = atoi(src);
+ if (i >= DEFAULTFLAGS)
+ HUNSPELL_WARNING(
+ stderr, "error: line %d: flag id %d is too large (max: %d)\n",
+ af->getlinenum(), i, DEFAULTFLAGS - 1);
+ *dest = (unsigned short)i;
+ if (*dest == 0)
+ HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
+ af->getlinenum());
+ src = p + 1;
+ dest++;
+ }
+ }
+ int i = atoi(src);
+ if (i >= DEFAULTFLAGS)
+ HUNSPELL_WARNING(stderr,
+ "error: line %d: flag id %d is too large (max: %d)\n",
+ af->getlinenum(), i, DEFAULTFLAGS - 1);
+ *dest = (unsigned short)i;
+ if (*dest == 0)
+ HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
+ af->getlinenum());
+ break;
+ }
+ case FLAG_UNI: { // UTF-8 characters
+ std::vector<w_char> w;
+ u8_u16(w, flags);
+ len = w.size();
+ *result = (unsigned short*)alloc(len * sizeof(unsigned short));
+ if (!*result)
+ return -1;
+ memcpy(*result, w.data(), len * sizeof(short));
+ break;
+ }
+ default: { // Ispell's one-character flags (erfg -> e r f g)
+ unsigned short* dest;
+ len = flags.size();
+ *result = (unsigned short*)alloc(len * sizeof(unsigned short));
+ if (!*result)
+ return -1;
+ dest = *result;
+ for (size_t i = 0; i < flags.size(); ++i) {
+ *dest = (unsigned char)flags[i];
+ dest++;
+ }
+ }
+ }
+ return len;
+}
+
+bool HashMgr::decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const {
+ if (flags.empty()) {
+ return false;
+ }
+ switch (flag_mode) {
+ case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
+ size_t len = flags.size();
+ if (len % 2 == 1)
+ HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n",
+ af->getlinenum());
+ len /= 2;
+ result.reserve(result.size() + len);
+ for (size_t i = 0; i < len; ++i) {
+ result.push_back(((unsigned short)((unsigned char)flags[i * 2]) << 8) +
+ (unsigned char)flags[i * 2 + 1]);
+ }
+ break;
+ }
+ case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521
+ // 23 233)
+ const char* src = flags.c_str();
+ for (const char* p = src; *p; p++) {
+ if (*p == ',') {
+ int i = atoi(src);
+ if (i >= DEFAULTFLAGS)
+ HUNSPELL_WARNING(
+ stderr, "error: line %d: flag id %d is too large (max: %d)\n",
+ af->getlinenum(), i, DEFAULTFLAGS - 1);
+ result.push_back((unsigned short)i);
+ if (result.back() == 0)
+ HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
+ af->getlinenum());
+ src = p + 1;
+ }
+ }
+ int i = atoi(src);
+ if (i >= DEFAULTFLAGS)
+ HUNSPELL_WARNING(stderr,
+ "error: line %d: flag id %d is too large (max: %d)\n",
+ af->getlinenum(), i, DEFAULTFLAGS - 1);
+ result.push_back((unsigned short)i);
+ if (result.back() == 0)
+ HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n",
+ af->getlinenum());
+ break;
+ }
+ case FLAG_UNI: { // UTF-8 characters
+ std::vector<w_char> w;
+ u8_u16(w, flags);
+ size_t len = w.size();
+ size_t origsize = result.size();
+ result.resize(origsize + len);
+ memcpy(result.data() + origsize, w.data(), len * sizeof(short));
+ break;
+ }
+ default: { // Ispell's one-character flags (erfg -> e r f g)
+ result.reserve(flags.size());
+ for (size_t i = 0; i < flags.size(); ++i) {
+ result.push_back((unsigned char)flags[i]);
+ }
+ }
+ }
+ return true;
+}
+
+unsigned short HashMgr::decode_flag(const char* f) const {
+ unsigned short s = 0;
+ int i;
+ switch (flag_mode) {
+ case FLAG_LONG:
+ s = ((unsigned short)((unsigned char)f[0]) << 8) + (unsigned char)f[1];
+ break;
+ case FLAG_NUM:
+ i = atoi(f);
+ if (i >= DEFAULTFLAGS)
+ HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n",
+ i, DEFAULTFLAGS - 1);
+ s = (unsigned short)i;
+ break;
+ case FLAG_UNI: {
+ std::vector<w_char> w;
+ u8_u16(w, f);
+ if (!w.empty())
+ memcpy(&s, w.data(), 1 * sizeof(short));
+ break;
+ }
+ default:
+ s = *(unsigned char*)f;
+ }
+ if (s == 0)
+ HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n");
+ return s;
+}
+
+// This function is only called by external consumers, and so using the default
+// allocator with mystrdup is correct.
+char* HashMgr::encode_flag(unsigned short f) const {
+ if (f == 0)
+ return mystrdup("(NULL)");
+ std::string ch;
+ if (flag_mode == FLAG_LONG) {
+ ch.push_back((unsigned char)(f >> 8));
+ ch.push_back((unsigned char)(f - ((f >> 8) << 8)));
+ } else if (flag_mode == FLAG_NUM) {
+ std::ostringstream stream;
+ stream << f;
+ ch = stream.str();
+ } else if (flag_mode == FLAG_UNI) {
+ const w_char* w_c = (const w_char*)&f;
+ std::vector<w_char> w(w_c, w_c + 1);
+ u16_u8(ch, w);
+ } else {
+ ch.push_back((unsigned char)(f));
+ }
+ return mystrdup(ch.c_str());
+}
+
+// read in aff file and set flag mode
+int HashMgr::load_config(const char* affpath, const char* key) {
+ int firstline = 1;
+
+ // open the affix file
+ FileMgr* afflst = new FileMgr(affpath, key);
+ if (!afflst) {
+ HUNSPELL_WARNING(
+ stderr, "Error - could not open affix description file %s\n", affpath);
+ return 1;
+ }
+
+ // read in each line ignoring any that do not
+ // start with a known line type indicator
+
+ std::string line;
+ while (afflst->getline(line)) {
+ mychomp(line);
+
+ /* remove byte order mark */
+ if (firstline) {
+ firstline = 0;
+ if (line.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) {
+ line.erase(0, 3);
+ }
+ }
+
+ /* parse in the try string */
+ if ((line.compare(0, 4, "FLAG", 4) == 0) && line.size() > 4 && isspace(line[4])) {
+ if (flag_mode != FLAG_CHAR) {
+ HUNSPELL_WARNING(stderr,
+ "error: line %d: multiple definitions of the FLAG "
+ "affix file parameter\n",
+ afflst->getlinenum());
+ }
+ if (line.find("long") != std::string::npos)
+ flag_mode = FLAG_LONG;
+ if (line.find("num") != std::string::npos)
+ flag_mode = FLAG_NUM;
+ if (line.find("UTF-8") != std::string::npos)
+ flag_mode = FLAG_UNI;
+ if (flag_mode == FLAG_CHAR) {
+ HUNSPELL_WARNING(
+ stderr,
+ "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n",
+ afflst->getlinenum());
+ }
+ }
+
+ if (line.compare(0, 13, "FORBIDDENWORD", 13) == 0) {
+ std::string st;
+ if (!parse_string(line, st, afflst->getlinenum())) {
+ delete afflst;
+ return 1;
+ }
+ forbiddenword = decode_flag(st.c_str());
+ }
+
+ if (line.compare(0, 3, "SET", 3) == 0) {
+ if (!parse_string(line, enc, afflst->getlinenum())) {
+ delete afflst;
+ return 1;
+ }
+ if (enc == "UTF-8") {
+ utf8 = 1;
+#ifndef OPENOFFICEORG
+#ifndef MOZILLA_CLIENT
+ initialize_utf_tbl();
+#endif
+#endif
+ } else
+ csconv = get_current_cs(enc);
+ }
+
+ if (line.compare(0, 4, "LANG", 4) == 0) {
+ if (!parse_string(line, lang, afflst->getlinenum())) {
+ delete afflst;
+ return 1;
+ }
+ langnum = get_lang_num(lang);
+ }
+
+ /* parse in the ignored characters (for example, Arabic optional diacritics
+ * characters */
+ if (line.compare(0, 6, "IGNORE", 6) == 0) {
+ if (!parse_array(line, ignorechars, ignorechars_utf16,
+ utf8, afflst->getlinenum())) {
+ delete afflst;
+ return 1;
+ }
+ }
+
+ if ((line.compare(0, 2, "AF", 2) == 0) && line.size() > 2 && isspace(line[2])) {
+ if (!parse_aliasf(line, afflst)) {
+ delete afflst;
+ return 1;
+ }
+ }
+
+ if ((line.compare(0, 2, "AM", 2) == 0) && line.size() > 2 && isspace(line[2])) {
+ if (!parse_aliasm(line, afflst)) {
+ delete afflst;
+ return 1;
+ }
+ }
+
+ if (line.compare(0, 15, "COMPLEXPREFIXES", 15) == 0)
+ complexprefixes = 1;
+
+ /* parse in the typical fault correcting table */
+ if (line.compare(0, 3, "REP", 3) == 0) {
+ if (!parse_reptable(line, afflst)) {
+ delete afflst;
+ return 1;
+ }
+ }
+
+ // don't check the full affix file, yet
+ if (((line.compare(0, 3, "SFX", 3) == 0) ||
+ (line.compare(0, 3, "PFX", 3) == 0)) &&
+ line.size() > 3 && isspace(line[3]) &&
+ !reptable.empty()) // (REP table is in the end of Afrikaans aff file)
+ break;
+ }
+
+ if (csconv == NULL)
+ csconv = get_current_cs(SPELL_ENCODING);
+ delete afflst;
+ return 0;
+}
+
+/* parse in the ALIAS table */
+bool HashMgr::parse_aliasf(const std::string& line, FileMgr* af) {
+ if (numaliasf != 0) {
+ HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+ af->getlinenum());
+ return false;
+ }
+ int i = 0;
+ int np = 0;
+ std::string::const_iterator iter = line.begin();
+ std::string::const_iterator start_piece = mystrsep(line, iter);
+ while (start_piece != line.end()) {
+ switch (i) {
+ case 0: {
+ np++;
+ break;
+ }
+ case 1: {
+ numaliasf = atoi(std::string(start_piece, iter).c_str());
+ if (numaliasf < 1) {
+ numaliasf = 0;
+ aliasf = NULL;
+ aliasflen = NULL;
+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+ af->getlinenum());
+ return false;
+ }
+ aliasf =
+ (unsigned short**)arena_alloc(numaliasf * sizeof(unsigned short*));
+ aliasflen =
+ (unsigned short*)arena_alloc(numaliasf * sizeof(unsigned short));
+ if (!aliasf || !aliasflen) {
+ numaliasf = 0;
+ if (aliasf)
+ arena_free(aliasf);
+ if (aliasflen)
+ arena_free(aliasflen);
+ aliasf = NULL;
+ aliasflen = NULL;
+ return false;
+ }
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(line, iter);
+ }
+ if (np != 2) {
+ numaliasf = 0;
+ arena_free(aliasf);
+ arena_free(aliasflen);
+ aliasf = NULL;
+ aliasflen = NULL;
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ return false;
+ }
+
+ /* now parse the numaliasf lines to read in the remainder of the table */
+ for (int j = 0; j < numaliasf; j++) {
+ std::string nl;
+ aliasf[j] = NULL;
+ aliasflen[j] = 0;
+ i = 0;
+ if (af->getline(nl)) {
+ mychomp(nl);
+ iter = nl.begin();
+ start_piece = mystrsep(nl, iter);
+ bool errored = false;
+ while (!errored && start_piece != nl.end()) {
+ switch (i) {
+ case 0: {
+ if (nl.compare(start_piece - nl.begin(), 2, "AF", 2) != 0) {
+ errored = true;
+ break;
+ }
+ break;
+ }
+ case 1: {
+ std::string piece(start_piece, iter);
+ aliasflen[j] =
+ (unsigned short)decode_flags(&(aliasf[j]), piece, af, /* arena = */ true);
+ std::sort(aliasf[j], aliasf[j] + aliasflen[j]);
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(nl, iter);
+ }
+ }
+ if (!aliasf[j]) {
+ for (int k = 0; k < j; ++k) {
+ arena_free(aliasf[k]);
+ }
+ arena_free(aliasf);
+ arena_free(aliasflen);
+ aliasf = NULL;
+ aliasflen = NULL;
+ numaliasf = 0;
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ return false;
+ }
+ }
+ return true;
+}
+
+int HashMgr::is_aliasf() const {
+ return (aliasf != NULL);
+}
+
+int HashMgr::get_aliasf(int index, unsigned short** fvec, FileMgr* af) const {
+ if ((index > 0) && (index <= numaliasf)) {
+ *fvec = aliasf[index - 1];
+ return aliasflen[index - 1];
+ }
+ HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n",
+ af->getlinenum(), index);
+ *fvec = NULL;
+ return 0;
+}
+
+/* parse morph alias definitions */
+bool HashMgr::parse_aliasm(const std::string& line, FileMgr* af) {
+ if (numaliasm != 0) {
+ HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+ af->getlinenum());
+ return false;
+ }
+ int i = 0;
+ int np = 0;
+ std::string::const_iterator iter = line.begin();
+ std::string::const_iterator start_piece = mystrsep(line, iter);
+ while (start_piece != line.end()) {
+ switch (i) {
+ case 0: {
+ np++;
+ break;
+ }
+ case 1: {
+ numaliasm = atoi(std::string(start_piece, iter).c_str());
+ if (numaliasm < 1) {
+ HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
+ af->getlinenum());
+ return false;
+ }
+ aliasm = (char**)arena_alloc(numaliasm * sizeof(char*));
+ if (!aliasm) {
+ numaliasm = 0;
+ return false;
+ }
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(line, iter);
+ }
+ if (np != 2) {
+ numaliasm = 0;
+ arena_free(aliasm);
+ aliasm = NULL;
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ return false;
+ }
+
+ /* now parse the numaliasm lines to read in the remainder of the table */
+ for (int j = 0; j < numaliasm; j++) {
+ std::string nl;
+ aliasm[j] = NULL;
+ if (af->getline(nl)) {
+ mychomp(nl);
+ iter = nl.begin();
+ i = 0;
+ start_piece = mystrsep(nl, iter);
+ bool errored = false;
+ while (!errored && start_piece != nl.end()) {
+ switch (i) {
+ case 0: {
+ if (nl.compare(start_piece - nl.begin(), 2, "AM", 2) != 0) {
+ errored = true;
+ break;
+ }
+ break;
+ }
+ case 1: {
+ // add the remaining of the line
+ std::string::const_iterator end = nl.end();
+ std::string chunk(start_piece, end);
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(chunk);
+ else
+ reverseword(chunk);
+ }
+ size_t sl = chunk.length() + 1;
+ aliasm[j] = (char*)arena_alloc(sl);
+ if (aliasm[j]) {
+ memcpy(aliasm[j], chunk.c_str(), sl);
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(nl, iter);
+ }
+ }
+ if (!aliasm[j]) {
+ numaliasm = 0;
+ for (int k = 0; k < j; ++k) {
+ arena_free(aliasm[k]);
+ }
+ arena_free(aliasm);
+ aliasm = NULL;
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ return false;
+ }
+ }
+ return true;
+}
+
+int HashMgr::is_aliasm() const {
+ return (aliasm != NULL);
+}
+
+char* HashMgr::get_aliasm(int index) const {
+ if ((index > 0) && (index <= numaliasm))
+ return aliasm[index - 1];
+ HUNSPELL_WARNING(stderr, "error: bad morph. alias index: %d\n", index);
+ return NULL;
+}
+
+/* parse in the typical fault correcting table */
+bool HashMgr::parse_reptable(const std::string& line, FileMgr* af) {
+ if (!reptable.empty()) {
+ HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n",
+ af->getlinenum());
+ return false;
+ }
+ int numrep = -1;
+ int i = 0;
+ int np = 0;
+ std::string::const_iterator iter = line.begin();
+ std::string::const_iterator start_piece = mystrsep(line, iter);
+ while (start_piece != line.end()) {
+ switch (i) {
+ case 0: {
+ np++;
+ break;
+ }
+ case 1: {
+ numrep = atoi(std::string(start_piece, iter).c_str());
+ if (numrep < 1) {
+ HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n",
+ af->getlinenum());
+ return false;
+ }
+ reptable.reserve(numrep);
+ np++;
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(line, iter);
+ }
+ if (np != 2) {
+ HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",
+ af->getlinenum());
+ return false;
+ }
+
+ /* now parse the numrep lines to read in the remainder of the table */
+ for (int j = 0; j < numrep; ++j) {
+ std::string nl;
+ reptable.push_back(replentry());
+ int type = 0;
+ if (af->getline(nl)) {
+ mychomp(nl);
+ iter = nl.begin();
+ i = 0;
+ start_piece = mystrsep(nl, iter);
+ bool errored = false;
+ while (!errored && start_piece != nl.end()) {
+ switch (i) {
+ case 0: {
+ if (nl.compare(start_piece - nl.begin(), 3, "REP", 3) != 0) {
+ errored = true;
+ break;
+ }
+ break;
+ }
+ case 1: {
+ if (*start_piece == '^')
+ type = 1;
+ reptable.back().pattern.assign(start_piece + type, iter);
+ mystrrep(reptable.back().pattern, "_", " ");
+ if (!reptable.back().pattern.empty() && reptable.back().pattern[reptable.back().pattern.size() - 1] == '$') {
+ type += 2;
+ reptable.back().pattern.resize(reptable.back().pattern.size() - 1);
+ }
+ break;
+ }
+ case 2: {
+ reptable.back().outstrings[type].assign(start_piece, iter);
+ mystrrep(reptable.back().outstrings[type], "_", " ");
+ break;
+ }
+ default:
+ break;
+ }
+ ++i;
+ start_piece = mystrsep(nl, iter);
+ }
+ }
+ if (reptable.back().pattern.empty() || reptable.back().outstrings[type].empty()) {
+ HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n",
+ af->getlinenum());
+ reptable.clear();
+ return false;
+ }
+ }
+ return true;
+}
+
+// return replacing table
+const std::vector<replentry>& HashMgr::get_reptable() const {
+ return reptable;
+}
+
+void* HashMgr::arena_alloc(int num_bytes) {
+ static const int MIN_CHUNK_SIZE = 4096;
+ if (arena.empty() || (current_chunk_size - current_chunk_offset < num_bytes)) {
+ current_chunk_size = std::max(MIN_CHUNK_SIZE, num_bytes);
+ arena.push_back(std::make_unique<uint8_t[]>(current_chunk_size));
+ current_chunk_offset = 0;
+ }
+
+ uint8_t* ptr = &arena.back()[current_chunk_offset];
+ current_chunk_offset += num_bytes;
+ outstanding_arena_allocations++;
+ return ptr;
+}
+
+void HashMgr::arena_free(void* ptr) {
+ --outstanding_arena_allocations;
+ assert(outstanding_arena_allocations >= 0);
+}
diff --git a/extensions/spellcheck/hunspell/src/hashmgr.hxx b/extensions/spellcheck/hunspell/src/hashmgr.hxx
new file mode 100644
index 0000000000..f367a1c4a6
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/hashmgr.hxx
@@ -0,0 +1,182 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef HASHMGR_HXX_
+#define HASHMGR_HXX_
+
+#include <stdio.h>
+#include <stdint.h>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "htypes.hxx"
+#include "filemgr.hxx"
+#include "w_char.hxx"
+
+enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
+
+// morphological description of a dictionary item can contain
+// arbitrary number "ph:" (MORPH_PHON) fields to store typical
+// phonetic or other misspellings of that word.
+// ratio of lines/lines with "ph:" in the dic file: 1/MORPH_PHON_RATIO
+#define MORPH_PHON_RATIO 500
+
+class HashMgr {
+ int tablesize;
+ struct hentry** tableptr;
+ flag flag_mode;
+ int complexprefixes;
+ int utf8;
+ unsigned short forbiddenword;
+ int langnum;
+ std::string enc;
+ std::string lang;
+ struct cs_info* csconv;
+ std::string ignorechars;
+ std::vector<w_char> ignorechars_utf16;
+ int numaliasf; // flag vector `compression' with aliases
+ unsigned short** aliasf;
+ unsigned short* aliasflen;
+ int numaliasm; // morphological desciption `compression' with aliases
+ char** aliasm;
+ // reptable created from REP table of aff file and from "ph:" fields
+ // of the dic file. It contains phonetic and other common misspellings
+ // (letters, letter groups and words) for better suggestions
+ std::vector<replentry> reptable;
+
+ public:
+ HashMgr(const char* tpath, const char* apath, const char* key = NULL);
+ ~HashMgr();
+
+ struct hentry* lookup(const char*) const;
+ int hash(const char*) const;
+ struct hentry* walk_hashtable(int& col, struct hentry* hp) const;
+
+ int add(const std::string& word);
+ int add_with_affix(const std::string& word, const std::string& pattern);
+ int remove(const std::string& word);
+private:
+ // Only internal consumers are allowed to arena-allocate.
+ int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const;
+public:
+ int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const {
+ return decode_flags(result, flags, af, /* arena = */ false);
+ }
+ bool decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const;
+ unsigned short decode_flag(const char* flag) const;
+ char* encode_flag(unsigned short flag) const;
+ int is_aliasf() const;
+ int get_aliasf(int index, unsigned short** fvec, FileMgr* af) const;
+ int is_aliasm() const;
+ char* get_aliasm(int index) const;
+ const std::vector<replentry>& get_reptable() const;
+
+ private:
+ int get_clen_and_captype(const std::string& word, int* captype);
+ int get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf);
+ int load_tables(const char* tpath, const char* key);
+ int add_word(const std::string& word,
+ int wcl,
+ unsigned short* ap,
+ int al,
+ const std::string* desc,
+ bool onlyupcase,
+ int captype);
+ int load_config(const char* affpath, const char* key);
+ bool parse_aliasf(const std::string& line, FileMgr* af);
+ int add_hidden_capitalized_word(const std::string& word,
+ int wcl,
+ unsigned short* flags,
+ int al,
+ const std::string* dp,
+ int captype);
+ bool parse_aliasm(const std::string& line, FileMgr* af);
+ bool parse_reptable(const std::string& line, FileMgr* af);
+ int remove_forbidden_flag(const std::string& word);
+
+ // Our Mozilla fork uses a simple arena allocator for certain strings which
+ // persist for the lifetime of the HashMgr in order to avoid heap fragmentation.
+ // It's a simple bump-allocator, so we can't actually free() memory midway
+ // through the lifecycle, but we have a dummy free() implementation to ensure
+ // that our calls to arena_alloc() and arena_free() are balanced.
+ void* arena_alloc(int num_bytes);
+ void* arena_alloc(int num_bytes) const {
+ return const_cast<HashMgr*>(this)->arena_alloc(num_bytes);
+ }
+ void arena_free(void* ptr);
+
+ std::vector<std::unique_ptr<uint8_t[]>> arena;
+ int current_chunk_size = 0;
+ int current_chunk_offset = 0;
+ int outstanding_arena_allocations = 0;
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/htypes.hxx b/extensions/spellcheck/hunspell/src/htypes.hxx
new file mode 100644
index 0000000000..44366b1d68
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/htypes.hxx
@@ -0,0 +1,75 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef HTYPES_HXX_
+#define HTYPES_HXX_
+
+#define ROTATE_LEN 5
+
+#define ROTATE(v, q) \
+ (v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q)) - 1));
+
+// hentry options
+#define H_OPT (1 << 0) // is there optional morphological data?
+#define H_OPT_ALIASM (1 << 1) // using alias compression?
+#define H_OPT_PHON (1 << 2) // is there ph: field in the morphological data?
+#define H_OPT_INITCAP (1 << 3) // is dictionary word capitalized?
+
+// see also csutil.hxx
+#define HENTRY_WORD(h) &(h->word[0])
+
+// approx. number of user defined words
+#define USERWORD 1000
+
+#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900)
+# define HUNSPELL_THREAD_LOCAL thread_local
+#else
+# define HUNSPELL_THREAD_LOCAL static
+#endif
+
+struct hentry {
+ unsigned char blen; // word length in bytes
+ unsigned char clen; // word length in characters (different for UTF-8 enc.)
+ short alen; // length of affix flag vector
+ unsigned short* astr; // affix flag vector
+ struct hentry* next; // next word with same hash code
+ struct hentry* next_homonym; // next homonym word (with same hash code)
+ char var; // bit vector of H_OPT hentry options
+ char word[1]; // variable-length word (8-bit or UTF-8 encoding)
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/hunspell.cxx b/extensions/spellcheck/hunspell/src/hunspell.cxx
new file mode 100644
index 0000000000..4afafdadc1
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/hunspell.cxx
@@ -0,0 +1,2249 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "affixmgr.hxx"
+#include "hunspell.hxx"
+#include "suggestmgr.hxx"
+#include "hunspell.h"
+#include "csutil.hxx"
+
+#include <limits>
+#include <string>
+
+#define MAXWORDUTF8LEN (MAXWORDLEN * 3)
+
+class HunspellImpl
+{
+public:
+ HunspellImpl(const char* affpath, const char* dpath, const char* key = NULL);
+ ~HunspellImpl();
+ int add_dic(const char* dpath, const char* key = NULL);
+ std::vector<std::string> suffix_suggest(const std::string& root_word);
+ std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
+ std::vector<std::string> generate(const std::string& word, const std::string& pattern);
+ std::vector<std::string> stem(const std::string& word);
+ std::vector<std::string> stem(const std::vector<std::string>& morph);
+ std::vector<std::string> analyze(const std::string& word);
+ int get_langnum() const;
+ bool input_conv(const std::string& word, std::string& dest);
+ bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);
+ std::vector<std::string> suggest(const std::string& word);
+ const std::string& get_wordchars_cpp() const;
+ const std::vector<w_char>& get_wordchars_utf16() const;
+ const std::string& get_dict_encoding() const;
+ int add(const std::string& word);
+ int add_with_affix(const std::string& word, const std::string& example);
+ int remove(const std::string& word);
+ const std::string& get_version_cpp() const;
+ struct cs_info* get_csconv();
+
+ int spell(const char* word, int* info = NULL, char** root = NULL);
+ int suggest(char*** slst, const char* word);
+ int suffix_suggest(char*** slst, const char* root_word);
+ void free_list(char*** slst, int n);
+ char* get_dic_encoding();
+ int analyze(char*** slst, const char* word);
+ int stem(char*** slst, const char* word);
+ int stem(char*** slst, char** morph, int n);
+ int generate(char*** slst, const char* word, const char* word2);
+ int generate(char*** slst, const char* word, char** desc, int n);
+ const char* get_wordchars() const;
+ const char* get_version() const;
+ int input_conv(const char* word, char* dest, size_t destsize);
+
+private:
+ AffixMgr* pAMgr;
+ std::vector<HashMgr*> m_HMgrs;
+ SuggestMgr* pSMgr;
+ char* affixpath;
+ std::string encoding;
+ struct cs_info* csconv;
+ int langnum;
+ int utf8;
+ int complexprefixes;
+ std::vector<std::string> wordbreak;
+
+private:
+ std::vector<std::string> analyze_internal(const std::string& word);
+ bool spell_internal(const std::string& word, int* info = NULL, std::string* root = NULL);
+ std::vector<std::string> suggest_internal(const std::string& word,
+ bool& capitalized, size_t& abbreviated, int& captype);
+ void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);
+ size_t cleanword2(std::string& dest,
+ std::vector<w_char>& dest_u,
+ const std::string& src,
+ int* pcaptype,
+ size_t* pabbrev);
+ void clean_ignore(std::string& dest, const std::string& src);
+ void mkinitcap(std::string& u8);
+ int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
+ int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
+ void mkallcap(std::string& u8);
+ int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
+ struct hentry* checkword(const std::string& source, int* info, std::string* root);
+ std::string sharps_u8_l1(const std::string& source);
+ hentry*
+ spellsharps(std::string& base, size_t start_pos, int, int, int* info, std::string* root);
+ int is_keepcase(const hentry* rv);
+ void insert_sug(std::vector<std::string>& slst, const std::string& word);
+ void cat_result(std::string& result, const std::string& st);
+ std::vector<std::string> spellml(const std::string& word);
+ std::string get_xml_par(const std::string& par, std::string::size_type pos);
+ std::string::size_type get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr);
+ std::vector<std::string> get_xml_list(const std::string& list, std::string::size_type pos, const char* tag);
+ int check_xml_par(const std::string& q, std::string::size_type pos, const char* attr, const char* value);
+private:
+ HunspellImpl(const HunspellImpl&);
+ HunspellImpl& operator=(const HunspellImpl&);
+};
+
+HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key) {
+ csconv = NULL;
+ utf8 = 0;
+ complexprefixes = 0;
+ affixpath = mystrdup(affpath);
+
+ /* first set up the hash manager */
+ m_HMgrs.push_back(new HashMgr(dpath, affpath, key));
+
+ /* next set up the affix manager */
+ /* it needs access to the hash manager lookup methods */
+ pAMgr = new AffixMgr(affpath, m_HMgrs, key);
+
+ /* get the preferred try string and the dictionary */
+ /* encoding from the Affix Manager for that dictionary */
+ char* try_string = pAMgr->get_try_string();
+ encoding = pAMgr->get_encoding();
+ langnum = pAMgr->get_langnum();
+ utf8 = pAMgr->get_utf8();
+ if (!utf8)
+ csconv = get_current_cs(encoding);
+ complexprefixes = pAMgr->get_complexprefixes();
+ wordbreak = pAMgr->get_breaktable();
+
+ /* and finally set up the suggestion manager */
+ pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
+ if (try_string)
+ free(try_string);
+}
+
+HunspellImpl::~HunspellImpl() {
+ delete pSMgr;
+ delete pAMgr;
+ for (size_t i = 0; i < m_HMgrs.size(); ++i)
+ delete m_HMgrs[i];
+ pSMgr = NULL;
+ pAMgr = NULL;
+#ifdef MOZILLA_CLIENT
+ delete[] csconv;
+#endif
+ csconv = NULL;
+ if (affixpath)
+ free(affixpath);
+ affixpath = NULL;
+}
+
+// load extra dictionaries
+int HunspellImpl::add_dic(const char* dpath, const char* key) {
+ if (!affixpath)
+ return 1;
+ m_HMgrs.push_back(new HashMgr(dpath, affixpath, key));
+ return 0;
+}
+
+
+// make a copy of src at dest while removing all characters
+// specified in IGNORE rule
+void HunspellImpl::clean_ignore(std::string& dest,
+ const std::string& src) {
+ dest.clear();
+ dest.assign(src);
+ const char* ignoredchars = pAMgr ? pAMgr->get_ignore() : NULL;
+ if (ignoredchars != NULL) {
+ if (utf8) {
+ const std::vector<w_char>& ignoredchars_utf16 =
+ pAMgr->get_ignore_utf16();
+ remove_ignored_chars_utf(dest, ignoredchars_utf16);
+ } else {
+ remove_ignored_chars(dest, ignoredchars);
+ }
+ }
+}
+
+
+// make a copy of src at destination while removing all leading
+// blanks and removing any trailing periods after recording
+// their presence with the abbreviation flag
+// also since already going through character by character,
+// set the capitalization type
+// return the length of the "cleaned" (and UTF-8 encoded) word
+
+size_t HunspellImpl::cleanword2(std::string& dest,
+ std::vector<w_char>& dest_utf,
+ const std::string& src,
+ int* pcaptype,
+ size_t* pabbrev) {
+ dest.clear();
+ dest_utf.clear();
+
+ // remove IGNORE characters from the string
+ std::string w2;
+ clean_ignore(w2, src);
+
+ const char* q = w2.c_str();
+
+ // first skip over any leading blanks
+ while (*q == ' ')
+ ++q;
+
+ // now strip off any trailing periods (recording their presence)
+ *pabbrev = 0;
+ int nl = strlen(q);
+ while ((nl > 0) && (*(q + nl - 1) == '.')) {
+ nl--;
+ (*pabbrev)++;
+ }
+
+ // if no characters are left it can't be capitalized
+ if (nl <= 0) {
+ *pcaptype = NOCAP;
+ return 0;
+ }
+
+ dest.append(q, nl);
+ nl = dest.size();
+ if (utf8) {
+ u8_u16(dest_utf, dest);
+ *pcaptype = get_captype_utf8(dest_utf, langnum);
+ } else {
+ *pcaptype = get_captype(dest, csconv);
+ }
+ return nl;
+}
+
+void HunspellImpl::cleanword(std::string& dest,
+ const std::string& src,
+ int* pcaptype,
+ int* pabbrev) {
+ dest.clear();
+ const unsigned char* q = (const unsigned char*)src.c_str();
+ int firstcap = 0;
+
+ // first skip over any leading blanks
+ while (*q == ' ')
+ ++q;
+
+ // now strip off any trailing periods (recording their presence)
+ *pabbrev = 0;
+ int nl = strlen((const char*)q);
+ while ((nl > 0) && (*(q + nl - 1) == '.')) {
+ nl--;
+ (*pabbrev)++;
+ }
+
+ // if no characters are left it can't be capitalized
+ if (nl <= 0) {
+ *pcaptype = NOCAP;
+ return;
+ }
+
+ // now determine the capitalization type of the first nl letters
+ int ncap = 0;
+ int nneutral = 0;
+ int nc = 0;
+
+ if (!utf8) {
+ while (nl > 0) {
+ nc++;
+ if (csconv[(*q)].ccase)
+ ncap++;
+ if (csconv[(*q)].cupper == csconv[(*q)].clower)
+ nneutral++;
+ dest.push_back(*q++);
+ nl--;
+ }
+ // remember to terminate the destination string
+ firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
+ } else {
+ std::vector<w_char> t;
+ u8_u16(t, src);
+ for (size_t i = 0; i < t.size(); ++i) {
+ unsigned short idx = (t[i].h << 8) + t[i].l;
+ unsigned short low = unicodetolower(idx, langnum);
+ if (idx != low)
+ ncap++;
+ if (unicodetoupper(idx, langnum) == low)
+ nneutral++;
+ }
+ u16_u8(dest, t);
+ if (ncap) {
+ unsigned short idx = (t[0].h << 8) + t[0].l;
+ firstcap = (idx != unicodetolower(idx, langnum));
+ }
+ }
+
+ // now finally set the captype
+ if (ncap == 0) {
+ *pcaptype = NOCAP;
+ } else if ((ncap == 1) && firstcap) {
+ *pcaptype = INITCAP;
+ } else if ((ncap == nc) || ((ncap + nneutral) == nc)) {
+ *pcaptype = ALLCAP;
+ } else if ((ncap > 1) && firstcap) {
+ *pcaptype = HUHINITCAP;
+ } else {
+ *pcaptype = HUHCAP;
+ }
+}
+
+void HunspellImpl::mkallcap(std::string& u8) {
+ if (utf8) {
+ std::vector<w_char> u16;
+ u8_u16(u16, u8);
+ ::mkallcap_utf(u16, langnum);
+ u16_u8(u8, u16);
+ } else {
+ ::mkallcap(u8, csconv);
+ }
+}
+
+int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
+ if (utf8) {
+ ::mkallsmall_utf(u16, langnum);
+ u16_u8(u8, u16);
+ } else {
+ ::mkallsmall(u8, csconv);
+ }
+ return u8.size();
+}
+
+// convert UTF-8 sharp S codes to latin 1
+std::string HunspellImpl::sharps_u8_l1(const std::string& source) {
+ std::string dest(source);
+ mystrrep(dest, "\xC3\x9F", "\xDF");
+ return dest;
+}
+
+// recursive search for right ss - sharp s permutations
+hentry* HunspellImpl::spellsharps(std::string& base,
+ size_t n_pos,
+ int n,
+ int repnum,
+ int* info,
+ std::string* root) {
+ size_t pos = base.find("ss", n_pos);
+ if (pos != std::string::npos && (n < MAXSHARPS)) {
+ base[pos] = '\xC3';
+ base[pos + 1] = '\x9F';
+ hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, info, root);
+ if (h)
+ return h;
+ base[pos] = 's';
+ base[pos + 1] = 's';
+ h = spellsharps(base, pos + 2, n + 1, repnum, info, root);
+ if (h)
+ return h;
+ } else if (repnum > 0) {
+ if (utf8)
+ return checkword(base, info, root);
+ std::string tmp(sharps_u8_l1(base));
+ return checkword(tmp, info, root);
+ }
+ return NULL;
+}
+
+int HunspellImpl::is_keepcase(const hentry* rv) {
+ return pAMgr && rv->astr && pAMgr->get_keepcase() &&
+ TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
+}
+
+/* insert a word to the beginning of the suggestion array */
+void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string& word) {
+ slst.insert(slst.begin(), word);
+}
+
+bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) {
+ bool r = spell_internal(word, info, root);
+ if (r && root) {
+ // output conversion
+ RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
+ if (rl) {
+ std::string wspace;
+ if (rl->conv(*root, wspace)) {
+ *root = wspace;
+ }
+ }
+ }
+ return r;
+}
+
+bool HunspellImpl::spell_internal(const std::string& word, int* info, std::string* root) {
+ struct hentry* rv = NULL;
+
+ int info2 = 0;
+ if (!info)
+ info = &info2;
+ else
+ *info = 0;
+
+ // Hunspell supports XML input of the simplified API (see manual)
+ if (word == SPELL_XML)
+ return true;
+ if (utf8) {
+ if (word.size() >= MAXWORDUTF8LEN)
+ return false;
+ } else {
+ if (word.size() >= MAXWORDLEN)
+ return false;
+ }
+ int captype = NOCAP;
+ size_t abbv = 0;
+ size_t wl = 0;
+
+ std::string scw;
+ std::vector<w_char> sunicw;
+
+ // input conversion
+ RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
+ {
+ std::string wspace;
+
+ bool convstatus = rl ? rl->conv(word, wspace) : false;
+ if (convstatus)
+ wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
+ else
+ wl = cleanword2(scw, sunicw, word, &captype, &abbv);
+ }
+
+#ifdef MOZILLA_CLIENT
+ // accept the abbreviated words without dots
+ // workaround for the incomplete tokenization of Mozilla
+ abbv = 1;
+#endif
+
+ if (wl == 0 || m_HMgrs.empty())
+ return true;
+ if (root)
+ root->clear();
+
+ // allow numbers with dots, dashes and commas (but forbid double separators:
+ // "..", "--" etc.)
+ enum { NBEGIN, NNUM, NSEP };
+ int nstate = NBEGIN;
+ size_t i;
+
+ for (i = 0; (i < wl); i++) {
+ if ((scw[i] <= '9') && (scw[i] >= '0')) {
+ nstate = NNUM;
+ } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) {
+ if ((nstate == NSEP) || (i == 0))
+ break;
+ nstate = NSEP;
+ } else
+ break;
+ }
+ if ((i == wl) && (nstate == NNUM))
+ return true;
+
+ switch (captype) {
+ case HUHCAP:
+ /* FALLTHROUGH */
+ case HUHINITCAP:
+ *info |= SPELL_ORIGCAP;
+ /* FALLTHROUGH */
+ case NOCAP:
+ rv = checkword(scw, info, root);
+ if ((abbv) && !(rv)) {
+ std::string u8buffer(scw);
+ u8buffer.push_back('.');
+ rv = checkword(u8buffer, info, root);
+ }
+ break;
+ case ALLCAP: {
+ *info |= SPELL_ORIGCAP;
+ rv = checkword(scw, info, root);
+ if (rv)
+ break;
+ if (abbv) {
+ std::string u8buffer(scw);
+ u8buffer.push_back('.');
+ rv = checkword(u8buffer, info, root);
+ if (rv)
+ break;
+ }
+ // Spec. prefix handling for Catalan, French, Italian:
+ // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
+ size_t apos = pAMgr ? scw.find('\'') : std::string::npos;
+ if (apos != std::string::npos) {
+ mkallsmall2(scw, sunicw);
+ //conversion may result in string with different len to pre-mkallsmall2
+ //so re-scan
+ if (apos != std::string::npos && apos < scw.size() - 1) {
+ std::string part1 = scw.substr(0, apos+1);
+ std::string part2 = scw.substr(apos+1);
+ if (utf8) {
+ std::vector<w_char> part1u, part2u;
+ u8_u16(part1u, part1);
+ u8_u16(part2u, part2);
+ mkinitcap2(part2, part2u);
+ scw = part1 + part2;
+ sunicw = part1u;
+ sunicw.insert(sunicw.end(), part2u.begin(), part2u.end());
+ rv = checkword(scw, info, root);
+ if (rv)
+ break;
+ } else {
+ mkinitcap2(part2, sunicw);
+ scw = part1 + part2;
+ rv = checkword(scw, info, root);
+ if (rv)
+ break;
+ }
+ mkinitcap2(scw, sunicw);
+ rv = checkword(scw, info, root);
+ if (rv)
+ break;
+ }
+ }
+ if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) {
+
+ mkallsmall2(scw, sunicw);
+ std::string u8buffer(scw);
+ rv = spellsharps(u8buffer, 0, 0, 0, info, root);
+ if (!rv) {
+ mkinitcap2(scw, sunicw);
+ rv = spellsharps(scw, 0, 0, 0, info, root);
+ }
+ if ((abbv) && !(rv)) {
+ u8buffer.push_back('.');
+ rv = spellsharps(u8buffer, 0, 0, 0, info, root);
+ if (!rv) {
+ u8buffer = std::string(scw);
+ u8buffer.push_back('.');
+ rv = spellsharps(u8buffer, 0, 0, 0, info, root);
+ }
+ }
+ if (rv)
+ break;
+ }
+ }
+ /* FALLTHROUGH */
+ case INITCAP: {
+ // handle special capitalization of dotted I
+ bool Idot = (utf8 && (unsigned char) scw[0] == 0xc4 && (unsigned char) scw[1] == 0xb0);
+ *info |= SPELL_ORIGCAP;
+ if (captype == ALLCAP) {
+ mkallsmall2(scw, sunicw);
+ mkinitcap2(scw, sunicw);
+ if (Idot)
+ scw.replace(0, 1, "\xc4\xb0");
+ }
+ if (captype == INITCAP)
+ *info |= SPELL_INITCAP;
+ rv = checkword(scw, info, root);
+ if (captype == INITCAP)
+ *info &= ~SPELL_INITCAP;
+ // forbid bad capitalization
+ // (for example, ijs -> Ijs instead of IJs in Dutch)
+ // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
+ if (*info & SPELL_FORBIDDEN) {
+ rv = NULL;
+ break;
+ }
+ if (rv && is_keepcase(rv) && (captype == ALLCAP))
+ rv = NULL;
+ if (rv || (Idot && langnum != LANG_az && langnum != LANG_tr && langnum != LANG_crh))
+ break;
+
+ mkallsmall2(scw, sunicw);
+ std::string u8buffer(scw);
+ mkinitcap2(scw, sunicw);
+
+ rv = checkword(u8buffer, info, root);
+ if (abbv && !rv) {
+ u8buffer.push_back('.');
+ rv = checkword(u8buffer, info, root);
+ if (!rv) {
+ u8buffer = scw;
+ u8buffer.push_back('.');
+ if (captype == INITCAP)
+ *info |= SPELL_INITCAP;
+ rv = checkword(u8buffer, info, root);
+ if (captype == INITCAP)
+ *info &= ~SPELL_INITCAP;
+ if (rv && is_keepcase(rv) && (captype == ALLCAP))
+ rv = NULL;
+ break;
+ }
+ }
+ if (rv && is_keepcase(rv) &&
+ ((captype == ALLCAP) ||
+ // if CHECKSHARPS: KEEPCASE words with \xDF are allowed
+ // in INITCAP form, too.
+ !(pAMgr->get_checksharps() &&
+ ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) ||
+ (!utf8 && u8buffer.find('\xDF') != std::string::npos)))))
+ rv = NULL;
+ break;
+ }
+ }
+
+ if (rv) {
+ if (pAMgr && pAMgr->get_warn() && rv->astr &&
+ TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
+ *info |= SPELL_WARN;
+ if (pAMgr->get_forbidwarn())
+ return false;
+ return true;
+ }
+ return true;
+ }
+
+ // recursive breaking at break points
+ if (!wordbreak.empty() && !(*info & SPELL_FORBIDDEN)) {
+
+ int nbr = 0;
+ wl = scw.size();
+
+ // calculate break points for recursion limit
+ for (size_t j = 0; j < wordbreak.size(); ++j) {
+ size_t pos = 0;
+ while ((pos = scw.find(wordbreak[j], pos)) != std::string::npos) {
+ ++nbr;
+ pos += wordbreak[j].size();
+ }
+ }
+ if (nbr >= 10)
+ return false;
+
+ // check boundary patterns (^begin and end$)
+ for (size_t j = 0; j < wordbreak.size(); ++j) {
+ size_t plen = wordbreak[j].size();
+ if (plen == 1 || plen > wl)
+ continue;
+
+ if (wordbreak[j][0] == '^' &&
+ scw.compare(0, plen - 1, wordbreak[j], 1, plen -1) == 0 && spell(scw.substr(plen - 1)))
+ return true;
+
+ if (wordbreak[j][plen - 1] == '$' &&
+ scw.compare(wl - plen + 1, plen - 1, wordbreak[j], 0, plen - 1) == 0) {
+ std::string suffix(scw.substr(wl - plen + 1));
+ scw.resize(wl - plen + 1);
+ if (spell(scw))
+ return true;
+ scw.append(suffix);
+ }
+ }
+
+ // other patterns
+ for (size_t j = 0; j < wordbreak.size(); ++j) {
+ size_t plen = wordbreak[j].size();
+ size_t found = scw.find(wordbreak[j]);
+ if ((found > 0) && (found < wl - plen)) {
+ size_t found2 = scw.find(wordbreak[j], found + 1);
+ // try to break at the second occurance
+ // to recognize dictionary words with wordbreak
+ if (found2 > 0 && (found2 < wl - plen))
+ found = found2;
+ if (!spell(scw.substr(found + plen)))
+ continue;
+ std::string suffix(scw.substr(found));
+ scw.resize(found);
+ // examine 2 sides of the break point
+ if (spell(scw))
+ return true;
+ scw.append(suffix);
+
+ // LANG_hu: spec. dash rule
+ if (langnum == LANG_hu && wordbreak[j] == "-") {
+ suffix = scw.substr(found + 1);
+ scw.resize(found + 1);
+ if (spell(scw))
+ return true; // check the first part with dash
+ scw.append(suffix);
+ }
+ // end of LANG specific region
+ }
+ }
+
+ // other patterns (break at first break point)
+ for (size_t j = 0; j < wordbreak.size(); ++j) {
+ size_t plen = wordbreak[j].size();
+ size_t found = scw.find(wordbreak[j]);
+ if ((found > 0) && (found < wl - plen)) {
+ if (!spell(scw.substr(found + plen)))
+ continue;
+ std::string suffix(scw.substr(found));
+ scw.resize(found);
+ // examine 2 sides of the break point
+ if (spell(scw))
+ return true;
+ scw.append(suffix);
+
+ // LANG_hu: spec. dash rule
+ if (langnum == LANG_hu && wordbreak[j] == "-") {
+ suffix = scw.substr(found + 1);
+ scw.resize(found + 1);
+ if (spell(scw))
+ return true; // check the first part with dash
+ scw.append(suffix);
+ }
+ // end of LANG specific region
+ }
+ }
+ }
+
+ return false;
+}
+
+struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root) {
+ std::string w2;
+ const char* word;
+ int len;
+
+ // remove IGNORE characters from the string
+ clean_ignore(w2, w);
+
+ word = w2.c_str();
+ len = w2.size();
+
+ if (!len)
+ return NULL;
+
+ // word reversing wrapper for complex prefixes
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(w2);
+ else
+ reverseword(w2);
+ }
+
+ word = w2.c_str();
+
+ // look word in hash table
+ struct hentry* he = NULL;
+ for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
+ he = m_HMgrs[i]->lookup(word);
+
+ // check forbidden and onlyincompound words
+ if ((he) && (he->astr) && (pAMgr) &&
+ TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
+ if (info)
+ *info |= SPELL_FORBIDDEN;
+ // LANG_hu section: set dash information for suggestions
+ if (langnum == LANG_hu) {
+ if (pAMgr->get_compoundflag() &&
+ TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
+ if (info)
+ *info |= SPELL_COMPOUND;
+ }
+ }
+ return NULL;
+ }
+
+ // he = next not needaffix, onlyincompound homonym or onlyupcase word
+ while (he && (he->astr) && pAMgr &&
+ ((pAMgr->get_needaffix() &&
+ TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
+ (pAMgr->get_onlyincompound() &&
+ TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
+ (info && (*info & SPELL_INITCAP) &&
+ TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))))
+ he = he->next_homonym;
+ }
+
+ // check with affixes
+ if (!he && pAMgr) {
+ // try stripping off affixes */
+ he = pAMgr->affix_check(word, len, 0);
+
+ // check compound restriction and onlyupcase
+ if (he && he->astr &&
+ ((pAMgr->get_onlyincompound() &&
+ TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
+ (info && (*info & SPELL_INITCAP) &&
+ TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
+ he = NULL;
+ }
+
+ if (he) {
+ if ((he->astr) && (pAMgr) &&
+ TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
+ if (info)
+ *info |= SPELL_FORBIDDEN;
+ return NULL;
+ }
+ if (root) {
+ root->assign(he->word);
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(*root);
+ else
+ reverseword(*root);
+ }
+ }
+ // try check compound word
+ } else if (pAMgr->get_compound()) {
+ struct hentry* rwords[100]; // buffer for COMPOUND pattern checking
+ he = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, info);
+ // LANG_hu section: `moving rule' with last dash
+ if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) {
+ std::string dup(word, len - 1);
+ he = pAMgr->compound_check(dup, -5, 0, 100, 0, NULL, (hentry**)&rwords, 1, 0, info);
+ }
+ // end of LANG specific region
+ if (he) {
+ if (root) {
+ root->assign(he->word);
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(*root);
+ else
+ reverseword(*root);
+ }
+ }
+ if (info)
+ *info |= SPELL_COMPOUND;
+ }
+ }
+ }
+
+ return he;
+}
+
+std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
+ bool capwords;
+ size_t abbv;
+ int captype;
+ std::vector<std::string> slst = suggest_internal(word, capwords, abbv, captype);
+ // word reversing wrapper for complex prefixes
+ if (complexprefixes) {
+ for (size_t j = 0; j < slst.size(); ++j) {
+ if (utf8)
+ reverseword_utf(slst[j]);
+ else
+ reverseword(slst[j]);
+ }
+ }
+
+ // capitalize
+ if (capwords)
+ for (size_t j = 0; j < slst.size(); ++j) {
+ mkinitcap(slst[j]);
+ }
+
+ // expand suggestions with dot(s)
+ if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
+ for (size_t j = 0; j < slst.size(); ++j) {
+ slst[j].append(word.substr(word.size() - abbv));
+ }
+ }
+
+ // remove bad capitalized and forbidden forms
+ if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
+ switch (captype) {
+ case INITCAP:
+ case ALLCAP: {
+ size_t l = 0;
+ for (size_t j = 0; j < slst.size(); ++j) {
+ if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) {
+ std::string s;
+ std::vector<w_char> w;
+ if (utf8) {
+ u8_u16(w, slst[j]);
+ } else {
+ s = slst[j];
+ }
+ mkallsmall2(s, w);
+ if (spell(s)) {
+ slst[l] = s;
+ ++l;
+ } else {
+ mkinitcap2(s, w);
+ if (spell(s)) {
+ slst[l] = s;
+ ++l;
+ }
+ }
+ } else {
+ slst[l] = slst[j];
+ ++l;
+ }
+ }
+ slst.resize(l);
+ }
+ }
+ }
+
+ // remove duplications
+ size_t l = 0;
+ for (size_t j = 0; j < slst.size(); ++j) {
+ slst[l] = slst[j];
+ for (size_t k = 0; k < l; ++k) {
+ if (slst[k] == slst[j]) {
+ --l;
+ break;
+ }
+ }
+ ++l;
+ }
+ slst.resize(l);
+
+ // output conversion
+ RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
+ if (rl) {
+ for (size_t i = 0; rl && i < slst.size(); ++i) {
+ std::string wspace;
+ if (rl->conv(slst[i], wspace)) {
+ slst[i] = wspace;
+ }
+ }
+ }
+ return slst;
+}
+
+std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word,
+ bool& capwords, size_t& abbv, int& captype) {
+ captype = NOCAP;
+ abbv = 0;
+ capwords = false;
+
+ std::vector<std::string> slst;
+
+ int onlycmpdsug = 0;
+ if (!pSMgr || m_HMgrs.empty())
+ return slst;
+
+ // process XML input of the simplified API (see manual)
+ if (word.compare(0, sizeof(SPELL_XML) - 3, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
+ return spellml(word);
+ }
+ if (utf8) {
+ if (word.size() >= MAXWORDUTF8LEN)
+ return slst;
+ } else {
+ if (word.size() >= MAXWORDLEN)
+ return slst;
+ }
+ size_t wl = 0;
+
+ std::string scw;
+ std::vector<w_char> sunicw;
+
+ // input conversion
+ RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
+ {
+ std::string wspace;
+
+ bool convstatus = rl ? rl->conv(word, wspace) : false;
+ if (convstatus)
+ wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
+ else
+ wl = cleanword2(scw, sunicw, word, &captype, &abbv);
+
+ if (wl == 0)
+ return slst;
+ }
+
+ bool good = false;
+
+ clock_t timelimit;
+ // initialize in every suggestion call
+ timelimit = clock();
+
+ // check capitalized form for FORCEUCASE
+ if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
+ int info = SPELL_ORIGCAP;
+ if (checkword(scw, &info, NULL)) {
+ std::string form(scw);
+ mkinitcap(form);
+ slst.push_back(form);
+ return slst;
+ }
+ }
+
+ switch (captype) {
+ case NOCAP: {
+ good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
+ if (abbv) {
+ std::string wspace(scw);
+ wspace.push_back('.');
+ good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
+ }
+ break;
+ }
+
+ case INITCAP: {
+ capwords = true;
+ good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
+ std::string wspace(scw);
+ mkallsmall2(wspace, sunicw);
+ good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
+ break;
+ }
+ case HUHINITCAP:
+ capwords = true;
+ /* FALLTHROUGH */
+ case HUHCAP: {
+ good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
+ // something.The -> something. The
+ size_t dot_pos = scw.find('.');
+ if (dot_pos != std::string::npos) {
+ std::string postdot = scw.substr(dot_pos + 1);
+ int captype_;
+ if (utf8) {
+ std::vector<w_char> postdotu;
+ u8_u16(postdotu, postdot);
+ captype_ = get_captype_utf8(postdotu, langnum);
+ } else {
+ captype_ = get_captype(postdot, csconv);
+ }
+ if (captype_ == INITCAP) {
+ std::string str(scw);
+ str.insert(dot_pos + 1, 1, ' ');
+ insert_sug(slst, str);
+ }
+ }
+
+ std::string wspace;
+
+ if (captype == HUHINITCAP) {
+ // TheOpenOffice.org -> The OpenOffice.org
+ wspace = scw;
+ mkinitsmall2(wspace, sunicw);
+ good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
+ }
+ wspace = scw;
+ mkallsmall2(wspace, sunicw);
+ if (spell(wspace.c_str()))
+ insert_sug(slst, wspace);
+ size_t prevns = slst.size();
+ good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
+ if (captype == HUHINITCAP) {
+ mkinitcap2(wspace, sunicw);
+ if (spell(wspace.c_str()))
+ insert_sug(slst, wspace);
+ good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
+ }
+ // aNew -> "a New" (instead of "a new")
+ for (size_t j = prevns; j < slst.size(); ++j) {
+ const char* space = strchr(slst[j].c_str(), ' ');
+ if (space) {
+ size_t slen = strlen(space + 1);
+ // different case after space (need capitalisation)
+ if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {
+ std::string first(slst[j].c_str(), space + 1);
+ std::string second(space + 1);
+ std::vector<w_char> w;
+ if (utf8)
+ u8_u16(w, second);
+ mkinitcap2(second, w);
+ // set as first suggestion
+ slst.erase(slst.begin() + j);
+ slst.insert(slst.begin(), first + second);
+ }
+ }
+ }
+ break;
+ }
+
+ case ALLCAP: {
+ std::string wspace(scw);
+ mkallsmall2(wspace, sunicw);
+ good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
+ if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str()))
+ insert_sug(slst, wspace);
+ mkinitcap2(wspace, sunicw);
+ good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
+ for (size_t j = 0; j < slst.size(); ++j) {
+ mkallcap(slst[j]);
+ if (pAMgr && pAMgr->get_checksharps()) {
+ if (utf8) {
+ mystrrep(slst[j], "\xC3\x9F", "SS");
+ } else {
+ mystrrep(slst[j], "\xDF", "SS");
+ }
+ }
+ }
+ break;
+ }
+ }
+
+ // LANG_hu section: replace '-' with ' ' in Hungarian
+ if (langnum == LANG_hu) {
+ for (size_t j = 0; j < slst.size(); ++j) {
+ size_t pos = slst[j].find('-');
+ if (pos != std::string::npos) {
+ int info;
+ std::string w(slst[j].substr(0, pos));
+ w.append(slst[j].substr(pos + 1));
+ (void)spell(w, &info, NULL);
+ if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
+ slst[j][pos] = ' ';
+ } else
+ slst[j][pos] = '-';
+ }
+ }
+ }
+ // END OF LANG_hu section
+ // try ngram approach since found nothing good suggestion
+ if (!good && pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) {
+ switch (captype) {
+ case NOCAP: {
+ pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs, NOCAP);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
+ break;
+ }
+ /* FALLTHROUGH */
+ case HUHINITCAP:
+ capwords = true;
+ /* FALLTHROUGH */
+ case HUHCAP: {
+ std::string wspace(scw);
+ mkallsmall2(wspace, sunicw);
+ pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, HUHCAP);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
+ break;
+ }
+ case INITCAP: {
+ capwords = true;
+ std::string wspace(scw);
+ mkallsmall2(wspace, sunicw);
+ pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, INITCAP);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
+ break;
+ }
+ case ALLCAP: {
+ std::string wspace(scw);
+ mkallsmall2(wspace, sunicw);
+ size_t oldns = slst.size();
+ pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, ALLCAP);
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
+ for (size_t j = oldns; j < slst.size(); ++j) {
+ mkallcap(slst[j]);
+ }
+ break;
+ }
+ }
+ }
+
+ // try dash suggestion (Afo-American -> Afro-American)
+ // Note: LibreOffice was modified to treat dashes as word
+ // characters to check "scot-free" etc. word forms, but
+ // we need to handle suggestions for "Afo-American", etc.,
+ // while "Afro-American" is missing from the dictionary.
+ // TODO avoid possible overgeneration
+ size_t dash_pos = scw.find('-');
+ if (dash_pos != std::string::npos) {
+ int nodashsug = 1;
+ for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) {
+ if (slst[j].find('-') != std::string::npos)
+ nodashsug = 0;
+ }
+
+ size_t prev_pos = 0;
+ bool last = false;
+
+ while (!good && nodashsug && !last) {
+ if (dash_pos == scw.size())
+ last = 1;
+ std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos);
+ if (!spell(chunk.c_str())) {
+ std::vector<std::string> nlst = suggest(chunk.c_str());
+ if (clock() > timelimit + TIMELIMIT_GLOBAL)
+ return slst;
+ for (std::vector<std::string>::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) {
+ std::string wspace = scw.substr(0, prev_pos);
+ wspace.append(*j);
+ if (!last) {
+ wspace.append("-");
+ wspace.append(scw.substr(dash_pos + 1));
+ }
+ int info = 0;
+ if (pAMgr && pAMgr->get_forbiddenword())
+ checkword(wspace, &info, NULL);
+ if (!(info & SPELL_FORBIDDEN))
+ insert_sug(slst, wspace);
+ }
+ nodashsug = 0;
+ }
+ if (!last) {
+ prev_pos = dash_pos + 1;
+ dash_pos = scw.find('-', prev_pos);
+ }
+ if (dash_pos == std::string::npos)
+ dash_pos = scw.size();
+ }
+ }
+ return slst;
+}
+
+const std::string& HunspellImpl::get_dict_encoding() const {
+ return encoding;
+}
+
+std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) {
+ std::vector<std::string> slst;
+
+ std::string result2;
+ if (desc.empty())
+ return slst;
+ for (size_t i = 0; i < desc.size(); ++i) {
+
+ std::string result;
+
+ // add compound word parts (except the last one)
+ const char* s = desc[i].c_str();
+ const char* part = strstr(s, MORPH_PART);
+ if (part) {
+ const char* nextpart = strstr(part + 1, MORPH_PART);
+ while (nextpart) {
+ std::string field;
+ copy_field(field, part, MORPH_PART);
+ result.append(field);
+ part = nextpart;
+ nextpart = strstr(part + 1, MORPH_PART);
+ }
+ s = part;
+ }
+
+ std::string tok(s);
+ size_t alt = 0;
+ while ((alt = tok.find(" | ", alt)) != std::string::npos) {
+ tok[alt + 1] = MSEP_ALT;
+ }
+ std::vector<std::string> pl = line_tok(tok, MSEP_ALT);
+ for (size_t k = 0; k < pl.size(); ++k) {
+ // add derivational suffixes
+ if (pl[k].find(MORPH_DERI_SFX) != std::string::npos) {
+ // remove inflectional suffixes
+ const size_t is = pl[k].find(MORPH_INFL_SFX);
+ if (is != std::string::npos)
+ pl[k].resize(is);
+ std::vector<std::string> singlepl;
+ singlepl.push_back(pl[k]);
+ std::string sg = pSMgr->suggest_gen(singlepl, pl[k]);
+ if (!sg.empty()) {
+ std::vector<std::string> gen = line_tok(sg, MSEP_REC);
+ for (size_t j = 0; j < gen.size(); ++j) {
+ result2.push_back(MSEP_REC);
+ result2.append(result);
+ result2.append(gen[j]);
+ }
+ }
+ } else {
+ result2.push_back(MSEP_REC);
+ result2.append(result);
+ if (pl[k].find(MORPH_SURF_PFX) != std::string::npos) {
+ std::string field;
+ copy_field(field, pl[k], MORPH_SURF_PFX);
+ result2.append(field);
+ }
+ std::string field;
+ copy_field(field, pl[k], MORPH_STEM);
+ result2.append(field);
+ }
+ }
+ }
+ slst = line_tok(result2, MSEP_REC);
+ uniqlist(slst);
+ return slst;
+}
+
+std::vector<std::string> HunspellImpl::stem(const std::string& word) {
+ return stem(analyze(word));
+}
+
+const std::string& HunspellImpl::get_wordchars_cpp() const {
+ return pAMgr->get_wordchars();
+}
+
+const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {
+ return pAMgr->get_wordchars_utf16();
+}
+
+void HunspellImpl::mkinitcap(std::string& u8) {
+ if (utf8) {
+ std::vector<w_char> u16;
+ u8_u16(u16, u8);
+ ::mkinitcap_utf(u16, langnum);
+ u16_u8(u8, u16);
+ } else {
+ ::mkinitcap(u8, csconv);
+ }
+}
+
+int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
+ if (utf8) {
+ ::mkinitcap_utf(u16, langnum);
+ u16_u8(u8, u16);
+ } else {
+ ::mkinitcap(u8, csconv);
+ }
+ return u8.size();
+}
+
+int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
+ if (utf8) {
+ ::mkinitsmall_utf(u16, langnum);
+ u16_u8(u8, u16);
+ } else {
+ ::mkinitsmall(u8, csconv);
+ }
+ return u8.size();
+}
+
+int HunspellImpl::add(const std::string& word) {
+ if (!m_HMgrs.empty())
+ return m_HMgrs[0]->add(word);
+ return 0;
+}
+
+int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) {
+ if (!m_HMgrs.empty())
+ return m_HMgrs[0]->add_with_affix(word, example);
+ return 0;
+}
+
+int HunspellImpl::remove(const std::string& word) {
+ if (!m_HMgrs.empty())
+ return m_HMgrs[0]->remove(word);
+ return 0;
+}
+
+const std::string& HunspellImpl::get_version_cpp() const {
+ return pAMgr->get_version();
+}
+
+struct cs_info* HunspellImpl::get_csconv() {
+ return csconv;
+}
+
+void HunspellImpl::cat_result(std::string& result, const std::string& st) {
+ if (!st.empty()) {
+ if (!result.empty())
+ result.append("\n");
+ result.append(st);
+ }
+}
+
+std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
+ std::vector<std::string> slst = analyze_internal(word);
+ // output conversion
+ RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
+ if (rl) {
+ for (size_t i = 0; rl && i < slst.size(); ++i) {
+ std::string wspace;
+ if (rl->conv(slst[i], wspace)) {
+ slst[i] = wspace;
+ }
+ }
+ }
+ return slst;
+}
+
+std::vector<std::string> HunspellImpl::analyze_internal(const std::string& word) {
+ std::vector<std::string> slst;
+ if (!pSMgr || m_HMgrs.empty())
+ return slst;
+ if (utf8) {
+ if (word.size() >= MAXWORDUTF8LEN)
+ return slst;
+ } else {
+ if (word.size() >= MAXWORDLEN)
+ return slst;
+ }
+ int captype = NOCAP;
+ size_t abbv = 0;
+ size_t wl = 0;
+
+ std::string scw;
+ std::vector<w_char> sunicw;
+
+ // input conversion
+ RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
+ {
+ std::string wspace;
+
+ bool convstatus = rl ? rl->conv(word, wspace) : false;
+ if (convstatus)
+ wl = cleanword2(scw, sunicw, wspace, &captype, &abbv);
+ else
+ wl = cleanword2(scw, sunicw, word, &captype, &abbv);
+ }
+
+ if (wl == 0) {
+ if (abbv) {
+ scw.clear();
+ for (wl = 0; wl < abbv; wl++)
+ scw.push_back('.');
+ abbv = 0;
+ } else
+ return slst;
+ }
+
+ std::string result;
+
+ size_t n = 0;
+ // test numbers
+ // LANG_hu section: set dash information for suggestions
+ if (langnum == LANG_hu) {
+ size_t n2 = 0;
+ size_t n3 = 0;
+
+ while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) ||
+ (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) {
+ n++;
+ if ((scw[n] == '.') || (scw[n] == ',')) {
+ if (((n2 == 0) && (n > 3)) ||
+ ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ','))))
+ break;
+ n2++;
+ n3 = n;
+ }
+ }
+
+ if ((n == wl) && (n3 > 0) && (n - n3 > 3))
+ return slst;
+ if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) &&
+ checkword(scw.substr(n), NULL, NULL))) {
+ result.append(scw);
+ result.resize(n - 1);
+ if (n == wl)
+ cat_result(result, pSMgr->suggest_morph(scw.substr(n - 1)));
+ else {
+ std::string chunk = scw.substr(n - 1, 1);
+ cat_result(result, pSMgr->suggest_morph(chunk));
+ result.push_back('+'); // XXX SPEC. MORPHCODE
+ cat_result(result, pSMgr->suggest_morph(scw.substr(n)));
+ }
+ return line_tok(result, MSEP_REC);
+ }
+ }
+ // END OF LANG_hu section
+
+ switch (captype) {
+ case HUHCAP:
+ case HUHINITCAP:
+ case NOCAP: {
+ cat_result(result, pSMgr->suggest_morph(scw));
+ if (abbv) {
+ std::string u8buffer(scw);
+ u8buffer.push_back('.');
+ cat_result(result, pSMgr->suggest_morph(u8buffer));
+ }
+ break;
+ }
+ case INITCAP: {
+ mkallsmall2(scw, sunicw);
+ std::string u8buffer(scw);
+ mkinitcap2(scw, sunicw);
+ cat_result(result, pSMgr->suggest_morph(u8buffer));
+ cat_result(result, pSMgr->suggest_morph(scw));
+ if (abbv) {
+ u8buffer.push_back('.');
+ cat_result(result, pSMgr->suggest_morph(u8buffer));
+
+ u8buffer = scw;
+ u8buffer.push_back('.');
+
+ cat_result(result, pSMgr->suggest_morph(u8buffer));
+ }
+ break;
+ }
+ case ALLCAP: {
+ cat_result(result, pSMgr->suggest_morph(scw));
+ if (abbv) {
+ std::string u8buffer(scw);
+ u8buffer.push_back('.');
+ cat_result(result, pSMgr->suggest_morph(u8buffer));
+ }
+ mkallsmall2(scw, sunicw);
+ std::string u8buffer(scw);
+ mkinitcap2(scw, sunicw);
+
+ cat_result(result, pSMgr->suggest_morph(u8buffer));
+ cat_result(result, pSMgr->suggest_morph(scw));
+ if (abbv) {
+ u8buffer.push_back('.');
+ cat_result(result, pSMgr->suggest_morph(u8buffer));
+
+ u8buffer = scw;
+ u8buffer.push_back('.');
+
+ cat_result(result, pSMgr->suggest_morph(u8buffer));
+ }
+ break;
+ }
+ }
+
+ if (!result.empty()) {
+ // word reversing wrapper for complex prefixes
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(result);
+ else
+ reverseword(result);
+ }
+ return line_tok(result, MSEP_REC);
+ }
+
+ // compound word with dash (HU) I18n
+ // LANG_hu section: set dash information for suggestions
+
+ size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos;
+ if (dash_pos != std::string::npos) {
+ int nresult = 0;
+
+ std::string part1 = scw.substr(0, dash_pos);
+ std::string part2 = scw.substr(dash_pos+1);
+
+ // examine 2 sides of the dash
+ if (part2.empty()) { // base word ending with dash
+ if (spell(part1)) {
+ std::string p = pSMgr->suggest_morph(part1);
+ if (!p.empty()) {
+ slst = line_tok(p, MSEP_REC);
+ return slst;
+ }
+ }
+ } else if (part2.size() == 1 && part2[0] == 'e') { // XXX (HU) -e hat.
+ if (spell(part1) && (spell("-e"))) {
+ std::string st = pSMgr->suggest_morph(part1);
+ if (!st.empty()) {
+ result.append(st);
+ }
+ result.push_back('+'); // XXX spec. separator in MORPHCODE
+ st = pSMgr->suggest_morph("-e");
+ if (!st.empty()) {
+ result.append(st);
+ }
+ return line_tok(result, MSEP_REC);
+ }
+ } else {
+ // first word ending with dash: word- XXX ???
+ part1.push_back(' ');
+ nresult = spell(part1);
+ part1.erase(part1.size() - 1);
+ if (nresult && spell(part2) &&
+ ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) {
+ std::string st = pSMgr->suggest_morph(part1);
+ if (!st.empty()) {
+ result.append(st);
+ result.push_back('+'); // XXX spec. separator in MORPHCODE
+ }
+ st = pSMgr->suggest_morph(part2);
+ if (!st.empty()) {
+ result.append(st);
+ }
+ return line_tok(result, MSEP_REC);
+ }
+ }
+ // affixed number in correct word
+ if (nresult && (dash_pos > 0) &&
+ (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) ||
+ (scw[dash_pos - 1] == '.'))) {
+ n = 1;
+ if (scw[dash_pos - n] == '.')
+ n++;
+ // search first not a number character to left from dash
+ while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) &&
+ (n < 6)) {
+ n++;
+ }
+ if (dash_pos < n)
+ n--;
+ // numbers: valami1000000-hoz
+ // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
+ // 56-hoz, 6-hoz
+ for (; n >= 1; n--) {
+ if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') {
+ continue;
+ }
+ std::string chunk = scw.substr(dash_pos - n);
+ if (checkword(chunk, NULL, NULL)) {
+ result.append(chunk);
+ std::string st = pSMgr->suggest_morph(chunk);
+ if (!st.empty()) {
+ result.append(st);
+ }
+ return line_tok(result, MSEP_REC);
+ }
+ }
+ }
+ }
+ return slst;
+}
+
+std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) {
+ std::vector<std::string> slst;
+ if (!pSMgr || pl.empty())
+ return slst;
+ std::vector<std::string> pl2 = analyze(word);
+ int captype = NOCAP;
+ int abbv = 0;
+ std::string cw;
+ cleanword(cw, word, &captype, &abbv);
+ std::string result;
+
+ for (size_t i = 0; i < pl.size(); ++i) {
+ cat_result(result, pSMgr->suggest_gen(pl2, pl[i]));
+ }
+
+ if (!result.empty()) {
+ // allcap
+ if (captype == ALLCAP)
+ mkallcap(result);
+
+ // line split
+ slst = line_tok(result, MSEP_REC);
+
+ // capitalize
+ if (captype == INITCAP || captype == HUHINITCAP) {
+ for (size_t j = 0; j < slst.size(); ++j) {
+ mkinitcap(slst[j]);
+ }
+ }
+
+ // temporary filtering of prefix related errors (eg.
+ // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
+ std::vector<std::string>::iterator it = slst.begin();
+ while (it != slst.end()) {
+ if (!spell(*it)) {
+ it = slst.erase(it);
+ } else {
+ ++it;
+ }
+ }
+ }
+ return slst;
+}
+
+std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) {
+ std::vector<std::string> pl = analyze(pattern);
+ std::vector<std::string> slst = generate(word, pl);
+ uniqlist(slst);
+ return slst;
+}
+
+// minimal XML parser functions
+std::string HunspellImpl::get_xml_par(const std::string& in_par, std::string::size_type pos) {
+ std::string dest;
+ if (pos == std::string::npos)
+ return dest;
+ const char* par = in_par.c_str() + pos;
+ char end = *par;
+ if (end == '>')
+ end = '<';
+ else if (end != '\'' && end != '"')
+ return dest; // bad XML
+ for (par++; *par != '\0' && *par != end; ++par) {
+ dest.push_back(*par);
+ }
+ mystrrep(dest, "&lt;", "<");
+ mystrrep(dest, "&amp;", "&");
+ return dest;
+}
+
+int HunspellImpl::get_langnum() const {
+ return langnum;
+}
+
+bool HunspellImpl::input_conv(const std::string& word, std::string& dest) {
+ RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
+ if (rl) {
+ return rl->conv(word, dest);
+ }
+ dest.assign(word);
+ return false;
+}
+
+// return the beginning of the element (attr == NULL) or the attribute
+std::string::size_type HunspellImpl::get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr) {
+ if (pos == std::string::npos)
+ return std::string::npos;
+
+ std::string::size_type endpos = s.find('>', pos);
+ if (attr == NULL)
+ return endpos;
+ while (true) {
+ pos = s.find(attr, pos);
+ if (pos == std::string::npos || pos >= endpos)
+ return std::string::npos;
+ if (s[pos - 1] == ' ' || s[pos - 1] == '\n')
+ break;
+ pos += strlen(attr);
+ }
+ return pos + strlen(attr);
+}
+
+int HunspellImpl::check_xml_par(const std::string& q, std::string::size_type pos,
+ const char* attr,
+ const char* value) {
+ std::string cw = get_xml_par(q, get_xml_pos(q, pos, attr));
+ if (cw == value)
+ return 1;
+ return 0;
+}
+
+std::vector<std::string> HunspellImpl::get_xml_list(const std::string& list, std::string::size_type pos, const char* tag) {
+ std::vector<std::string> slst;
+ if (pos == std::string::npos)
+ return slst;
+ while (true) {
+ pos = list.find(tag, pos);
+ if (pos == std::string::npos)
+ break;
+ std::string cw = get_xml_par(list, pos + strlen(tag) - 1);
+ if (cw.empty()) {
+ break;
+ }
+ slst.push_back(cw);
+ ++pos;
+ }
+ return slst;
+}
+
+std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) {
+ std::vector<std::string> slst;
+
+ std::string::size_type qpos = in_word.find("<query");
+ if (qpos == std::string::npos)
+ return slst; // bad XML input
+
+ std::string::size_type q2pos = in_word.find('>', qpos);
+ if (q2pos == std::string::npos)
+ return slst; // bad XML input
+
+ q2pos = in_word.find("<word", q2pos);
+ if (q2pos == std::string::npos)
+ return slst; // bad XML input
+
+ if (check_xml_par(in_word, qpos, "type=", "analyze")) {
+ std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
+ if (!cw.empty())
+ slst = analyze(cw);
+ if (slst.empty())
+ return slst;
+ // convert the result to <code><a>ana1</a><a>ana2</a></code> format
+ std::string r;
+ r.append("<code>");
+ for (size_t i = 0; i < slst.size(); ++i) {
+ r.append("<a>");
+
+ std::string entry(slst[i]);
+ mystrrep(entry, "\t", " ");
+ mystrrep(entry, "&", "&amp;");
+ mystrrep(entry, "<", "&lt;");
+ r.append(entry);
+
+ r.append("</a>");
+ }
+ r.append("</code>");
+ slst.clear();
+ slst.push_back(r);
+ return slst;
+ } else if (check_xml_par(in_word, qpos, "type=", "stem")) {
+ std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
+ if (!cw.empty())
+ return stem(cw);
+ } else if (check_xml_par(in_word, qpos, "type=", "generate")) {
+ std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
+ if (cw.empty())
+ return slst;
+ std::string::size_type q3pos = in_word.find("<word", q2pos + 1);
+ if (q3pos != std::string::npos) {
+ std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos));
+ if (!cw2.empty()) {
+ return generate(cw, cw2);
+ }
+ } else {
+ q2pos = in_word.find("<code", q2pos + 1);
+ if (q2pos != std::string::npos) {
+ std::vector<std::string> slst2 = get_xml_list(in_word, in_word.find('>', q2pos), "<a>");
+ if (!slst2.empty()) {
+ slst = generate(cw, slst2);
+ uniqlist(slst);
+ return slst;
+ }
+ }
+ }
+ } else if (check_xml_par(in_word, qpos, "type=", "add")) {
+ std::string cw = get_xml_par(in_word, in_word.find('>', q2pos));
+ if (cw.empty())
+ return slst;
+ std::string::size_type q3pos = in_word.find("<word", q2pos + 1);
+ if (q3pos != std::string::npos) {
+ std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos));
+ if (!cw2.empty()) {
+ add_with_affix(cw, cw2);
+ } else {
+ add(cw);
+ }
+ } else {
+ add(cw);
+ }
+ }
+ return slst;
+}
+
+std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) {
+ std::vector<std::string> slst;
+ struct hentry* he = NULL;
+ int len;
+ std::string w2;
+ const char* word;
+ const char* ignoredchars = pAMgr->get_ignore();
+ if (ignoredchars != NULL) {
+ w2.assign(root_word);
+ if (utf8) {
+ const std::vector<w_char>& ignoredchars_utf16 =
+ pAMgr->get_ignore_utf16();
+ remove_ignored_chars_utf(w2, ignoredchars_utf16);
+ } else {
+ remove_ignored_chars(w2, ignoredchars);
+ }
+ word = w2.c_str();
+ } else
+ word = root_word.c_str();
+
+ len = strlen(word);
+
+ if (!len)
+ return slst;
+
+ for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) {
+ he = m_HMgrs[i]->lookup(word);
+ }
+ if (he) {
+ slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word.c_str());
+ }
+ return slst;
+}
+
+namespace {
+ int munge_vector(char*** slst, const std::vector<std::string>& items) {
+ if (items.empty()) {
+ *slst = NULL;
+ return 0;
+ } else {
+ *slst = (char**)malloc(sizeof(char*) * items.size());
+ if (!*slst)
+ return 0;
+ for (size_t i = 0; i < items.size(); ++i)
+ (*slst)[i] = mystrdup(items[i].c_str());
+ }
+ return items.size();
+ }
+}
+
+int HunspellImpl::spell(const char* word, int* info, char** root) {
+ std::string sroot;
+ bool ret = spell(word, info, root ? &sroot : NULL);
+ if (root) {
+ if (sroot.empty()) {
+ *root = NULL;
+ } else {
+ *root = mystrdup(sroot.c_str());
+ }
+ }
+ return ret;
+}
+
+int HunspellImpl::suggest(char*** slst, const char* word) {
+ std::vector<std::string> suggests = suggest(word);
+ return munge_vector(slst, suggests);
+}
+
+int HunspellImpl::suffix_suggest(char*** slst, const char* root_word) {
+ std::vector<std::string> stems = suffix_suggest(root_word);
+ return munge_vector(slst, stems);
+}
+
+void HunspellImpl::free_list(char*** slst, int n) {
+ if (slst && *slst) {
+ for (int i = 0; i < n; i++)
+ free((*slst)[i]);
+ free(*slst);
+ *slst = NULL;
+ }
+}
+
+char* HunspellImpl::get_dic_encoding() {
+ return &encoding[0];
+}
+
+int HunspellImpl::analyze(char*** slst, const char* word) {
+ std::vector<std::string> stems = analyze(word);
+ return munge_vector(slst, stems);
+}
+
+int HunspellImpl::stem(char*** slst, const char* word) {
+ std::vector<std::string> stems = stem(word);
+ return munge_vector(slst, stems);
+}
+
+int HunspellImpl::stem(char*** slst, char** desc, int n) {
+ std::vector<std::string> morph;
+ morph.reserve(n);
+ for (int i = 0; i < n; ++i)
+ morph.push_back(desc[i]);
+
+ std::vector<std::string> stems = stem(morph);
+ return munge_vector(slst, stems);
+}
+
+int HunspellImpl::generate(char*** slst, const char* word, const char* pattern) {
+ std::vector<std::string> stems = generate(word, pattern);
+ return munge_vector(slst, stems);
+}
+
+int HunspellImpl::generate(char*** slst, const char* word, char** pl, int pln) {
+ std::vector<std::string> morph;
+ morph.reserve(pln);
+ for (int i = 0; i < pln; ++i)
+ morph.push_back(pl[i]);
+
+ std::vector<std::string> stems = generate(word, morph);
+ return munge_vector(slst, stems);
+}
+
+const char* HunspellImpl::get_wordchars() const {
+ return get_wordchars_cpp().c_str();
+}
+
+const char* HunspellImpl::get_version() const {
+ return get_version_cpp().c_str();
+}
+
+int HunspellImpl::input_conv(const char* word, char* dest, size_t destsize) {
+ std::string d;
+ bool ret = input_conv(word, d);
+ if (ret && d.size() < destsize) {
+ strncpy(dest, d.c_str(), destsize);
+ return 1;
+ }
+ return 0;
+}
+
+Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key)
+ : m_Impl(new HunspellImpl(affpath, dpath, key)) {
+}
+
+Hunspell::~Hunspell() {
+ delete m_Impl;
+}
+
+// load extra dictionaries
+int Hunspell::add_dic(const char* dpath, const char* key) {
+ return m_Impl->add_dic(dpath, key);
+}
+
+bool Hunspell::spell(const std::string& word, int* info, std::string* root) {
+ return m_Impl->spell(word, info, root);
+}
+
+std::vector<std::string> Hunspell::suggest(const std::string& word) {
+ return m_Impl->suggest(word);
+}
+
+std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) {
+ return m_Impl->suffix_suggest(root_word);
+}
+
+const std::string& Hunspell::get_dict_encoding() const {
+ return m_Impl->get_dict_encoding();
+}
+
+std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) {
+ return m_Impl->stem(desc);
+}
+
+std::vector<std::string> Hunspell::stem(const std::string& word) {
+ return m_Impl->stem(word);
+}
+
+const std::string& Hunspell::get_wordchars_cpp() const {
+ return m_Impl->get_wordchars_cpp();
+}
+
+const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
+ return m_Impl->get_wordchars_utf16();
+}
+
+int Hunspell::add(const std::string& word) {
+ return m_Impl->add(word);
+}
+
+int Hunspell::add_with_affix(const std::string& word, const std::string& example) {
+ return m_Impl->add_with_affix(word, example);
+}
+
+int Hunspell::remove(const std::string& word) {
+ return m_Impl->remove(word);
+}
+
+const std::string& Hunspell::get_version_cpp() const {
+ return m_Impl->get_version_cpp();
+}
+
+struct cs_info* Hunspell::get_csconv() {
+ return m_Impl->get_csconv();
+}
+
+std::vector<std::string> Hunspell::analyze(const std::string& word) {
+ return m_Impl->analyze(word);
+}
+
+std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) {
+ return m_Impl->generate(word, pl);
+}
+
+std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) {
+ return m_Impl->generate(word, pattern);
+}
+
+int Hunspell::get_langnum() const {
+ return m_Impl->get_langnum();
+}
+
+bool Hunspell::input_conv(const std::string& word, std::string& dest) {
+ return m_Impl->input_conv(word, dest);
+}
+
+int Hunspell::spell(const char* word, int* info, char** root) {
+ return m_Impl->spell(word, info, root);
+}
+
+int Hunspell::suggest(char*** slst, const char* word) {
+ return m_Impl->suggest(slst, word);
+}
+
+int Hunspell::suffix_suggest(char*** slst, const char* root_word) {
+ return m_Impl->suffix_suggest(slst, root_word);
+}
+
+void Hunspell::free_list(char*** slst, int n) {
+ m_Impl->free_list(slst, n);
+}
+
+char* Hunspell::get_dic_encoding() {
+ return m_Impl->get_dic_encoding();
+}
+
+int Hunspell::analyze(char*** slst, const char* word) {
+ return m_Impl->analyze(slst, word);
+}
+
+int Hunspell::stem(char*** slst, const char* word) {
+ return m_Impl->stem(slst, word);
+}
+
+int Hunspell::stem(char*** slst, char** desc, int n) {
+ return m_Impl->stem(slst, desc, n);
+}
+
+int Hunspell::generate(char*** slst, const char* word, const char* pattern) {
+ return m_Impl->generate(slst, word, pattern);
+}
+
+int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) {
+ return m_Impl->generate(slst, word, pl, pln);
+}
+
+const char* Hunspell::get_wordchars() const {
+ return m_Impl->get_wordchars();
+}
+
+const char* Hunspell::get_version() const {
+ return m_Impl->get_version();
+}
+
+int Hunspell::input_conv(const char* word, char* dest, size_t destsize) {
+ return m_Impl->input_conv(word, dest, destsize);
+}
+
+Hunhandle* Hunspell_create(const char* affpath, const char* dpath) {
+ return reinterpret_cast<Hunhandle*>(new HunspellImpl(affpath, dpath));
+}
+
+Hunhandle* Hunspell_create_key(const char* affpath,
+ const char* dpath,
+ const char* key) {
+ return reinterpret_cast<Hunhandle*>(new HunspellImpl(affpath, dpath, key));
+}
+
+void Hunspell_destroy(Hunhandle* pHunspell) {
+ delete reinterpret_cast<HunspellImpl*>(pHunspell);
+}
+
+int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) {
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->add_dic(dpath);
+}
+
+int Hunspell_spell(Hunhandle* pHunspell, const char* word) {
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->spell(word);
+}
+
+char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) {
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->get_dic_encoding();
+}
+
+int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) {
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->suggest(slst, word);
+}
+
+int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) {
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->analyze(slst, word);
+}
+
+int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) {
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->stem(slst, word);
+}
+
+int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) {
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->stem(slst, desc, n);
+}
+
+int Hunspell_generate(Hunhandle* pHunspell,
+ char*** slst,
+ const char* word,
+ const char* pattern)
+{
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, pattern);
+}
+
+int Hunspell_generate2(Hunhandle* pHunspell,
+ char*** slst,
+ const char* word,
+ char** desc,
+ int n)
+{
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, desc, n);
+}
+
+/* functions for run-time modification of the dictionary */
+
+/* add word to the run-time dictionary */
+
+int Hunspell_add(Hunhandle* pHunspell, const char* word) {
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->add(word);
+}
+
+/* add word to the run-time dictionary with affix flags of
+ * the example (a dictionary word): Hunspell will recognize
+ * affixed forms of the new word, too.
+ */
+
+int Hunspell_add_with_affix(Hunhandle* pHunspell,
+ const char* word,
+ const char* example) {
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->add_with_affix(word, example);
+}
+
+/* remove word from the run-time dictionary */
+
+int Hunspell_remove(Hunhandle* pHunspell, const char* word) {
+ return reinterpret_cast<HunspellImpl*>(pHunspell)->remove(word);
+}
+
+void Hunspell_free_list(Hunhandle* pHunspell, char*** list, int n) {
+ reinterpret_cast<HunspellImpl*>(pHunspell)->free_list(list, n);
+}
diff --git a/extensions/spellcheck/hunspell/src/hunspell.h b/extensions/spellcheck/hunspell/src/hunspell.h
new file mode 100644
index 0000000000..3aca30ab2f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/hunspell.h
@@ -0,0 +1,162 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Hunspell, based on MySpell.
+ *
+ * The Initial Developers of the Original Code are
+ * Kevin Hendricks (MySpell) and Németh László (Hunspell).
+ * Portions created by the Initial Developers are Copyright (C) 2002-2005
+ * the Initial Developers. All Rights Reserved.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef MYSPELLMGR_H_
+#define MYSPELLMGR_H_
+
+#include "hunvisapi.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct Hunhandle Hunhandle;
+
+LIBHUNSPELL_DLL_EXPORTED Hunhandle* Hunspell_create(const char* affpath,
+ const char* dpath);
+
+LIBHUNSPELL_DLL_EXPORTED Hunhandle* Hunspell_create_key(const char* affpath,
+ const char* dpath,
+ const char* key);
+
+LIBHUNSPELL_DLL_EXPORTED void Hunspell_destroy(Hunhandle* pHunspell);
+
+/* load extra dictionaries (only dic files)
+ * output: 0 = additional dictionary slots available, 1 = slots are now full*/
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_dic(Hunhandle* pHunspell,
+ const char* dpath);
+
+/* spell(word) - spellcheck word
+ * output: 0 = bad word, not 0 = good word
+ */
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle* pHunspell, const char*);
+
+LIBHUNSPELL_DLL_EXPORTED char* Hunspell_get_dic_encoding(Hunhandle* pHunspell);
+
+/* suggest(suggestions, word) - search suggestions
+ * input: pointer to an array of strings pointer and the (bad) word
+ * array of strings pointer (here *slst) may not be initialized
+ * output: number of suggestions in string array, and suggestions in
+ * a newly allocated array of strings (*slts will be NULL when number
+ * of suggestion equals 0.)
+ */
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_suggest(Hunhandle* pHunspell,
+ char*** slst,
+ const char* word);
+
+/* morphological functions */
+
+/* analyze(result, word) - morphological analysis of the word */
+
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_analyze(Hunhandle* pHunspell,
+ char*** slst,
+ const char* word);
+
+/* stem(result, word) - stemmer function */
+
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem(Hunhandle* pHunspell,
+ char*** slst,
+ const char* word);
+
+/* stem(result, analysis, n) - get stems from a morph. analysis
+ * example:
+ * char ** result, result2;
+ * int n1 = Hunspell_analyze(result, "words");
+ * int n2 = Hunspell_stem2(result2, result, n1);
+ */
+
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem2(Hunhandle* pHunspell,
+ char*** slst,
+ char** desc,
+ int n);
+
+/* generate(result, word, word2) - morphological generation by example(s) */
+
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate(Hunhandle* pHunspell,
+ char*** slst,
+ const char* word,
+ const char* word2);
+
+/* generate(result, word, desc, n) - generation by morph. description(s)
+ * example:
+ * char ** result;
+ * char * affix = "is:plural"; // description depends from dictionaries, too
+ * int n = Hunspell_generate2(result, "word", &affix, 1);
+ * for (int i = 0; i < n; i++) printf("%s\n", result[i]);
+ */
+
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate2(Hunhandle* pHunspell,
+ char*** slst,
+ const char* word,
+ char** desc,
+ int n);
+
+/* functions for run-time modification of the dictionary */
+
+/* add word to the run-time dictionary */
+
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_add(Hunhandle* pHunspell,
+ const char* word);
+
+/* add word to the run-time dictionary with affix flags of
+ * the example (a dictionary word): Hunspell will recognize
+ * affixed forms of the new word, too.
+ */
+
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_with_affix(Hunhandle* pHunspell,
+ const char* word,
+ const char* example);
+
+/* remove word from the run-time dictionary */
+
+LIBHUNSPELL_DLL_EXPORTED int Hunspell_remove(Hunhandle* pHunspell,
+ const char* word);
+
+/* free suggestion lists */
+
+LIBHUNSPELL_DLL_EXPORTED void Hunspell_free_list(Hunhandle* pHunspell,
+ char*** slst,
+ int n);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/hunspell.hxx b/extensions/spellcheck/hunspell/src/hunspell.hxx
new file mode 100644
index 0000000000..8640a35ca1
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/hunspell.hxx
@@ -0,0 +1,232 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef MYSPELLMGR_HXX_
+#define MYSPELLMGR_HXX_
+
+#include "hunvisapi.h"
+#include "w_char.hxx"
+#include "atypes.hxx"
+#include <string>
+#include <vector>
+
+#define SPELL_XML "<?xml?>"
+
+#ifndef MAXSUGGESTION
+#define MAXSUGGESTION 15
+#endif
+
+#define MAXSHARPS 5
+
+#ifndef MAXWORDLEN
+#define MAXWORDLEN 100
+#endif
+
+#if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
+# define H_DEPRECATED __attribute__((__deprecated__))
+#elif defined(_MSC_VER) && (_MSC_VER >= 1300)
+# define H_DEPRECATED __declspec(deprecated)
+#else
+# define H_DEPRECATED
+#endif
+
+class HunspellImpl;
+
+class LIBHUNSPELL_DLL_EXPORTED Hunspell {
+ private:
+ Hunspell(const Hunspell&);
+ Hunspell& operator=(const Hunspell&);
+
+ private:
+ HunspellImpl* m_Impl;
+
+ public:
+ /* Hunspell(aff, dic) - constructor of Hunspell class
+ * input: path of affix file and dictionary file
+ *
+ * In WIN32 environment, use UTF-8 encoded paths started with the long path
+ * prefix \\\\?\\ to handle system-independent character encoding and very
+ * long path names (without the long path prefix Hunspell will use fopen()
+ * with system-dependent character encoding instead of _wfopen()).
+ */
+ Hunspell(const char* affpath, const char* dpath, const char* key = NULL);
+ ~Hunspell();
+
+ /* load extra dictionaries (only dic files) */
+ int add_dic(const char* dpath, const char* key = NULL);
+
+ /* spell(word) - spellcheck word
+ * output: false = bad word, true = good word
+ *
+ * plus output:
+ * info: information bit array, fields:
+ * SPELL_COMPOUND = a compound word
+ * SPELL_FORBIDDEN = an explicit forbidden word
+ * root: root (stem), when input is a word with affix(es)
+ */
+ bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);
+ H_DEPRECATED int spell(const char* word, int* info = NULL, char** root = NULL);
+
+ /* suggest(suggestions, word) - search suggestions
+ * input: pointer to an array of strings pointer and the (bad) word
+ * array of strings pointer (here *slst) may not be initialized
+ * output: number of suggestions in string array, and suggestions in
+ * a newly allocated array of strings (*slts will be NULL when number
+ * of suggestion equals 0.)
+ */
+ std::vector<std::string> suggest(const std::string& word);
+ H_DEPRECATED int suggest(char*** slst, const char* word);
+
+ /* Suggest words from suffix rules
+ * suffix_suggest(suggestions, root_word)
+ * input: pointer to an array of strings pointer and the word
+ * array of strings pointer (here *slst) may not be initialized
+ * output: number of suggestions in string array, and suggestions in
+ * a newly allocated array of strings (*slts will be NULL when number
+ * of suggestion equals 0.)
+ */
+ std::vector<std::string> suffix_suggest(const std::string& root_word);
+ H_DEPRECATED int suffix_suggest(char*** slst, const char* root_word);
+
+ /* deallocate suggestion lists */
+ H_DEPRECATED void free_list(char*** slst, int n);
+
+ const std::string& get_dict_encoding() const;
+ char* get_dic_encoding();
+
+ /* morphological functions */
+
+ /* analyze(result, word) - morphological analysis of the word */
+ std::vector<std::string> analyze(const std::string& word);
+ H_DEPRECATED int analyze(char*** slst, const char* word);
+
+ /* stem(word) - stemmer function */
+ std::vector<std::string> stem(const std::string& word);
+ H_DEPRECATED int stem(char*** slst, const char* word);
+
+ /* stem(analysis, n) - get stems from a morph. analysis
+ * example:
+ * char ** result, result2;
+ * int n1 = analyze(&result, "words");
+ * int n2 = stem(&result2, result, n1);
+ */
+ std::vector<std::string> stem(const std::vector<std::string>& morph);
+ H_DEPRECATED int stem(char*** slst, char** morph, int n);
+
+ /* generate(result, word, word2) - morphological generation by example(s) */
+ std::vector<std::string> generate(const std::string& word, const std::string& word2);
+ H_DEPRECATED int generate(char*** slst, const char* word, const char* word2);
+
+ /* generate(result, word, desc, n) - generation by morph. description(s)
+ * example:
+ * char ** result;
+ * char * affix = "is:plural"; // description depends from dictionaries, too
+ * int n = generate(&result, "word", &affix, 1);
+ * for (int i = 0; i < n; i++) printf("%s\n", result[i]);
+ */
+ std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
+ H_DEPRECATED int generate(char*** slst, const char* word, char** desc, int n);
+
+ /* functions for run-time modification of the dictionary */
+
+ /* add word to the run-time dictionary */
+
+ int add(const std::string& word);
+
+ /* add word to the run-time dictionary with affix flags of
+ * the example (a dictionary word): Hunspell will recognize
+ * affixed forms of the new word, too.
+ */
+
+ int add_with_affix(const std::string& word, const std::string& example);
+
+ /* remove word from the run-time dictionary */
+
+ int remove(const std::string& word);
+
+ /* other */
+
+ /* get extra word characters definied in affix file for tokenization */
+ const char* get_wordchars() const;
+ const std::string& get_wordchars_cpp() const;
+ const std::vector<w_char>& get_wordchars_utf16() const;
+
+ struct cs_info* get_csconv();
+
+ const char* get_version() const;
+ const std::string& get_version_cpp() const;
+
+ int get_langnum() const;
+
+ /* need for putdic */
+ bool input_conv(const std::string& word, std::string& dest);
+ H_DEPRECATED int input_conv(const char* word, char* dest, size_t destsize);
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/hunvisapi.h b/extensions/spellcheck/hunspell/src/hunvisapi.h
new file mode 100644
index 0000000000..ed0a502ba2
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/hunvisapi.h
@@ -0,0 +1,18 @@
+#ifndef HUNSPELL_VISIBILITY_H_
+#define HUNSPELL_VISIBILITY_H_
+
+#if defined(HUNSPELL_STATIC)
+# define LIBHUNSPELL_DLL_EXPORTED
+#elif defined(_WIN32)
+# if defined(BUILDING_LIBHUNSPELL)
+# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport)
+# else
+# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
+# endif
+#elif defined(BUILDING_LIBHUNSPELL) && 1
+# define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
+#else
+# define LIBHUNSPELL_DLL_EXPORTED
+#endif
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/langnum.hxx b/extensions/spellcheck/hunspell/src/langnum.hxx
new file mode 100644
index 0000000000..39e63efdaa
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/langnum.hxx
@@ -0,0 +1,76 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef LANGNUM_HXX_
+#define LANGNUM_HXX_
+
+/*
+ language numbers for language specific codes
+ see https://wiki.openoffice.org/w/index.php?title=Languages&oldid=230199
+*/
+
+enum {
+ LANG_ar = 96,
+ LANG_az = 100, // custom number
+ LANG_bg = 41,
+ LANG_ca = 37,
+ LANG_crh = 102, // custom number
+ LANG_cs = 42,
+ LANG_da = 45,
+ LANG_de = 49,
+ LANG_el = 30,
+ LANG_en = 01,
+ LANG_es = 34,
+ LANG_eu = 10,
+ LANG_fr = 02,
+ LANG_gl = 38,
+ LANG_hr = 78,
+ LANG_hu = 36,
+ LANG_it = 39,
+ LANG_la = 99, // custom number
+ LANG_lv = 101, // custom number
+ LANG_nl = 31,
+ LANG_pl = 48,
+ LANG_pt = 03,
+ LANG_ru = 07,
+ LANG_sv = 50,
+ LANG_tr = 90,
+ LANG_uk = 80,
+ LANG_xx = 999
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/license.hunspell b/extensions/spellcheck/hunspell/src/license.hunspell
new file mode 100644
index 0000000000..fad5a16230
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/license.hunspell
@@ -0,0 +1,54 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s):
+ * David Einstein
+ * Davide Prina
+ * Giuseppe Modugno
+ * Gianluca Turconi
+ * Simon Brouwer
+ * Noll János
+ * Bíró Árpád
+ * Goldman Eleonóra
+ * Sarlós Tamás
+ * Bencsáth Boldizsár
+ * Halácsy Péter
+ * Dvornik László
+ * Gefferth András
+ * Nagy Viktor
+ * Varga Dániel
+ * Chris Halls
+ * Rene Engelhard
+ * Bram Moolenaar
+ * Dafydd Jones
+ * Harri Pitkänen
+ * Andras Timar
+ * Tor Lillqvist
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
diff --git a/extensions/spellcheck/hunspell/src/license.myspell b/extensions/spellcheck/hunspell/src/license.myspell
new file mode 100644
index 0000000000..2da5330750
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/license.myspell
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *
+ * NOTE: A special thanks and credit goes to Geoff Kuenning
+ * the creator of ispell. MySpell's affix algorithms were
+ * based on those of ispell which should be noted is
+ * copyright Geoff Kuenning et.al. and now available
+ * under a BSD style license. For more information on ispell
+ * and affix compression in general, please see:
+ * http://www.cs.ucla.edu/ficus-members/geoff/ispell.html
+ * (the home page for ispell)
+ *
+ * An almost complete rewrite of MySpell for use by
+ * the Mozilla project has been developed by David Einstein
+ * (Deinst@world.std.com). David and I are now
+ * working on parallel development tracks to help
+ * our respective projects (Mozilla and OpenOffice.org
+ * and we will maintain full affix file and dictionary
+ * file compatibility and work on merging our versions
+ * of MySpell back into a single tree. David has been
+ * a significant help in improving MySpell.
+ *
+ * Special thanks also go to La'szlo' Ne'meth
+ * <nemethl@gyorsposta.hu> who is the author of the
+ * Hungarian dictionary and who developed and contributed
+ * the code to support compound words in MySpell
+ * and fixed numerous problems with the encoding
+ * case conversion tables.
+ *
+ */
diff --git a/extensions/spellcheck/hunspell/src/moz.build b/extensions/spellcheck/hunspell/src/moz.build
new file mode 100644
index 0000000000..c6f7930262
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/moz.build
@@ -0,0 +1,30 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+include("sources.mozbuild")
+
+UNIFIED_SOURCES += hunspell_sources
+
+DEFINES['HUNSPELL_STATIC'] = True
+
+FINAL_LIBRARY = 'xul'
+
+LOCAL_INCLUDES += [
+ '../glue',
+]
+
+# We allow warnings for third-party code that can be updated from upstream.
+AllowCompilerWarnings()
+
+include('/ipc/chromium/chromium-config.mozbuild')
+include('../glue/common.mozbuild')
+
+HunspellIncludes()
+
+if CONFIG['CC_TYPE'] in ('clang', 'clang-cl'):
+ CXXFLAGS += [
+ '-Wno-implicit-fallthrough',
+ ]
diff --git a/extensions/spellcheck/hunspell/src/moz.yaml b/extensions/spellcheck/hunspell/src/moz.yaml
new file mode 100644
index 0000000000..64987b4100
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/moz.yaml
@@ -0,0 +1,16 @@
+# Version of this schema
+schema: 1
+
+bugzilla:
+ product: "Core"
+ component: "Spelling checker"
+
+origin:
+ name: "Hunspell"
+ description: "Spell Checker"
+
+ url: "http://hunspell.github.io/"
+ license: "MPL-1.1"
+ license-file: "COPYING.MPL"
+
+ release: "1.7.0"
diff --git a/extensions/spellcheck/hunspell/src/phonet.cxx b/extensions/spellcheck/hunspell/src/phonet.cxx
new file mode 100644
index 0000000000..69601a2872
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/phonet.cxx
@@ -0,0 +1,270 @@
+/* phonetic.c - generic replacement aglogithms for phonetic transformation
+ Copyright (C) 2000 Bjoern Jacke
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation;
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; If not, see
+ <http://www.gnu.org/licenses/>.
+
+ Changelog:
+
+ 2000-01-05 Bjoern Jacke <bjoern at j3e.de>
+ Initial Release insprired by the article about phonetic
+ transformations out of c't 25/1999
+
+ 2007-07-26 Bjoern Jacke <bjoern at j3e.de>
+ Released under MPL/GPL/LGPL tri-license for Hunspell
+
+ 2007-08-23 Laszlo Nemeth <nemeth at OOo>
+ Porting from Aspell to Hunspell using C-like structs
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include "csutil.hxx"
+#include "phonet.hxx"
+
+void init_phonet_hash(phonetable& parms) {
+ for (int i = 0; i < HASHSIZE; i++) {
+ parms.hash[i] = -1;
+ }
+
+ for (int i = 0; parms.rules[i][0] != '\0'; i += 2) {
+ /** set hash value **/
+ int k = (unsigned char)parms.rules[i][0];
+
+ if (parms.hash[k] < 0) {
+ parms.hash[k] = i;
+ }
+ }
+}
+
+// like strcpy but safe if the strings overlap
+// but only if dest < src
+static inline void strmove(char* dest, char* src) {
+ while (*src)
+ *dest++ = *src++;
+ *dest = '\0';
+}
+
+static int myisalpha(char ch) {
+ if ((unsigned char)ch < 128)
+ return isalpha(ch);
+ return 1;
+}
+
+/* Do phonetic transformation. */
+/* phonetic transcription algorithm */
+/* see: http://aspell.net/man-html/Phonetic-Code.html */
+/* convert string to uppercase before this call */
+std::string phonet(const std::string& inword, phonetable& parms) {
+
+ int i, k = 0, p, z;
+ int k0, n0, p0 = -333;
+ char c;
+ typedef unsigned char uchar;
+
+ size_t len = inword.size();
+ if (len > MAXPHONETUTF8LEN)
+ return std::string();
+ char word[MAXPHONETUTF8LEN + 1];
+ strncpy(word, inword.c_str(), MAXPHONETUTF8LEN);
+ word[MAXPHONETUTF8LEN] = '\0';
+
+ std::string target;
+ /** check word **/
+ i = z = 0;
+ while ((c = word[i]) != '\0') {
+ int n = parms.hash[(uchar)c];
+ int z0 = 0;
+
+ if (n >= 0 && !parms.rules[n].empty()) {
+ /** check all rules for the same letter **/
+ while (parms.rules[n][0] == c) {
+ /** check whole string **/
+ k = 1; /** number of found letters **/
+ p = 5; /** default priority **/
+ const char*s = parms.rules[n].c_str();
+ s++; /** important for (see below) "*(s-1)" **/
+
+ while (*s != '\0' && word[i + k] == *s && !isdigit((unsigned char)*s) &&
+ strchr("(-<^$", *s) == NULL) {
+ k++;
+ s++;
+ }
+ if (*s == '(') {
+ /** check letters in "(..)" **/
+ if (myisalpha(word[i + k]) // ...could be implied?
+ && strchr(s + 1, word[i + k]) != NULL) {
+ k++;
+ while (*s != ')')
+ s++;
+ s++;
+ }
+ }
+ p0 = (int)*s;
+ k0 = k;
+ while (*s == '-' && k > 1) {
+ k--;
+ s++;
+ }
+ if (*s == '<')
+ s++;
+ if (isdigit((unsigned char)*s)) {
+ /** determine priority **/
+ p = *s - '0';
+ s++;
+ }
+ if (*s == '^' && *(s + 1) == '^')
+ s++;
+
+ if (*s == '\0' || (*s == '^' && (i == 0 || !myisalpha(word[i - 1])) &&
+ (*(s + 1) != '$' || (!myisalpha(word[i + k0])))) ||
+ (*s == '$' && i > 0 && myisalpha(word[i - 1]) &&
+ (!myisalpha(word[i + k0])))) {
+ /** search for followup rules, if: **/
+ /** parms.followup and k > 1 and NO '-' in searchstring **/
+ char c0 = word[i + k - 1];
+ n0 = parms.hash[(uchar)c0];
+
+ // if (parms.followup && k > 1 && n0 >= 0
+ if (k > 1 && n0 >= 0 && p0 != (int)'-' && word[i + k] != '\0' && !parms.rules[n0].empty()) {
+ /** test follow-up rule for "word[i+k]" **/
+ while (parms.rules[n0][0] == c0) {
+ /** check whole string **/
+ k0 = k;
+ p0 = 5;
+ s = parms.rules[n0].c_str();
+ s++;
+ while (*s != '\0' && word[i + k0] == *s &&
+ !isdigit((unsigned char)*s) &&
+ strchr("(-<^$", *s) == NULL) {
+ k0++;
+ s++;
+ }
+ if (*s == '(') {
+ /** check letters **/
+ if (myisalpha(word[i + k0]) &&
+ strchr(s + 1, word[i + k0]) != NULL) {
+ k0++;
+ while (*s != ')' && *s != '\0')
+ s++;
+ if (*s == ')')
+ s++;
+ }
+ }
+ while (*s == '-') {
+ /** "k0" gets NOT reduced **/
+ /** because "if (k0 == k)" **/
+ s++;
+ }
+ if (*s == '<')
+ s++;
+ if (isdigit((unsigned char)*s)) {
+ p0 = *s - '0';
+ s++;
+ }
+
+ if (*s == '\0'
+ /** *s == '^' cuts **/
+ || (*s == '$' && !myisalpha(word[i + k0]))) {
+ if (k0 == k) {
+ /** this is just a piece of the string **/
+ n0 += 2;
+ continue;
+ }
+
+ if (p0 < p) {
+ /** priority too low **/
+ n0 += 2;
+ continue;
+ }
+ /** rule fits; stop search **/
+ break;
+ }
+ n0 += 2;
+ } /** End of "while (parms.rules[n0][0] == c0)" **/
+
+ if (p0 >= p && parms.rules[n0][0] == c0) {
+ n += 2;
+ continue;
+ }
+ } /** end of follow-up stuff **/
+
+ /** replace string **/
+ s = parms.rules[n + 1].c_str();
+ p0 = (!parms.rules[n].empty() &&
+ strchr(parms.rules[n].c_str() + 1, '<') != NULL)
+ ? 1
+ : 0;
+ if (p0 == 1 && z == 0) {
+ /** rule with '<' is used **/
+ if (!target.empty() && *s != '\0' &&
+ (target[target.size()-1] == c || target[target.size()-1] == *s)) {
+ target.erase(target.size() - 1);
+ }
+ z0 = 1;
+ z = 1;
+ k0 = 0;
+ while (*s != '\0' && word[i + k0] != '\0') {
+ word[i + k0] = *s;
+ k0++;
+ s++;
+ }
+ if (k > k0)
+ strmove(&word[0] + i + k0, &word[0] + i + k);
+
+ /** new "actual letter" **/
+ c = word[i];
+ } else { /** no '<' rule used **/
+ i += k - 1;
+ z = 0;
+ while (*s != '\0' && *(s + 1) != '\0' && target.size() < len) {
+ if (target.empty() || target[target.size()-1] != *s) {
+ target.push_back(*s);
+ }
+ s++;
+ }
+ /** new "actual letter" **/
+ c = *s;
+ if (!parms.rules[n].empty() &&
+ strstr(parms.rules[n].c_str() + 1, "^^") != NULL) {
+ if (c != '\0') {
+ target.push_back(c);
+ }
+ strmove(&word[0], &word[0] + i + 1);
+ i = 0;
+ z0 = 1;
+ }
+ }
+ break;
+ } /** end of follow-up stuff **/
+ n += 2;
+ } /** end of while (parms.rules[n][0] == c) **/
+ } /** end of if (n >= 0) **/
+ if (z0 == 0) {
+ if (k && !p0 && target.size() < len && c != '\0') {
+ /** condense only double letters **/
+ target.push_back(c);
+ /// printf("\n setting \n");
+ }
+
+ i++;
+ z = 0;
+ k = 0;
+ }
+ } /** end of while ((c = word[i]) != '\0') **/
+
+ return target;
+} /** end of function "phonet" **/
diff --git a/extensions/spellcheck/hunspell/src/phonet.hxx b/extensions/spellcheck/hunspell/src/phonet.hxx
new file mode 100644
index 0000000000..2d58b3ba1b
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/phonet.hxx
@@ -0,0 +1,50 @@
+/* phonetic.c - generic replacement aglogithms for phonetic transformation
+ Copyright (C) 2000 Bjoern Jacke
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation;
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; If not, see
+ <http://www.gnu.org/licenses/>.
+
+ Changelog:
+
+ 2000-01-05 Bjoern Jacke <bjoern at j3e.de>
+ Initial Release insprired by the article about phonetic
+ transformations out of c't 25/1999
+
+ 2007-07-26 Bjoern Jacke <bjoern at j3e.de>
+ Released under MPL/GPL/LGPL tri-license for Hunspell
+
+ 2007-08-23 Laszlo Nemeth <nemeth at OOo>
+ Porting from Aspell to Hunspell using C-like structs
+*/
+
+#ifndef PHONET_HXX_
+#define PHONET_HXX_
+
+#define HASHSIZE 256
+#define MAXPHONETLEN 256
+#define MAXPHONETUTF8LEN (MAXPHONETLEN * 4)
+
+#include "hunvisapi.h"
+
+struct phonetable {
+ char utf8;
+ std::vector<std::string> rules;
+ int hash[HASHSIZE];
+};
+
+LIBHUNSPELL_DLL_EXPORTED void init_phonet_hash(phonetable& parms);
+
+LIBHUNSPELL_DLL_EXPORTED std::string phonet(const std::string& inword,
+ phonetable& phone);
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/replist.cxx b/extensions/spellcheck/hunspell/src/replist.cxx
new file mode 100644
index 0000000000..1395ade607
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/replist.cxx
@@ -0,0 +1,196 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <limits>
+
+#include "replist.hxx"
+#include "csutil.hxx"
+
+RepList::RepList(int n) {
+ dat = (replentry**)malloc(sizeof(replentry*) * n);
+ if (dat == 0)
+ size = 0;
+ else
+ size = n;
+ pos = 0;
+}
+
+RepList::~RepList() {
+ for (int i = 0; i < pos; i++) {
+ delete dat[i];
+ }
+ free(dat);
+}
+
+replentry* RepList::item(int n) {
+ return dat[n];
+}
+
+int RepList::find(const char* word) {
+ int p1 = 0;
+ int p2 = pos - 1;
+ int ret = -1;
+ while (p1 <= p2) {
+ int m = ((unsigned)p1 + (unsigned)p2) >> 1;
+ int c = strncmp(word, dat[m]->pattern.c_str(), dat[m]->pattern.size());
+ if (c < 0)
+ p2 = m - 1;
+ else if (c > 0)
+ p1 = m + 1;
+ else { // scan in the right half for a longer match
+ ret = m;
+ p1 = m + 1;
+ }
+ }
+ return ret;
+}
+
+std::string RepList::replace(const char* word, int ind, bool atstart) {
+ int type = atstart ? 1 : 0;
+ if (ind < 0)
+ return std::string();
+ if (strlen(word) == dat[ind]->pattern.size())
+ type = atstart ? 3 : 2;
+ while (type && dat[ind]->outstrings[type].empty())
+ type = (type == 2 && !atstart) ? 0 : type - 1;
+ return dat[ind]->outstrings[type];
+}
+
+int RepList::add(const std::string& in_pat1, const std::string& pat2) {
+ if (pos >= size || in_pat1.empty() || pat2.empty()) {
+ return 1;
+ }
+ // analyse word context
+ int type = 0;
+ std::string pat1(in_pat1);
+ if (pat1[0] == '_') {
+ pat1.erase(0, 1);
+ type = 1;
+ }
+ if (!pat1.empty() && pat1[pat1.size() - 1] == '_') {
+ type = type + 2;
+ pat1.erase(pat1.size() - 1);
+ }
+ mystrrep(pat1, "_", " ");
+
+ // find existing entry
+ int m = find(pat1.c_str());
+ if (m >= 0 && dat[m]->pattern == pat1) {
+ // since already used
+ dat[m]->outstrings[type] = pat2;
+ mystrrep(dat[m]->outstrings[type], "_", " ");
+ return 0;
+ }
+
+ // make a new entry if none exists
+ replentry* r = new replentry;
+ if (r == NULL)
+ return 1;
+ r->pattern = pat1;
+ r->outstrings[type] = pat2;
+ mystrrep(r->outstrings[type], "_", " ");
+ dat[pos++] = r;
+ // sort to the right place in the list
+ int i;
+ for (i = pos - 1; i > 0; i--) {
+ if (strcmp(r->pattern.c_str(), dat[i - 1]->pattern.c_str()) < 0) {
+ dat[i] = dat[i - 1];
+ } else
+ break;
+ }
+ dat[i] = r;
+ return 0;
+}
+
+bool RepList::conv(const std::string& in_word, std::string& dest) {
+ dest.clear();
+
+ size_t wordlen = in_word.size();
+ const char* word = in_word.c_str();
+
+ bool change = false;
+ for (size_t i = 0; i < wordlen; ++i) {
+ int n = find(word + i);
+ std::string l = replace(word + i, n, i == 0);
+ if (!l.empty()) {
+ dest.append(l);
+ i += dat[n]->pattern.size() - 1;
+ change = true;
+ } else {
+ dest.push_back(word[i]);
+ }
+ }
+
+ return change;
+}
+
diff --git a/extensions/spellcheck/hunspell/src/replist.hxx b/extensions/spellcheck/hunspell/src/replist.hxx
new file mode 100644
index 0000000000..08daeb4488
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/replist.hxx
@@ -0,0 +1,100 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* string replacement list class */
+#ifndef REPLIST_HXX_
+#define REPLIST_HXX_
+
+#include "w_char.hxx"
+
+#include <string>
+#include <vector>
+
+class RepList {
+ private:
+ RepList(const RepList&);
+ RepList& operator=(const RepList&);
+
+ protected:
+ replentry** dat;
+ int size;
+ int pos;
+
+ public:
+ explicit RepList(int n);
+ ~RepList();
+
+ int add(const std::string& pat1, const std::string& pat2);
+ replentry* item(int n);
+ int find(const char* word);
+ std::string replace(const char* word, int n, bool atstart);
+ bool conv(const std::string& word, std::string& dest);
+};
+#endif
diff --git a/extensions/spellcheck/hunspell/src/sources.mozbuild b/extensions/spellcheck/hunspell/src/sources.mozbuild
new file mode 100644
index 0000000000..6649bedb1f
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/sources.mozbuild
@@ -0,0 +1,17 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+hunspell_sources = [
+ '../glue/mozHunspellRLBoxSandbox.cpp',
+ 'affentry.cxx',
+ 'affixmgr.cxx',
+ 'csutil.cxx',
+ 'hashmgr.cxx',
+ 'hunspell.cxx',
+ 'phonet.cxx',
+ 'replist.cxx',
+ 'suggestmgr.cxx',
+]
diff --git a/extensions/spellcheck/hunspell/src/suggestmgr.cxx b/extensions/spellcheck/hunspell/src/suggestmgr.cxx
new file mode 100644
index 0000000000..6b363debd5
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/suggestmgr.cxx
@@ -0,0 +1,2263 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <time.h>
+
+#include "suggestmgr.hxx"
+#include "htypes.hxx"
+#include "csutil.hxx"
+
+const w_char W_VLINE = {'\0', '|'};
+
+#define MAX_CHAR_DISTANCE 4
+
+SuggestMgr::SuggestMgr(const char* tryme, unsigned int maxn, AffixMgr* aptr) {
+ // register affix manager and check in string of chars to
+ // try when building candidate suggestions
+ pAMgr = aptr;
+
+ csconv = NULL;
+
+ ckeyl = 0;
+ ckey = NULL;
+
+ ctryl = 0;
+ ctry = NULL;
+
+ utf8 = 0;
+ langnum = 0;
+ complexprefixes = 0;
+
+ maxSug = maxn;
+ nosplitsugs = 0;
+ maxngramsugs = MAXNGRAMSUGS;
+ maxcpdsugs = MAXCOMPOUNDSUGS;
+
+ if (pAMgr) {
+ langnum = pAMgr->get_langnum();
+ ckey = pAMgr->get_key_string();
+ nosplitsugs = pAMgr->get_nosplitsugs();
+ if (pAMgr->get_maxngramsugs() >= 0)
+ maxngramsugs = pAMgr->get_maxngramsugs();
+ utf8 = pAMgr->get_utf8();
+ if (pAMgr->get_maxcpdsugs() >= 0)
+ maxcpdsugs = pAMgr->get_maxcpdsugs();
+ if (!utf8) {
+ csconv = get_current_cs(pAMgr->get_encoding());
+ }
+ complexprefixes = pAMgr->get_complexprefixes();
+ }
+
+ if (ckey) {
+ if (utf8) {
+ ckeyl = u8_u16(ckey_utf, ckey);
+ } else {
+ ckeyl = strlen(ckey);
+ }
+ }
+
+ if (tryme) {
+ ctry = mystrdup(tryme);
+ if (ctry)
+ ctryl = strlen(ctry);
+ if (ctry && utf8) {
+ ctryl = u8_u16(ctry_utf, tryme);
+ }
+ }
+
+ // language with possible dash usage
+ // (latin letters or dash in TRY characters)
+ lang_with_dash_usage = (ctry &&
+ ((strchr(ctry, '-') != NULL) || (strchr(ctry, 'a') != NULL)));
+}
+
+SuggestMgr::~SuggestMgr() {
+ pAMgr = NULL;
+ if (ckey)
+ free(ckey);
+ ckey = NULL;
+ ckeyl = 0;
+ if (ctry)
+ free(ctry);
+ ctry = NULL;
+ ctryl = 0;
+ maxSug = 0;
+#ifdef MOZILLA_CLIENT
+ delete[] csconv;
+#endif
+}
+
+void SuggestMgr::testsug(std::vector<std::string>& wlst,
+ const std::string& candidate,
+ int cpdsuggest,
+ int* timer,
+ clock_t* timelimit) {
+ int cwrd = 1;
+ if (wlst.size() == maxSug)
+ return;
+ for (size_t k = 0; k < wlst.size(); ++k) {
+ if (wlst[k] == candidate) {
+ cwrd = 0;
+ break;
+ }
+ }
+ if ((cwrd) && checkword(candidate, cpdsuggest, timer, timelimit)) {
+ wlst.push_back(candidate);
+ }
+}
+
+/* generate suggestions for a misspelled word
+ * pass in address of array of char * pointers
+ * onlycompoundsug: probably bad suggestions (need for ngram sugs, too)
+ * return value: true, if there is a good suggestion
+ * (REP, ph: or a dictionary word pair)
+ */
+bool SuggestMgr::suggest(std::vector<std::string>& slst,
+ const char* w,
+ int* onlycompoundsug) {
+ int nocompoundtwowords = 0;
+ std::vector<w_char> word_utf;
+ int wl = 0;
+ size_t nsugorig = slst.size();
+ std::string w2;
+ const char* word = w;
+ size_t oldSug = 0;
+ bool good_suggestion = false;
+
+ // word reversing wrapper for complex prefixes
+ if (complexprefixes) {
+ w2.assign(w);
+ if (utf8)
+ reverseword_utf(w2);
+ else
+ reverseword(w2);
+ word = w2.c_str();
+ }
+
+ if (utf8) {
+ wl = u8_u16(word_utf, word);
+ if (wl == -1) {
+ return false;
+ }
+ }
+
+ for (int cpdsuggest = 0; (cpdsuggest < 2) && (nocompoundtwowords == 0) && !good_suggestion;
+ cpdsuggest++) {
+
+ clock_t timelimit;
+ // initialize both in non-compound and compound cycles
+ timelimit = clock();
+
+ // limit compound suggestion
+ if (cpdsuggest > 0)
+ oldSug = slst.size();
+
+ // suggestions for an uppercase word (html -> HTML)
+ if (slst.size() < maxSug) {
+ size_t i = slst.size();
+ if (utf8)
+ capchars_utf(slst, word_utf.data(), wl, cpdsuggest);
+ else
+ capchars(slst, word, cpdsuggest);
+ if (slst.size() > i)
+ good_suggestion = true;
+ }
+
+ // perhaps we made a typical fault of spelling
+ if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
+ size_t i = slst.size();
+ replchars(slst, word, cpdsuggest);
+ if (slst.size() > i)
+ good_suggestion = true;
+ }
+ if (clock() > timelimit + TIMELIMIT_SUGGESTION)
+ return good_suggestion;
+
+ // perhaps we made chose the wrong char from a related set
+ if ((slst.size() < maxSug) &&
+ (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
+ mapchars(slst, word, cpdsuggest);
+ }
+ if (clock() > timelimit + TIMELIMIT_SUGGESTION)
+ return good_suggestion;
+
+ // only suggest compound words when no other suggestion
+ if ((cpdsuggest == 0) && (slst.size() > nsugorig))
+ nocompoundtwowords = 1;
+
+ // did we swap the order of chars by mistake
+ if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
+ if (utf8)
+ swapchar_utf(slst, word_utf.data(), wl, cpdsuggest);
+ else
+ swapchar(slst, word, cpdsuggest);
+ }
+ if (clock() > timelimit + TIMELIMIT_SUGGESTION)
+ return good_suggestion;
+
+ // did we swap the order of non adjacent chars by mistake
+ if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
+ if (utf8)
+ longswapchar_utf(slst, word_utf.data(), wl, cpdsuggest);
+ else
+ longswapchar(slst, word, cpdsuggest);
+ }
+ if (clock() > timelimit + TIMELIMIT_SUGGESTION)
+ return good_suggestion;
+
+ // did we just hit the wrong key in place of a good char (case and keyboard)
+ if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
+ if (utf8)
+ badcharkey_utf(slst, word_utf.data(), wl, cpdsuggest);
+ else
+ badcharkey(slst, word, cpdsuggest);
+ }
+ if (clock() > timelimit + TIMELIMIT_SUGGESTION)
+ return good_suggestion;
+
+ // did we add a char that should not be there
+ if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
+ if (utf8)
+ extrachar_utf(slst, word_utf.data(), wl, cpdsuggest);
+ else
+ extrachar(slst, word, cpdsuggest);
+ }
+ if (clock() > timelimit + TIMELIMIT_SUGGESTION)
+ return good_suggestion;
+
+ // did we forgot a char
+ if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
+ if (utf8)
+ forgotchar_utf(slst, word_utf.data(), wl, cpdsuggest);
+ else
+ forgotchar(slst, word, cpdsuggest);
+ }
+ if (clock() > timelimit + TIMELIMIT_SUGGESTION)
+ return good_suggestion;
+
+ // did we move a char
+ if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
+ if (utf8)
+ movechar_utf(slst, word_utf.data(), wl, cpdsuggest);
+ else
+ movechar(slst, word, cpdsuggest);
+ }
+ if (clock() > timelimit + TIMELIMIT_SUGGESTION)
+ return good_suggestion;
+
+ // did we just hit the wrong key in place of a good char
+ if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
+ if (utf8)
+ badchar_utf(slst, word_utf.data(), wl, cpdsuggest);
+ else
+ badchar(slst, word, cpdsuggest);
+ }
+ if (clock() > timelimit + TIMELIMIT_SUGGESTION)
+ return good_suggestion;
+
+ // did we double two characters
+ if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
+ if (utf8)
+ doubletwochars_utf(slst, word_utf.data(), wl, cpdsuggest);
+ else
+ doubletwochars(slst, word, cpdsuggest);
+ }
+ if (clock() > timelimit + TIMELIMIT_SUGGESTION)
+ return good_suggestion;
+
+ // perhaps we forgot to hit space and two words ran together
+ // (dictionary word pairs have top priority here, so
+ // we always suggest them, in despite of nosplitsugs, and
+ // drop compound word and other suggestions)
+ if (!cpdsuggest || (!nosplitsugs && slst.size() < oldSug + maxcpdsugs)) {
+ good_suggestion = twowords(slst, word, cpdsuggest, good_suggestion);
+ }
+ if (clock() > timelimit + TIMELIMIT_SUGGESTION)
+ return good_suggestion;
+
+ } // repeating ``for'' statement compounding support
+
+ if (!nocompoundtwowords && (!slst.empty()) && onlycompoundsug)
+ *onlycompoundsug = 1;
+
+ return good_suggestion;
+}
+
+// suggestions for an uppercase word (html -> HTML)
+void SuggestMgr::capchars_utf(std::vector<std::string>& wlst,
+ const w_char* word,
+ int wl,
+ int cpdsuggest) {
+ std::vector<w_char> candidate_utf(word, word + wl);
+ mkallcap_utf(candidate_utf, langnum);
+ std::string candidate;
+ u16_u8(candidate, candidate_utf);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+}
+
+// suggestions for an uppercase word (html -> HTML)
+void SuggestMgr::capchars(std::vector<std::string>& wlst,
+ const char* word,
+ int cpdsuggest) {
+ std::string candidate(word);
+ mkallcap(candidate, csconv);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+}
+
+// suggestions for when chose the wrong char out of a related set
+int SuggestMgr::mapchars(std::vector<std::string>& wlst,
+ const char* word,
+ int cpdsuggest) {
+ std::string candidate;
+ clock_t timelimit;
+ int timer;
+
+ int wl = strlen(word);
+ if (wl < 2 || !pAMgr)
+ return wlst.size();
+
+ const std::vector<mapentry>& maptable = pAMgr->get_maptable();
+ if (maptable.empty())
+ return wlst.size();
+
+ timelimit = clock();
+ timer = MINTIMER;
+ return map_related(word, candidate, 0, wlst, cpdsuggest,
+ maptable, &timer, &timelimit);
+}
+
+int SuggestMgr::map_related(const char* word,
+ std::string& candidate,
+ int wn,
+ std::vector<std::string>& wlst,
+ int cpdsuggest,
+ const std::vector<mapentry>& maptable,
+ int* timer,
+ clock_t* timelimit) {
+ if (*(word + wn) == '\0') {
+ int cwrd = 1;
+ for (size_t m = 0; m < wlst.size(); ++m) {
+ if (wlst[m] == candidate) {
+ cwrd = 0;
+ break;
+ }
+ }
+ if ((cwrd) && checkword(candidate, cpdsuggest, timer, timelimit)) {
+ if (wlst.size() < maxSug) {
+ wlst.push_back(candidate);
+ }
+ }
+ return wlst.size();
+ }
+ int in_map = 0;
+ for (size_t j = 0; j < maptable.size(); ++j) {
+ for (size_t k = 0; k < maptable[j].size(); ++k) {
+ size_t len = maptable[j][k].size();
+ if (strncmp(maptable[j][k].c_str(), word + wn, len) == 0) {
+ in_map = 1;
+ size_t cn = candidate.size();
+ for (size_t l = 0; l < maptable[j].size(); ++l) {
+ candidate.resize(cn);
+ candidate.append(maptable[j][l]);
+ map_related(word, candidate, wn + len, wlst,
+ cpdsuggest, maptable, timer, timelimit);
+ if (!(*timer))
+ return wlst.size();
+ }
+ }
+ }
+ }
+ if (!in_map) {
+ candidate.push_back(*(word + wn));
+ map_related(word, candidate, wn + 1, wlst, cpdsuggest,
+ maptable, timer, timelimit);
+ }
+ return wlst.size();
+}
+
+// suggestions for a typical fault of spelling, that
+// differs with more, than 1 letter from the right form.
+int SuggestMgr::replchars(std::vector<std::string>& wlst,
+ const char* word,
+ int cpdsuggest) {
+ std::string candidate;
+ int wl = strlen(word);
+ if (wl < 2 || !pAMgr)
+ return wlst.size();
+ const std::vector<replentry>& reptable = pAMgr->get_reptable();
+ for (size_t i = 0; i < reptable.size(); ++i) {
+ const char* r = word;
+ // search every occurence of the pattern in the word
+ while ((r = strstr(r, reptable[i].pattern.c_str())) != NULL) {
+ int type = (r == word) ? 1 : 0;
+ if (r - word + reptable[i].pattern.size() == strlen(word))
+ type += 2;
+ while (type && reptable[i].outstrings[type].empty())
+ type = (type == 2 && r != word) ? 0 : type - 1;
+ const std::string&out = reptable[i].outstrings[type];
+ if (out.empty()) {
+ ++r;
+ continue;
+ }
+ candidate.assign(word);
+ candidate.resize(r - word);
+ candidate.append(reptable[i].outstrings[type]);
+ candidate.append(r + reptable[i].pattern.size());
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ // check REP suggestions with space
+ size_t sp = candidate.find(' ');
+ if (sp != std::string::npos) {
+ size_t prev = 0;
+ while (sp != std::string::npos) {
+ std::string prev_chunk = candidate.substr(prev, sp - prev);
+ if (checkword(prev_chunk, 0, NULL, NULL)) {
+ size_t oldns = wlst.size();
+ std::string post_chunk = candidate.substr(sp + 1);
+ testsug(wlst, post_chunk, cpdsuggest, NULL, NULL);
+ if (oldns < wlst.size()) {
+ wlst[wlst.size() - 1] = candidate;
+ }
+ }
+ prev = sp + 1;
+ sp = candidate.find(' ', prev);
+ }
+ }
+ r++; // search for the next letter
+ }
+ }
+ return wlst.size();
+}
+
+// perhaps we doubled two characters
+// (for example vacation -> vacacation)
+// The recognized pattern with regex back-references:
+// "(.)(.)\1\2\1" or "..(.)(.)\1\2"
+
+int SuggestMgr::doubletwochars(std::vector<std::string>& wlst,
+ const char* word,
+ int cpdsuggest) {
+ int state = 0;
+ int wl = strlen(word);
+ if (wl < 5 || !pAMgr)
+ return wlst.size();
+ for (int i = 2; i < wl; i++) {
+ if (word[i] == word[i - 2]) {
+ state++;
+ if (state == 3 || (state == 2 && i >= 4)) {
+ std::string candidate(word, word + i - 1);
+ candidate.insert(candidate.end(), word + i + 1, word + wl);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ state = 0;
+ }
+ } else {
+ state = 0;
+ }
+ }
+ return wlst.size();
+}
+
+// perhaps we doubled two characters
+// (for example vacation -> vacacation)
+// The recognized pattern with regex back-references:
+// "(.)(.)\1\2\1" or "..(.)(.)\1\2"
+
+int SuggestMgr::doubletwochars_utf(std::vector<std::string>& wlst,
+ const w_char* word,
+ int wl,
+ int cpdsuggest) {
+ int state = 0;
+ if (wl < 5 || !pAMgr)
+ return wlst.size();
+ for (int i = 2; i < wl; i++) {
+ if (word[i] == word[i - 2]) {
+ state++;
+ if (state == 3 || (state == 2 && i >= 4)) {
+ std::vector<w_char> candidate_utf(word, word + i - 1);
+ candidate_utf.insert(candidate_utf.end(), word + i + 1, word + wl);
+ std::string candidate;
+ u16_u8(candidate, candidate_utf);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ state = 0;
+ }
+ } else {
+ state = 0;
+ }
+ }
+ return wlst.size();
+}
+
+// error is wrong char in place of correct one (case and keyboard related
+// version)
+int SuggestMgr::badcharkey(std::vector<std::string>& wlst,
+ const char* word,
+ int cpdsuggest) {
+ std::string candidate(word);
+
+ // swap out each char one by one and try uppercase and neighbor
+ // keyboard chars in its place to see if that makes a good word
+ for (size_t i = 0; i < candidate.size(); ++i) {
+ char tmpc = candidate[i];
+ // check with uppercase letters
+ candidate[i] = csconv[((unsigned char)tmpc)].cupper;
+ if (tmpc != candidate[i]) {
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ candidate[i] = tmpc;
+ }
+ // check neighbor characters in keyboard string
+ if (!ckey)
+ continue;
+ char* loc = strchr(ckey, tmpc);
+ while (loc) {
+ if ((loc > ckey) && (*(loc - 1) != '|')) {
+ candidate[i] = *(loc - 1);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ }
+ if ((*(loc + 1) != '|') && (*(loc + 1) != '\0')) {
+ candidate[i] = *(loc + 1);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ }
+ loc = strchr(loc + 1, tmpc);
+ }
+ candidate[i] = tmpc;
+ }
+ return wlst.size();
+}
+
+// error is wrong char in place of correct one (case and keyboard related
+// version)
+int SuggestMgr::badcharkey_utf(std::vector<std::string>& wlst,
+ const w_char* word,
+ int wl,
+ int cpdsuggest) {
+ std::string candidate;
+ std::vector<w_char> candidate_utf(word, word + wl);
+ // swap out each char one by one and try all the tryme
+ // chars in its place to see if that makes a good word
+ for (int i = 0; i < wl; i++) {
+ w_char tmpc = candidate_utf[i];
+ // check with uppercase letters
+ candidate_utf[i] = upper_utf(candidate_utf[i], 1);
+ if (tmpc != candidate_utf[i]) {
+ u16_u8(candidate, candidate_utf);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ candidate_utf[i] = tmpc;
+ }
+ // check neighbor characters in keyboard string
+ if (!ckey)
+ continue;
+ size_t loc = 0;
+ while ((loc < ckeyl) && ckey_utf[loc] != tmpc)
+ ++loc;
+ while (loc < ckeyl) {
+ if ((loc > 0) && ckey_utf[loc - 1] != W_VLINE) {
+ candidate_utf[i] = ckey_utf[loc - 1];
+ u16_u8(candidate, candidate_utf);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ }
+ if (((loc + 1) < ckeyl) && (ckey_utf[loc + 1] != W_VLINE)) {
+ candidate_utf[i] = ckey_utf[loc + 1];
+ u16_u8(candidate, candidate_utf);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ }
+ do {
+ loc++;
+ } while ((loc < ckeyl) && ckey_utf[loc] != tmpc);
+ }
+ candidate_utf[i] = tmpc;
+ }
+ return wlst.size();
+}
+
+// error is wrong char in place of correct one
+int SuggestMgr::badchar(std::vector<std::string>& wlst, const char* word, int cpdsuggest) {
+ std::string candidate(word);
+ clock_t timelimit = clock();
+ int timer = MINTIMER;
+ // swap out each char one by one and try all the tryme
+ // chars in its place to see if that makes a good word
+ for (size_t j = 0; j < ctryl; ++j) {
+ for (std::string::reverse_iterator aI = candidate.rbegin(), aEnd = candidate.rend(); aI != aEnd; ++aI) {
+ char tmpc = *aI;
+ if (ctry[j] == tmpc)
+ continue;
+ *aI = ctry[j];
+ testsug(wlst, candidate, cpdsuggest, &timer, &timelimit);
+ if (!timer)
+ return wlst.size();
+ *aI = tmpc;
+ }
+ }
+ return wlst.size();
+}
+
+// error is wrong char in place of correct one
+int SuggestMgr::badchar_utf(std::vector<std::string>& wlst,
+ const w_char* word,
+ int wl,
+ int cpdsuggest) {
+ std::vector<w_char> candidate_utf(word, word + wl);
+ std::string candidate;
+ clock_t timelimit = clock();
+ int timer = MINTIMER;
+ // swap out each char one by one and try all the tryme
+ // chars in its place to see if that makes a good word
+ for (size_t j = 0; j < ctryl; ++j) {
+ for (int i = wl - 1; i >= 0; i--) {
+ w_char tmpc = candidate_utf[i];
+ if (tmpc == ctry_utf[j])
+ continue;
+ candidate_utf[i] = ctry_utf[j];
+ u16_u8(candidate, candidate_utf);
+ testsug(wlst, candidate, cpdsuggest, &timer, &timelimit);
+ if (!timer)
+ return wlst.size();
+ candidate_utf[i] = tmpc;
+ }
+ }
+ return wlst.size();
+}
+
+// error is word has an extra letter it does not need
+int SuggestMgr::extrachar_utf(std::vector<std::string>& wlst,
+ const w_char* word,
+ int wl,
+ int cpdsuggest) {
+ std::vector<w_char> candidate_utf(word, word + wl);
+ if (candidate_utf.size() < 2)
+ return wlst.size();
+ // try omitting one char of word at a time
+ for (size_t i = 0; i < candidate_utf.size(); ++i) {
+ size_t index = candidate_utf.size() - 1 - i;
+ w_char tmpc = candidate_utf[index];
+ candidate_utf.erase(candidate_utf.begin() + index);
+ std::string candidate;
+ u16_u8(candidate, candidate_utf);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ candidate_utf.insert(candidate_utf.begin() + index, tmpc);
+ }
+ return wlst.size();
+}
+
+// error is word has an extra letter it does not need
+int SuggestMgr::extrachar(std::vector<std::string>& wlst,
+ const char* word,
+ int cpdsuggest) {
+ std::string candidate(word);
+ if (candidate.size() < 2)
+ return wlst.size();
+ // try omitting one char of word at a time
+ for (size_t i = 0; i < candidate.size(); ++i) {
+ size_t index = candidate.size() - 1 - i;
+ char tmpc = candidate[index];
+ candidate.erase(candidate.begin() + index);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ candidate.insert(candidate.begin() + index, tmpc);
+ }
+ return wlst.size();
+}
+
+// error is missing a letter it needs
+int SuggestMgr::forgotchar(std::vector<std::string>& wlst,
+ const char* word,
+ int cpdsuggest) {
+ std::string candidate(word);
+ clock_t timelimit = clock();
+ int timer = MINTIMER;
+
+ // try inserting a tryme character before every letter (and the null
+ // terminator)
+ for (size_t k = 0; k < ctryl; ++k) {
+ for (size_t i = 0; i <= candidate.size(); ++i) {
+ size_t index = candidate.size() - i;
+ candidate.insert(candidate.begin() + index, ctry[k]);
+ testsug(wlst, candidate, cpdsuggest, &timer, &timelimit);
+ if (!timer)
+ return wlst.size();
+ candidate.erase(candidate.begin() + index);
+ }
+ }
+ return wlst.size();
+}
+
+// error is missing a letter it needs
+int SuggestMgr::forgotchar_utf(std::vector<std::string>& wlst,
+ const w_char* word,
+ int wl,
+ int cpdsuggest) {
+ std::vector<w_char> candidate_utf(word, word + wl);
+ clock_t timelimit = clock();
+ int timer = MINTIMER;
+
+ // try inserting a tryme character at the end of the word and before every
+ // letter
+ for (size_t k = 0; k < ctryl; ++k) {
+ for (size_t i = 0; i <= candidate_utf.size(); ++i) {
+ size_t index = candidate_utf.size() - i;
+ candidate_utf.insert(candidate_utf.begin() + index, ctry_utf[k]);
+ std::string candidate;
+ u16_u8(candidate, candidate_utf);
+ testsug(wlst, candidate, cpdsuggest, &timer, &timelimit);
+ if (!timer)
+ return wlst.size();
+ candidate_utf.erase(candidate_utf.begin() + index);
+ }
+ }
+ return wlst.size();
+}
+
+/* error is should have been two words
+ * return value is true, if there is a dictionary word pair,
+ * or there was already a good suggestion before calling
+ * this function.
+ */
+bool SuggestMgr::twowords(std::vector<std::string>& wlst,
+ const char* word,
+ int cpdsuggest,
+ bool good) {
+ int c2;
+ int forbidden = 0;
+ int cwrd;
+
+ int wl = strlen(word);
+ if (wl < 3)
+ return false;
+
+ if (langnum == LANG_hu)
+ forbidden = check_forbidden(word, wl);
+
+ char* candidate = (char*)malloc(wl + 2);
+ strcpy(candidate + 1, word);
+
+ // split the string into two pieces after every char
+ // if both pieces are good words make them a suggestion
+ for (char* p = candidate + 1; p[1] != '\0'; p++) {
+ p[-1] = *p;
+ // go to end of the UTF-8 character
+ while (utf8 && ((p[1] & 0xc0) == 0x80)) {
+ *p = p[1];
+ p++;
+ }
+ if (utf8 && p[1] == '\0')
+ break; // last UTF-8 character
+
+ // Suggest only word pairs, if they are listed in the dictionary.
+ // For example, adding "a lot" to the English dic file will
+ // result only "alot" -> "a lot" suggestion instead of
+ // "alto, slot, alt, lot, allot, aloft, aloe, clot, plot, blot, a lot".
+ // Note: using "ph:alot" keeps the other suggestions:
+ // a lot ph:alot
+ // alot -> a lot, alto, slot...
+ *p = ' ';
+ if (!cpdsuggest && checkword(candidate, cpdsuggest, NULL, NULL)) {
+ // remove not word pair suggestions
+ if (!good) {
+ good = true;
+ wlst.clear();
+ }
+ wlst.insert(wlst.begin(), candidate);
+ }
+
+ // word pairs with dash?
+ if (lang_with_dash_usage) {
+ *p = '-';
+
+ if (!cpdsuggest && checkword(candidate, cpdsuggest, NULL, NULL)) {
+ // remove not word pair suggestions
+ if (!good) {
+ good = true;
+ wlst.clear();
+ }
+ wlst.insert(wlst.begin(), candidate);
+ }
+ }
+
+ if (wlst.size() < maxSug && !nosplitsugs && !good) {
+ *p = '\0';
+ int c1 = checkword(candidate, cpdsuggest, NULL, NULL);
+ if (c1) {
+ c2 = checkword((p + 1), cpdsuggest, NULL, NULL);
+ if (c2) {
+ // spec. Hungarian code (TODO need a better compound word support)
+ if ((langnum == LANG_hu) && !forbidden &&
+ // if 3 repeating letter, use - instead of space
+ (((p[-1] == p[1]) &&
+ (((p > candidate + 1) && (p[-1] == p[-2])) || (p[-1] == p[2]))) ||
+ // or multiple compounding, with more, than 6 syllables
+ ((c1 == 3) && (c2 >= 2))))
+ *p = '-';
+ else
+ *p = ' ';
+
+ cwrd = 1;
+ for (size_t k = 0; k < wlst.size(); ++k) {
+ if (wlst[k] == candidate) {
+ cwrd = 0;
+ break;
+ }
+ }
+
+ if (cwrd && (wlst.size() < maxSug))
+ wlst.push_back(candidate);
+
+ // add two word suggestion with dash, depending on the language
+ // Note that cwrd doesn't modified for REP twoword sugg.
+ if ( !nosplitsugs && lang_with_dash_usage &&
+ mystrlen(p + 1) > 1 && mystrlen(candidate) - mystrlen(p) > 1) {
+ *p = '-';
+ for (size_t k = 0; k < wlst.size(); ++k) {
+ if (wlst[k] == candidate) {
+ cwrd = 0;
+ break;
+ }
+ }
+
+ if ((wlst.size() < maxSug) && cwrd)
+ wlst.push_back(candidate);
+ }
+ }
+ }
+ }
+ }
+ free(candidate);
+ return good;
+}
+
+// error is adjacent letter were swapped
+int SuggestMgr::swapchar(std::vector<std::string>& wlst,
+ const char* word,
+ int cpdsuggest) {
+ std::string candidate(word);
+ if (candidate.size() < 2)
+ return wlst.size();
+
+ // try swapping adjacent chars one by one
+ for (size_t i = 0; i < candidate.size() - 1; ++i) {
+ std::swap(candidate[i], candidate[i+1]);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ std::swap(candidate[i], candidate[i+1]);
+ }
+
+ // try double swaps for short words
+ // ahev -> have, owudl -> would
+ if (candidate.size() == 4 || candidate.size() == 5) {
+ candidate[0] = word[1];
+ candidate[1] = word[0];
+ candidate[2] = word[2];
+ candidate[candidate.size() - 2] = word[candidate.size() - 1];
+ candidate[candidate.size() - 1] = word[candidate.size() - 2];
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ if (candidate.size() == 5) {
+ candidate[0] = word[0];
+ candidate[1] = word[2];
+ candidate[2] = word[1];
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ }
+ }
+
+ return wlst.size();
+}
+
+// error is adjacent letter were swapped
+int SuggestMgr::swapchar_utf(std::vector<std::string>& wlst,
+ const w_char* word,
+ int wl,
+ int cpdsuggest) {
+ std::vector<w_char> candidate_utf(word, word + wl);
+ if (candidate_utf.size() < 2)
+ return wlst.size();
+
+ std::string candidate;
+ // try swapping adjacent chars one by one
+ for (size_t i = 0; i < candidate_utf.size() - 1; ++i) {
+ std::swap(candidate_utf[i], candidate_utf[i+1]);
+ u16_u8(candidate, candidate_utf);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ std::swap(candidate_utf[i], candidate_utf[i+1]);
+ }
+
+ // try double swaps for short words
+ // ahev -> have, owudl -> would, suodn -> sound
+ if (candidate_utf.size() == 4 || candidate_utf.size() == 5) {
+ candidate_utf[0] = word[1];
+ candidate_utf[1] = word[0];
+ candidate_utf[2] = word[2];
+ candidate_utf[candidate_utf.size() - 2] = word[candidate_utf.size() - 1];
+ candidate_utf[candidate_utf.size() - 1] = word[candidate_utf.size() - 2];
+ u16_u8(candidate, candidate_utf);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ if (candidate_utf.size() == 5) {
+ candidate_utf[0] = word[0];
+ candidate_utf[1] = word[2];
+ candidate_utf[2] = word[1];
+ u16_u8(candidate, candidate_utf);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ }
+ }
+ return wlst.size();
+}
+
+// error is not adjacent letter were swapped
+int SuggestMgr::longswapchar(std::vector<std::string>& wlst,
+ const char* word,
+ int cpdsuggest) {
+ std::string candidate(word);
+ // try swapping not adjacent chars one by one
+ for (std::string::iterator p = candidate.begin(); p < candidate.end(); ++p) {
+ for (std::string::iterator q = candidate.begin(); q < candidate.end(); ++q) {
+ size_t distance = std::abs(std::distance(q, p));
+ if (distance > 1 && distance <= MAX_CHAR_DISTANCE) {
+ std::swap(*p, *q);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ std::swap(*p, *q);
+ }
+ }
+ }
+ return wlst.size();
+}
+
+// error is adjacent letter were swapped
+int SuggestMgr::longswapchar_utf(std::vector<std::string>& wlst,
+ const w_char* word,
+ int wl,
+ int cpdsuggest) {
+ std::vector<w_char> candidate_utf(word, word + wl);
+ // try swapping not adjacent chars
+ for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) {
+ for (std::vector<w_char>::iterator q = candidate_utf.begin(); q < candidate_utf.end(); ++q) {
+ size_t distance = std::abs(std::distance(q, p));
+ if (distance > 1 && distance <= MAX_CHAR_DISTANCE) {
+ std::swap(*p, *q);
+ std::string candidate;
+ u16_u8(candidate, candidate_utf);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ std::swap(*p, *q);
+ }
+ }
+ }
+ return wlst.size();
+}
+
+// error is a letter was moved
+int SuggestMgr::movechar(std::vector<std::string>& wlst,
+ const char* word,
+ int cpdsuggest) {
+ std::string candidate(word);
+ if (candidate.size() < 2)
+ return wlst.size();
+
+ // try moving a char
+ for (std::string::iterator p = candidate.begin(); p < candidate.end(); ++p) {
+ for (std::string::iterator q = p + 1; q < candidate.end() && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) {
+ std::swap(*q, *(q - 1));
+ if (std::distance(p, q) < 2)
+ continue; // omit swap char
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ }
+ std::copy(word, word + candidate.size(), candidate.begin());
+ }
+
+ for (std::string::reverse_iterator p = candidate.rbegin(), pEnd = candidate.rend() - 1; p != pEnd; ++p) {
+ for (std::string::reverse_iterator q = p + 1, qEnd = candidate.rend(); q != qEnd && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) {
+ std::swap(*q, *(q - 1));
+ if (std::distance(p, q) < 2)
+ continue; // omit swap char
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ }
+ std::copy(word, word + candidate.size(), candidate.begin());
+ }
+
+ return wlst.size();
+}
+
+// error is a letter was moved
+int SuggestMgr::movechar_utf(std::vector<std::string>& wlst,
+ const w_char* word,
+ int wl,
+ int cpdsuggest) {
+ std::vector<w_char> candidate_utf(word, word + wl);
+ if (candidate_utf.size() < 2)
+ return wlst.size();
+
+ // try moving a char
+ for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) {
+ for (std::vector<w_char>::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) {
+ std::swap(*q, *(q - 1));
+ if (std::distance(p, q) < 2)
+ continue; // omit swap char
+ std::string candidate;
+ u16_u8(candidate, candidate_utf);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ }
+ std::copy(word, word + candidate_utf.size(), candidate_utf.begin());
+ }
+
+ for (std::vector<w_char>::reverse_iterator p = candidate_utf.rbegin(); p < candidate_utf.rend(); ++p) {
+ for (std::vector<w_char>::reverse_iterator q = p + 1; q < candidate_utf.rend() && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) {
+ std::swap(*q, *(q - 1));
+ if (std::distance(p, q) < 2)
+ continue; // omit swap char
+ std::string candidate;
+ u16_u8(candidate, candidate_utf);
+ testsug(wlst, candidate, cpdsuggest, NULL, NULL);
+ }
+ std::copy(word, word + candidate_utf.size(), candidate_utf.begin());
+ }
+
+ return wlst.size();
+}
+
+// generate a set of suggestions for very poorly spelled words
+void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
+ const char* w,
+ const std::vector<HashMgr*>& rHMgr,
+ int captype) {
+ int lval;
+ int sc;
+ int lp, lpphon;
+ int nonbmp = 0;
+
+ // exhaustively search through all root words
+ // keeping track of the MAX_ROOTS most similar root words
+ struct hentry* roots[MAX_ROOTS];
+ char* rootsphon[MAX_ROOTS];
+ int scores[MAX_ROOTS];
+ int scoresphon[MAX_ROOTS];
+ for (int i = 0; i < MAX_ROOTS; i++) {
+ roots[i] = NULL;
+ scores[i] = -100 * i;
+ rootsphon[i] = NULL;
+ scoresphon[i] = -100 * i;
+ }
+ lp = MAX_ROOTS - 1;
+ lpphon = MAX_ROOTS - 1;
+ int low = NGRAM_LOWERING;
+
+ std::string w2;
+ const char* word = w;
+
+ // word reversing wrapper for complex prefixes
+ if (complexprefixes) {
+ w2.assign(w);
+ if (utf8)
+ reverseword_utf(w2);
+ else
+ reverseword(w2);
+ word = w2.c_str();
+ }
+
+ std::vector<w_char> u8;
+ int nc = strlen(word);
+ int n = (utf8) ? u8_u16(u8, word) : nc;
+
+ // set character based ngram suggestion for words with non-BMP Unicode
+ // characters
+ if (n == -1) {
+ utf8 = 0; // XXX not state-free
+ n = nc;
+ nonbmp = 1;
+ low = 0;
+ }
+
+ struct hentry* hp = NULL;
+ int col = -1;
+ phonetable* ph = (pAMgr) ? pAMgr->get_phonetable() : NULL;
+ std::string target;
+ std::string candidate;
+ std::vector<w_char> w_candidate;
+ if (ph) {
+ if (utf8) {
+ u8_u16(w_candidate, word);
+ mkallcap_utf(w_candidate, langnum);
+ u16_u8(candidate, w_candidate);
+ } else {
+ candidate.assign(word);
+ if (!nonbmp)
+ mkallcap(candidate, csconv);
+ }
+ target = phonet(candidate, *ph); // XXX phonet() is 8-bit (nc, not n)
+ }
+
+ FLAG forbiddenword = pAMgr ? pAMgr->get_forbiddenword() : FLAG_NULL;
+ FLAG nosuggest = pAMgr ? pAMgr->get_nosuggest() : FLAG_NULL;
+ FLAG nongramsuggest = pAMgr ? pAMgr->get_nongramsuggest() : FLAG_NULL;
+ FLAG onlyincompound = pAMgr ? pAMgr->get_onlyincompound() : FLAG_NULL;
+
+ std::vector<w_char> w_word, w_target;
+ if (utf8) {
+ u8_u16(w_word, word);
+ u8_u16(w_target, target);
+ }
+
+ std::string f;
+ std::vector<w_char> w_f;
+
+ for (size_t i = 0; i < rHMgr.size(); ++i) {
+ while (0 != (hp = rHMgr[i]->walk_hashtable(col, hp))) {
+ // skip exceptions
+ if (
+ // skip it, if the word length different by 5 or
+ // more characters (to avoid strange suggestions)
+ // (except Unicode characters over BMP)
+ (((abs(n - hp->clen) > 4) && !nonbmp)) ||
+ // don't suggest capitalized dictionary words for
+ // lower case misspellings in ngram suggestions, except
+ // - PHONE usage, or
+ // - in the case of German, where not only proper
+ // nouns are capitalized, or
+ // - the capitalized word has special pronunciation
+ ((captype == NOCAP) && (hp->var & H_OPT_INITCAP) &&
+ !ph && (langnum != LANG_de) && !(hp->var & H_OPT_PHON)) ||
+ // or it has one of the following special flags
+ ((hp->astr) && (pAMgr) &&
+ (TESTAFF(hp->astr, forbiddenword, hp->alen) ||
+ TESTAFF(hp->astr, ONLYUPCASEFLAG, hp->alen) ||
+ TESTAFF(hp->astr, nosuggest, hp->alen) ||
+ TESTAFF(hp->astr, nongramsuggest, hp->alen) ||
+ TESTAFF(hp->astr, onlyincompound, hp->alen)))
+ )
+ continue;
+
+ if (utf8) {
+ u8_u16(w_f, HENTRY_WORD(hp));
+
+ int leftcommon = leftcommonsubstring(w_word, w_f);
+ if (low) {
+ // lowering dictionary word
+ mkallsmall_utf(w_f, langnum);
+ }
+ sc = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon;
+ } else {
+ f.assign(HENTRY_WORD(hp));
+
+ int leftcommon = leftcommonsubstring(word, f.c_str());
+ if (low) {
+ // lowering dictionary word
+ mkallsmall(f, csconv);
+ }
+ sc = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon;
+ }
+
+ // check special pronunciation
+ f.clear();
+ if ((hp->var & H_OPT_PHON) &&
+ copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) {
+ int sc2;
+ if (utf8) {
+ u8_u16(w_f, f);
+
+ int leftcommon = leftcommonsubstring(w_word, w_f);
+ if (low) {
+ // lowering dictionary word
+ mkallsmall_utf(w_f, langnum);
+ }
+ sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon;
+ } else {
+ int leftcommon = leftcommonsubstring(word, f.c_str());
+ if (low) {
+ // lowering dictionary word
+ mkallsmall(f, csconv);
+ }
+ sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon;
+ }
+ if (sc2 > sc)
+ sc = sc2;
+ }
+
+ int scphon = -20000;
+ if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) {
+ if (utf8) {
+ u8_u16(w_candidate, HENTRY_WORD(hp));
+ mkallcap_utf(w_candidate, langnum);
+ u16_u8(candidate, w_candidate);
+ } else {
+ candidate = HENTRY_WORD(hp);
+ mkallcap(candidate, csconv);
+ }
+ f = phonet(candidate, *ph);
+ if (utf8) {
+ u8_u16(w_f, f);
+ scphon = 2 * ngram(3, w_target, w_f,
+ NGRAM_LONGER_WORSE);
+ } else {
+ scphon = 2 * ngram(3, target, f,
+ NGRAM_LONGER_WORSE);
+ }
+ }
+
+ if (sc > scores[lp]) {
+ scores[lp] = sc;
+ roots[lp] = hp;
+ lval = sc;
+ for (int j = 0; j < MAX_ROOTS; j++)
+ if (scores[j] < lval) {
+ lp = j;
+ lval = scores[j];
+ }
+ }
+
+ if (scphon > scoresphon[lpphon]) {
+ scoresphon[lpphon] = scphon;
+ rootsphon[lpphon] = HENTRY_WORD(hp);
+ lval = scphon;
+ for (int j = 0; j < MAX_ROOTS; j++)
+ if (scoresphon[j] < lval) {
+ lpphon = j;
+ lval = scoresphon[j];
+ }
+ }
+ }
+ }
+
+ // find minimum threshold for a passable suggestion
+ // mangle original word three differnt ways
+ // and score them to generate a minimum acceptable score
+ std::vector<w_char> w_mw;
+ int thresh = 0;
+ for (int sp = 1; sp < 4; sp++) {
+ if (utf8) {
+ w_mw = w_word;
+ for (int k = sp; k < n; k += 4) {
+ w_mw[k].l = '*';
+ w_mw[k].h = 0;
+ }
+
+ if (low) {
+ // lowering dictionary word
+ mkallsmall_utf(w_mw, langnum);
+ }
+
+ thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH);
+ } else {
+ std::string mw = word;
+ for (int k = sp; k < n; k += 4)
+ mw[k] = '*';
+
+ if (low) {
+ // lowering dictionary word
+ mkallsmall(mw, csconv);
+ }
+
+ thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH);
+ }
+ }
+ thresh = thresh / 3;
+ thresh--;
+
+ // now expand affixes on each of these root words and
+ // and use length adjusted ngram scores to select
+ // possible suggestions
+ char* guess[MAX_GUESS];
+ char* guessorig[MAX_GUESS];
+ int gscore[MAX_GUESS];
+ for (int i = 0; i < MAX_GUESS; i++) {
+ guess[i] = NULL;
+ guessorig[i] = NULL;
+ gscore[i] = -100 * i;
+ }
+
+ lp = MAX_GUESS - 1;
+
+ struct guessword* glst;
+ glst = (struct guessword*)calloc(MAX_WORDS, sizeof(struct guessword));
+ if (!glst) {
+ if (nonbmp)
+ utf8 = 1;
+ return;
+ }
+
+ for (int i = 0; i < MAX_ROOTS; i++) {
+ if (roots[i]) {
+ struct hentry* rp = roots[i];
+
+ f.clear();
+ const char *field = NULL;
+ if ((rp->var & H_OPT_PHON) && copy_field(f, HENTRY_DATA(rp), MORPH_PHON))
+ field = f.c_str();
+ int nw = pAMgr->expand_rootword(
+ glst, MAX_WORDS, HENTRY_WORD(rp), rp->blen, rp->astr, rp->alen, word,
+ nc, field);
+
+ for (int k = 0; k < nw; k++) {
+ if (utf8) {
+ u8_u16(w_f, glst[k].word);
+
+ int leftcommon = leftcommonsubstring(w_word, w_f);
+ if (low) {
+ // lowering dictionary word
+ mkallsmall_utf(w_f, langnum);
+ }
+
+ sc = ngram(n, w_word, w_f, NGRAM_ANY_MISMATCH) + leftcommon;
+ } else {
+ f = glst[k].word;
+
+ int leftcommon = leftcommonsubstring(word, f.c_str());
+ if (low) {
+ // lowering dictionary word
+ mkallsmall(f, csconv);
+ }
+
+ sc = ngram(n, word, f, NGRAM_ANY_MISMATCH) + leftcommon;
+ }
+
+ if (sc > thresh) {
+ if (sc > gscore[lp]) {
+ if (guess[lp]) {
+ free(guess[lp]);
+ if (guessorig[lp]) {
+ free(guessorig[lp]);
+ guessorig[lp] = NULL;
+ }
+ }
+ gscore[lp] = sc;
+ guess[lp] = glst[k].word;
+ guessorig[lp] = glst[k].orig;
+ lval = sc;
+ for (int j = 0; j < MAX_GUESS; j++)
+ if (gscore[j] < lval) {
+ lp = j;
+ lval = gscore[j];
+ }
+ } else {
+ free(glst[k].word);
+ if (glst[k].orig)
+ free(glst[k].orig);
+ }
+ } else {
+ free(glst[k].word);
+ if (glst[k].orig)
+ free(glst[k].orig);
+ }
+ }
+ }
+ }
+ free(glst);
+
+ // now we are done generating guesses
+ // sort in order of decreasing score
+
+ bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS);
+ if (ph)
+ bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS);
+
+ // weight suggestions with a similarity index, based on
+ // the longest common subsequent algorithm and resort
+
+ int is_swap = 0;
+ int re = 0;
+ double fact = 1.0;
+ if (pAMgr) {
+ int maxd = pAMgr->get_maxdiff();
+ if (maxd >= 0)
+ fact = (10.0 - maxd) / 5.0;
+ }
+
+ std::vector<w_char> w_gl;
+ for (int i = 0; i < MAX_GUESS; i++) {
+ if (guess[i]) {
+ // lowering guess[i]
+ std::string gl;
+ int len;
+ if (utf8) {
+ len = u8_u16(w_gl, guess[i]);
+ mkallsmall_utf(w_gl, langnum);
+ u16_u8(gl, w_gl);
+ } else {
+ gl.assign(guess[i]);
+ if (!nonbmp)
+ mkallsmall(gl, csconv);
+ len = strlen(guess[i]);
+ }
+
+ int _lcs = lcslen(word, gl.c_str());
+
+ // same characters with different casing
+ if ((n == len) && (n == _lcs)) {
+ gscore[i] += 2000;
+ break;
+ }
+ // using 2-gram instead of 3, and other weightening
+
+ if (utf8) {
+ u8_u16(w_gl, gl);
+ //w_gl is lowercase already at this point
+ re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
+ if (low) {
+ w_f = w_word;
+ // lowering dictionary word
+ mkallsmall_utf(w_f, langnum);
+ re += ngram(2, w_gl, w_f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
+ } else {
+ re += ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
+ }
+ } else {
+ //gl is lowercase already at this point
+ re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
+ if (low) {
+ f = word;
+ // lowering dictionary word
+ mkallsmall(f, csconv);
+ re += ngram(2, gl, f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
+ } else {
+ re += ngram(2, gl, word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
+ }
+ }
+
+ int ngram_score, leftcommon_score;
+ if (utf8) {
+ //w_gl is lowercase already at this point
+ ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH);
+ leftcommon_score = leftcommonsubstring(w_word, w_gl);
+ } else {
+ //gl is lowercase already at this point
+ ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH);
+ leftcommon_score = leftcommonsubstring(word, gl.c_str());
+ }
+ gscore[i] =
+ // length of longest common subsequent minus length difference
+ 2 * _lcs - abs((int)(n - len)) +
+ // weight length of the left common substring
+ leftcommon_score +
+ // weight equal character positions
+ (!nonbmp && commoncharacterpositions(word, gl.c_str(), &is_swap)
+ ? 1
+ : 0) +
+ // swap character (not neighboring)
+ ((is_swap) ? 10 : 0) +
+ // ngram
+ ngram_score +
+ // weighted ngrams
+ re +
+ // different limit for dictionaries with PHONE rules
+ (ph ? (re < len * fact ? -1000 : 0)
+ : (re < (n + len) * fact ? -1000 : 0));
+ }
+ }
+
+ bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS);
+
+ // phonetic version
+ if (ph)
+ for (int i = 0; i < MAX_ROOTS; i++) {
+ if (rootsphon[i]) {
+ // lowering rootphon[i]
+ std::string gl;
+ int len;
+ if (utf8) {
+ len = u8_u16(w_gl, rootsphon[i]);
+ mkallsmall_utf(w_gl, langnum);
+ u16_u8(gl, w_gl);
+ } else {
+ gl.assign(rootsphon[i]);
+ if (!nonbmp)
+ mkallsmall(gl, csconv);
+ len = strlen(rootsphon[i]);
+ }
+
+ // weight length of the left common substring
+ int leftcommon_score;
+ if (utf8)
+ leftcommon_score = leftcommonsubstring(w_word, w_gl);
+ else
+ leftcommon_score = leftcommonsubstring(word, gl.c_str());
+ // heuristic weigthing of ngram scores
+ scoresphon[i] += 2 * lcslen(word, gl) - abs((int)(n - len)) +
+ leftcommon_score;
+ }
+ }
+
+ if (ph)
+ bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS);
+
+ // copy over
+ size_t oldns = wlst.size();
+
+ int same = 0;
+ for (int i = 0; i < MAX_GUESS; i++) {
+ if (guess[i]) {
+ if ((wlst.size() < oldns + maxngramsugs) && (wlst.size() < maxSug) &&
+ (!same || (gscore[i] > 1000))) {
+ int unique = 1;
+ // leave only excellent suggestions, if exists
+ if (gscore[i] > 1000)
+ same = 1;
+ else if (gscore[i] < -100) {
+ same = 1;
+ // keep the best ngram suggestions, unless in ONLYMAXDIFF mode
+ if (wlst.size() > oldns || (pAMgr && pAMgr->get_onlymaxdiff())) {
+ free(guess[i]);
+ if (guessorig[i])
+ free(guessorig[i]);
+ continue;
+ }
+ }
+ for (size_t j = 0; j < wlst.size(); ++j) {
+ // don't suggest previous suggestions or a previous suggestion with
+ // prefixes or affixes
+ if ((!guessorig[i] && strstr(guess[i], wlst[j].c_str())) ||
+ (guessorig[i] && strstr(guessorig[i], wlst[j].c_str())) ||
+ // check forbidden words
+ !checkword(guess[i], 0, NULL, NULL)) {
+ unique = 0;
+ break;
+ }
+ }
+ if (unique) {
+ if (guessorig[i]) {
+ wlst.push_back(guessorig[i]);
+ } else {
+ wlst.push_back(guess[i]);
+ }
+ }
+ free(guess[i]);
+ if (guessorig[i])
+ free(guessorig[i]);
+ } else {
+ free(guess[i]);
+ if (guessorig[i])
+ free(guessorig[i]);
+ }
+ }
+ }
+
+ oldns = wlst.size();
+ if (ph)
+ for (int i = 0; i < MAX_ROOTS; i++) {
+ if (rootsphon[i]) {
+ if ((wlst.size() < oldns + MAXPHONSUGS) && (wlst.size() < maxSug)) {
+ int unique = 1;
+ for (size_t j = 0; j < wlst.size(); ++j) {
+ // don't suggest previous suggestions or a previous suggestion with
+ // prefixes or affixes
+ if (strstr(rootsphon[i], wlst[j].c_str()) ||
+ // check forbidden words
+ !checkword(rootsphon[i], 0, NULL, NULL)) {
+ unique = 0;
+ break;
+ }
+ }
+ if (unique) {
+ wlst.push_back(rootsphon[i]);
+ }
+ }
+ }
+ }
+
+ if (nonbmp)
+ utf8 = 1;
+}
+
+// see if a candidate suggestion is spelled correctly
+// needs to check both root words and words with affixes
+
+// obsolote MySpell-HU modifications:
+// return value 2 and 3 marks compounding with hyphen (-)
+// `3' marks roots without suffix
+int SuggestMgr::checkword(const std::string& word,
+ int cpdsuggest,
+ int* timer,
+ clock_t* timelimit) {
+ // check time limit
+ if (timer) {
+ (*timer)--;
+ if (!(*timer) && timelimit) {
+ if ((clock() - *timelimit) > TIMELIMIT)
+ return 0;
+ *timer = MAXPLUSTIMER;
+ }
+ }
+
+ if (pAMgr) {
+ struct hentry* rv = NULL;
+ int nosuffix = 0;
+
+ if (cpdsuggest == 1) {
+ if (pAMgr->get_compound()) {
+ struct hentry* rv2 = NULL;
+ struct hentry* rwords[100]; // buffer for COMPOUND pattern checking
+ rv = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 1, 0); // EXT
+ if (rv &&
+ (!(rv2 = pAMgr->lookup(word.c_str())) || !rv2->astr ||
+ !(TESTAFF(rv2->astr, pAMgr->get_forbiddenword(), rv2->alen) ||
+ TESTAFF(rv2->astr, pAMgr->get_nosuggest(), rv2->alen))))
+ return 3; // XXX obsolote categorisation + only ICONV needs affix
+ // flag check?
+ }
+ return 0;
+ }
+
+ rv = pAMgr->lookup(word.c_str());
+
+ if (rv) {
+ if ((rv->astr) &&
+ (TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) ||
+ TESTAFF(rv->astr, pAMgr->get_nosuggest(), rv->alen) ||
+ TESTAFF(rv->astr, pAMgr->get_substandard(), rv->alen)))
+ return 0;
+ while (rv) {
+ if (rv->astr &&
+ (TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
+ TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) {
+ rv = rv->next_homonym;
+ } else
+ break;
+ }
+ } else
+ rv = pAMgr->prefix_check(word.c_str(), word.size(),
+ 0); // only prefix, and prefix + suffix XXX
+
+ if (rv) {
+ nosuffix = 1;
+ } else {
+ rv = pAMgr->suffix_check(word.c_str(), word.size(), 0, NULL,
+ FLAG_NULL, FLAG_NULL, IN_CPD_NOT); // only suffix
+ }
+
+ if (!rv && pAMgr->have_contclass()) {
+ rv = pAMgr->suffix_check_twosfx(word.c_str(), word.size(), 0, NULL, FLAG_NULL);
+ if (!rv)
+ rv = pAMgr->prefix_check_twosfx(word.c_str(), word.size(), 0, FLAG_NULL);
+ }
+
+ // check forbidden words
+ if ((rv) && (rv->astr) &&
+ (TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) ||
+ TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
+ TESTAFF(rv->astr, pAMgr->get_nosuggest(), rv->alen) ||
+ TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen)))
+ return 0;
+
+ if (rv) { // XXX obsolote
+ if ((pAMgr->get_compoundflag()) &&
+ TESTAFF(rv->astr, pAMgr->get_compoundflag(), rv->alen))
+ return 2 + nosuffix;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int SuggestMgr::check_forbidden(const char* word, int len) {
+ if (pAMgr) {
+ struct hentry* rv = pAMgr->lookup(word);
+ if (rv && rv->astr &&
+ (TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) ||
+ TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen)))
+ rv = NULL;
+ if (!(pAMgr->prefix_check(word, len, 1)))
+ rv = pAMgr->suffix_check(word, len, 0, NULL,
+ FLAG_NULL, FLAG_NULL, IN_CPD_NOT); // prefix+suffix, suffix
+ // check forbidden words
+ if ((rv) && (rv->astr) &&
+ TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen))
+ return 1;
+ }
+ return 0;
+}
+
+std::string SuggestMgr::suggest_morph(const std::string& in_w) {
+ std::string result;
+
+ struct hentry* rv = NULL;
+
+ if (!pAMgr)
+ return std::string();
+
+ std::string w(in_w);
+
+ // word reversing wrapper for complex prefixes
+ if (complexprefixes) {
+ if (utf8)
+ reverseword_utf(w);
+ else
+ reverseword(w);
+ }
+
+ rv = pAMgr->lookup(w.c_str());
+
+ while (rv) {
+ if ((!rv->astr) ||
+ !(TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) ||
+ TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) ||
+ TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) {
+ if (!HENTRY_FIND(rv, MORPH_STEM)) {
+ result.push_back(MSEP_FLD);
+ result.append(MORPH_STEM);
+ result.append(w);
+ }
+ if (HENTRY_DATA(rv)) {
+ result.push_back(MSEP_FLD);
+ result.append(HENTRY_DATA2(rv));
+ }
+ result.push_back(MSEP_REC);
+ }
+ rv = rv->next_homonym;
+ }
+
+ std::string st = pAMgr->affix_check_morph(w.c_str(), w.size());
+ if (!st.empty()) {
+ result.append(st);
+ }
+
+ if (pAMgr->get_compound() && result.empty()) {
+ struct hentry* rwords[100]; // buffer for COMPOUND pattern checking
+ pAMgr->compound_check_morph(w.c_str(), w.size(), 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, result,
+ NULL);
+ }
+
+ line_uniq(result, MSEP_REC);
+
+ return result;
+}
+
+static int get_sfxcount(const char* morph) {
+ if (!morph || !*morph)
+ return 0;
+ int n = 0;
+ const char* old = morph;
+ morph = strstr(morph, MORPH_DERI_SFX);
+ if (!morph)
+ morph = strstr(old, MORPH_INFL_SFX);
+ if (!morph)
+ morph = strstr(old, MORPH_TERM_SFX);
+ while (morph) {
+ n++;
+ old = morph;
+ morph = strstr(morph + 1, MORPH_DERI_SFX);
+ if (!morph)
+ morph = strstr(old + 1, MORPH_INFL_SFX);
+ if (!morph)
+ morph = strstr(old + 1, MORPH_TERM_SFX);
+ }
+ return n;
+}
+
+/* affixation */
+std::string SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) {
+ std::string result;
+ int sfxcount = get_sfxcount(pattern);
+
+ if (get_sfxcount(HENTRY_DATA(rv)) > sfxcount)
+ return result;
+
+ if (HENTRY_DATA(rv)) {
+ std::string aff = pAMgr->morphgen(HENTRY_WORD(rv), rv->blen, rv->astr, rv->alen,
+ HENTRY_DATA(rv), pattern, 0);
+ if (!aff.empty()) {
+ result.append(aff);
+ result.push_back(MSEP_REC);
+ }
+ }
+
+ // check all allomorphs
+ char* p = NULL;
+ if (HENTRY_DATA(rv))
+ p = (char*)strstr(HENTRY_DATA2(rv), MORPH_ALLOMORPH);
+ while (p) {
+ p += MORPH_TAG_LEN;
+ int plen = fieldlen(p);
+ std::string allomorph(p, plen);
+ struct hentry* rv2 = pAMgr->lookup(allomorph.c_str());
+ while (rv2) {
+ // if (HENTRY_DATA(rv2) && get_sfxcount(HENTRY_DATA(rv2)) <=
+ // sfxcount) {
+ if (HENTRY_DATA(rv2)) {
+ char* st = (char*)strstr(HENTRY_DATA2(rv2), MORPH_STEM);
+ if (st && (strncmp(st + MORPH_TAG_LEN, HENTRY_WORD(rv),
+ fieldlen(st + MORPH_TAG_LEN)) == 0)) {
+ std::string aff = pAMgr->morphgen(HENTRY_WORD(rv2), rv2->blen, rv2->astr,
+ rv2->alen, HENTRY_DATA(rv2), pattern, 0);
+ if (!aff.empty()) {
+ result.append(aff);
+ result.push_back(MSEP_REC);
+ }
+ }
+ }
+ rv2 = rv2->next_homonym;
+ }
+ p = strstr(p + plen, MORPH_ALLOMORPH);
+ }
+
+ return result;
+}
+
+std::string SuggestMgr::suggest_gen(const std::vector<std::string>& desc, const std::string& in_pattern) {
+ if (desc.empty() || !pAMgr)
+ return std::string();
+
+ const char* pattern = in_pattern.c_str();
+ std::string result2;
+ std::string newpattern;
+ struct hentry* rv = NULL;
+
+ // search affixed forms with and without derivational suffixes
+ while (1) {
+ for (size_t k = 0; k < desc.size(); ++k) {
+ std::string result;
+
+ // add compound word parts (except the last one)
+ const char* s = desc[k].c_str();
+ const char* part = strstr(s, MORPH_PART);
+ if (part) {
+ const char* nextpart = strstr(part + 1, MORPH_PART);
+ while (nextpart) {
+ std::string field;
+ copy_field(field, part, MORPH_PART);
+ result.append(field);
+ part = nextpart;
+ nextpart = strstr(part + 1, MORPH_PART);
+ }
+ s = part;
+ }
+
+ std::string tok(s);
+ size_t pos = tok.find(" | ");
+ while (pos != std::string::npos) {
+ tok[pos + 1] = MSEP_ALT;
+ pos = tok.find(" | ", pos);
+ }
+ std::vector<std::string> pl = line_tok(tok, MSEP_ALT);
+ for (size_t i = 0; i < pl.size(); ++i) {
+ // remove inflectional and terminal suffixes
+ size_t is = pl[i].find(MORPH_INFL_SFX);
+ if (is != std::string::npos)
+ pl[i].resize(is);
+ size_t ts = pl[i].find(MORPH_TERM_SFX);
+ while (ts != std::string::npos) {
+ pl[i][ts] = '_';
+ ts = pl[i].find(MORPH_TERM_SFX);
+ }
+ const char* st = strstr(s, MORPH_STEM);
+ if (st) {
+ copy_field(tok, st, MORPH_STEM);
+ rv = pAMgr->lookup(tok.c_str());
+ while (rv) {
+ std::string newpat(pl[i]);
+ newpat.append(pattern);
+ std::string sg = suggest_hentry_gen(rv, newpat.c_str());
+ if (sg.empty())
+ sg = suggest_hentry_gen(rv, pattern);
+ if (!sg.empty()) {
+ std::vector<std::string> gen = line_tok(sg, MSEP_REC);
+ for (size_t j = 0; j < gen.size(); ++j) {
+ result2.push_back(MSEP_REC);
+ result2.append(result);
+ if (pl[i].find(MORPH_SURF_PFX) != std::string::npos) {
+ std::string field;
+ copy_field(field, pl[i], MORPH_SURF_PFX);
+ result2.append(field);
+ }
+ result2.append(gen[j]);
+ }
+ }
+ rv = rv->next_homonym;
+ }
+ }
+ }
+ }
+
+ if (!result2.empty() || !strstr(pattern, MORPH_DERI_SFX))
+ break;
+
+ newpattern.assign(pattern);
+ mystrrep(newpattern, MORPH_DERI_SFX, MORPH_TERM_SFX);
+ pattern = newpattern.c_str();
+ }
+ return result2;
+}
+
+// generate an n-gram score comparing s1 and s2, UTF16 version
+int SuggestMgr::ngram(int n,
+ const std::vector<w_char>& su1,
+ const std::vector<w_char>& su2,
+ int opt) {
+ int nscore = 0;
+ int ns;
+ int l1;
+ int l2;
+ int test = 0;
+
+ l1 = su1.size();
+ l2 = su2.size();
+ if (l2 == 0)
+ return 0;
+ for (int j = 1; j <= n; j++) {
+ ns = 0;
+ for (int i = 0; i <= (l1 - j); i++) {
+ int k = 0;
+ for (int l = 0; l <= (l2 - j); l++) {
+ for (k = 0; k < j; k++) {
+ const w_char& c1 = su1[i + k];
+ const w_char& c2 = su2[l + k];
+ if ((c1.l != c2.l) || (c1.h != c2.h))
+ break;
+ }
+ if (k == j) {
+ ns++;
+ break;
+ }
+ }
+ if (k != j && opt & NGRAM_WEIGHTED) {
+ ns--;
+ test++;
+ if (i == 0 || i == l1 - j)
+ ns--; // side weight
+ }
+ }
+ nscore = nscore + ns;
+ if (ns < 2 && !(opt & NGRAM_WEIGHTED))
+ break;
+ }
+
+ ns = 0;
+ if (opt & NGRAM_LONGER_WORSE)
+ ns = (l2 - l1) - 2;
+ if (opt & NGRAM_ANY_MISMATCH)
+ ns = abs(l2 - l1) - 2;
+ ns = (nscore - ((ns > 0) ? ns : 0));
+ return ns;
+}
+
+// generate an n-gram score comparing s1 and s2, non-UTF16 version
+int SuggestMgr::ngram(int n,
+ const std::string& s1,
+ const std::string& s2,
+ int opt) {
+ int nscore = 0;
+ int ns;
+ int l1;
+ int l2;
+ int test = 0;
+
+ l2 = s2.size();
+ if (l2 == 0)
+ return 0;
+ l1 = s1.size();
+ for (int j = 1; j <= n; j++) {
+ ns = 0;
+ for (int i = 0; i <= (l1 - j); i++) {
+ //s2 is haystack, s1[i..i+j) is needle
+ if (s2.find(s1.c_str()+i, 0, j) != std::string::npos) {
+ ns++;
+ } else if (opt & NGRAM_WEIGHTED) {
+ ns--;
+ test++;
+ if (i == 0 || i == l1 - j)
+ ns--; // side weight
+ }
+ }
+ nscore = nscore + ns;
+ if (ns < 2 && !(opt & NGRAM_WEIGHTED))
+ break;
+ }
+
+ ns = 0;
+ if (opt & NGRAM_LONGER_WORSE)
+ ns = (l2 - l1) - 2;
+ if (opt & NGRAM_ANY_MISMATCH)
+ ns = abs(l2 - l1) - 2;
+ ns = (nscore - ((ns > 0) ? ns : 0));
+ return ns;
+}
+
+// length of the left common substring of s1 and (decapitalised) s2, UTF version
+int SuggestMgr::leftcommonsubstring(
+ const std::vector<w_char>& su1,
+ const std::vector<w_char>& su2) {
+ int l1 = su1.size();
+ int l2 = su2.size();
+ // decapitalize dictionary word
+ if (complexprefixes) {
+ if (l1 && l2 && su1[l1 - 1] == su2[l2 - 1])
+ return 1;
+ } else {
+ unsigned short idx = su2.empty() ? 0 : (su2[0].h << 8) + su2[0].l;
+ unsigned short otheridx = su1.empty() ? 0 : (su1[0].h << 8) + su1[0].l;
+ if (otheridx != idx && (otheridx != unicodetolower(idx, langnum)))
+ return 0;
+ int i;
+ for (i = 1; (i < l1) && (i < l2) && (su1[i].l == su2[i].l) &&
+ (su1[i].h == su2[i].h);
+ i++)
+ ;
+ return i;
+ }
+ return 0;
+}
+
+// length of the left common substring of s1 and (decapitalised) s2, non-UTF
+int SuggestMgr::leftcommonsubstring(
+ const char* s1,
+ const char* s2) {
+ if (complexprefixes) {
+ int l1 = strlen(s1);
+ int l2 = strlen(s2);
+ if (l1 <= l2 && s2[l1 - 1] == s2[l2 - 1])
+ return 1;
+ } else if (csconv) {
+ const char* olds = s1;
+ // decapitalise dictionary word
+ if ((*s1 != *s2) && (*s1 != csconv[((unsigned char)*s2)].clower))
+ return 0;
+ do {
+ s1++;
+ s2++;
+ } while ((*s1 == *s2) && (*s1 != '\0'));
+ return (int)(s1 - olds);
+ }
+ return 0;
+}
+
+int SuggestMgr::commoncharacterpositions(const char* s1,
+ const char* s2,
+ int* is_swap) {
+ int num = 0;
+ int diff = 0;
+ int diffpos[2];
+ *is_swap = 0;
+ if (utf8) {
+ std::vector<w_char> su1;
+ std::vector<w_char> su2;
+ int l1 = u8_u16(su1, s1);
+ int l2 = u8_u16(su2, s2);
+
+ if (l1 <= 0 || l2 <= 0)
+ return 0;
+
+ // decapitalize dictionary word
+ if (complexprefixes) {
+ su2[l2 - 1] = lower_utf(su2[l2 - 1], langnum);
+ } else {
+ su2[0] = lower_utf(su2[0], langnum);
+ }
+ for (int i = 0; (i < l1) && (i < l2); i++) {
+ if (su1[i] == su2[i]) {
+ num++;
+ } else {
+ if (diff < 2)
+ diffpos[diff] = i;
+ diff++;
+ }
+ }
+ if ((diff == 2) && (l1 == l2) &&
+ (su1[diffpos[0]] == su2[diffpos[1]]) &&
+ (su1[diffpos[1]] == su2[diffpos[0]]))
+ *is_swap = 1;
+ } else {
+ size_t i;
+ std::string t(s2);
+ // decapitalize dictionary word
+ if (complexprefixes) {
+ size_t l2 = t.size();
+ t[l2 - 1] = csconv[(unsigned char)t[l2 - 1]].clower;
+ } else {
+ mkallsmall(t, csconv);
+ }
+ for (i = 0; i < t.size() && (*(s1 + i) != 0); ++i) {
+ if (*(s1 + i) == t[i]) {
+ num++;
+ } else {
+ if (diff < 2)
+ diffpos[diff] = i;
+ diff++;
+ }
+ }
+ if ((diff == 2) && (*(s1 + i) == 0) && i == t.size() &&
+ (*(s1 + diffpos[0]) == t[diffpos[1]]) &&
+ (*(s1 + diffpos[1]) == t[diffpos[0]]))
+ *is_swap = 1;
+ }
+ return num;
+}
+
+int SuggestMgr::mystrlen(const char* word) {
+ if (utf8) {
+ std::vector<w_char> w;
+ return u8_u16(w, word);
+ } else
+ return strlen(word);
+}
+
+// sort in decreasing order of score
+void SuggestMgr::bubblesort(char** rword, char** rword2, int* rsc, int n) {
+ int m = 1;
+ while (m < n) {
+ int j = m;
+ while (j > 0) {
+ if (rsc[j - 1] < rsc[j]) {
+ int sctmp = rsc[j - 1];
+ char* wdtmp = rword[j - 1];
+ rsc[j - 1] = rsc[j];
+ rword[j - 1] = rword[j];
+ rsc[j] = sctmp;
+ rword[j] = wdtmp;
+ if (rword2) {
+ wdtmp = rword2[j - 1];
+ rword2[j - 1] = rword2[j];
+ rword2[j] = wdtmp;
+ }
+ j--;
+ } else
+ break;
+ }
+ m++;
+ }
+ return;
+}
+
+// longest common subsequence
+void SuggestMgr::lcs(const char* s,
+ const char* s2,
+ int* l1,
+ int* l2,
+ char** result) {
+ int n, m;
+ std::vector<w_char> su;
+ std::vector<w_char> su2;
+ char* b;
+ char* c;
+ int i;
+ int j;
+ if (utf8) {
+ m = u8_u16(su, s);
+ n = u8_u16(su2, s2);
+ } else {
+ m = strlen(s);
+ n = strlen(s2);
+ }
+ c = (char*)malloc((m + 1) * (n + 1));
+ b = (char*)malloc((m + 1) * (n + 1));
+ if (!c || !b) {
+ if (c)
+ free(c);
+ if (b)
+ free(b);
+ *result = NULL;
+ return;
+ }
+ for (i = 1; i <= m; i++)
+ c[i * (n + 1)] = 0;
+ for (j = 0; j <= n; j++)
+ c[j] = 0;
+ for (i = 1; i <= m; i++) {
+ for (j = 1; j <= n; j++) {
+ if (((utf8) && (su[i - 1] == su2[j - 1])) ||
+ ((!utf8) && (s[i - 1] == s2[j - 1]))) {
+ c[i * (n + 1) + j] = c[(i - 1) * (n + 1) + j - 1] + 1;
+ b[i * (n + 1) + j] = LCS_UPLEFT;
+ } else if (c[(i - 1) * (n + 1) + j] >= c[i * (n + 1) + j - 1]) {
+ c[i * (n + 1) + j] = c[(i - 1) * (n + 1) + j];
+ b[i * (n + 1) + j] = LCS_UP;
+ } else {
+ c[i * (n + 1) + j] = c[i * (n + 1) + j - 1];
+ b[i * (n + 1) + j] = LCS_LEFT;
+ }
+ }
+ }
+ *result = b;
+ free(c);
+ *l1 = m;
+ *l2 = n;
+}
+
+int SuggestMgr::lcslen(const char* s, const char* s2) {
+ int m;
+ int n;
+ int i;
+ int j;
+ char* result;
+ int len = 0;
+ lcs(s, s2, &m, &n, &result);
+ if (!result)
+ return 0;
+ i = m;
+ j = n;
+ while ((i != 0) && (j != 0)) {
+ if (result[i * (n + 1) + j] == LCS_UPLEFT) {
+ len++;
+ i--;
+ j--;
+ } else if (result[i * (n + 1) + j] == LCS_UP) {
+ i--;
+ } else
+ j--;
+ }
+ free(result);
+ return len;
+}
+
+int SuggestMgr::lcslen(const std::string& s, const std::string& s2) {
+ return lcslen(s.c_str(), s2.c_str());
+}
diff --git a/extensions/spellcheck/hunspell/src/suggestmgr.hxx b/extensions/spellcheck/hunspell/src/suggestmgr.hxx
new file mode 100644
index 0000000000..4c2fb69032
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/suggestmgr.hxx
@@ -0,0 +1,183 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+/*
+ * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
+ * And Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. All modifications to the source code must be clearly marked as
+ * such. Binary redistributions based on modified source code
+ * must be clearly marked as modified versions in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef SUGGESTMGR_HXX_
+#define SUGGESTMGR_HXX_
+
+#define MAX_ROOTS 100
+#define MAX_WORDS 100
+#define MAX_GUESS 200
+#define MAXNGRAMSUGS 4
+#define MAXPHONSUGS 2
+#define MAXCOMPOUNDSUGS 3
+
+#define NGRAM_LONGER_WORSE (1 << 0)
+#define NGRAM_ANY_MISMATCH (1 << 1)
+#define NGRAM_LOWERING (1 << 2)
+#define NGRAM_WEIGHTED (1 << 3)
+
+#include "atypes.hxx"
+#include "affixmgr.hxx"
+#include "hashmgr.hxx"
+#include "langnum.hxx"
+
+enum { LCS_UP, LCS_LEFT, LCS_UPLEFT };
+
+class SuggestMgr {
+ private:
+ SuggestMgr(const SuggestMgr&);
+ SuggestMgr& operator=(const SuggestMgr&);
+
+ private:
+ char* ckey;
+ size_t ckeyl;
+ std::vector<w_char> ckey_utf;
+
+ char* ctry;
+ size_t ctryl;
+ std::vector<w_char> ctry_utf;
+ bool lang_with_dash_usage;
+
+ AffixMgr* pAMgr;
+ unsigned int maxSug;
+ struct cs_info* csconv;
+ int utf8;
+ int langnum;
+ int nosplitsugs;
+ int maxngramsugs;
+ int maxcpdsugs;
+ int complexprefixes;
+
+ public:
+ SuggestMgr(const char* tryme, unsigned int maxn, AffixMgr* aptr);
+ ~SuggestMgr();
+
+ bool suggest(std::vector<std::string>& slst, const char* word, int* onlycmpdsug);
+ void ngsuggest(std::vector<std::string>& slst, const char* word, const std::vector<HashMgr*>& rHMgr, int captype);
+
+ std::string suggest_morph(const std::string& word);
+ std::string suggest_gen(const std::vector<std::string>& pl, const std::string& pattern);
+
+ private:
+ void testsug(std::vector<std::string>& wlst,
+ const std::string& candidate,
+ int cpdsuggest,
+ int* timer,
+ clock_t* timelimit);
+ int checkword(const std::string& word, int, int*, clock_t*);
+ int check_forbidden(const char*, int);
+
+ void capchars(std::vector<std::string>&, const char*, int);
+ int replchars(std::vector<std::string>&, const char*, int);
+ int doubletwochars(std::vector<std::string>&, const char*, int);
+ int forgotchar(std::vector<std::string>&, const char*, int);
+ int swapchar(std::vector<std::string>&, const char*, int);
+ int longswapchar(std::vector<std::string>&, const char*, int);
+ int movechar(std::vector<std::string>&, const char*, int);
+ int extrachar(std::vector<std::string>&, const char*, int);
+ int badcharkey(std::vector<std::string>&, const char*, int);
+ int badchar(std::vector<std::string>&, const char*, int);
+ bool twowords(std::vector<std::string>&, const char*, int, bool);
+
+ void capchars_utf(std::vector<std::string>&, const w_char*, int wl, int);
+ int doubletwochars_utf(std::vector<std::string>&, const w_char*, int wl, int);
+ int forgotchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
+ int extrachar_utf(std::vector<std::string>&, const w_char*, int wl, int);
+ int badcharkey_utf(std::vector<std::string>&, const w_char*, int wl, int);
+ int badchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
+ int swapchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
+ int longswapchar_utf(std::vector<std::string>&, const w_char*, int, int);
+ int movechar_utf(std::vector<std::string>&, const w_char*, int, int);
+
+ int mapchars(std::vector<std::string>&, const char*, int);
+ int map_related(const char*,
+ std::string&,
+ int,
+ std::vector<std::string>& wlst,
+ int,
+ const std::vector<mapentry>&,
+ int*,
+ clock_t*);
+ int ngram(int n, const std::vector<w_char>& su1,
+ const std::vector<w_char>& su2, int opt);
+ int ngram(int n, const std::string& s1, const std::string& s2, int opt);
+ int mystrlen(const char* word);
+ int leftcommonsubstring(const std::vector<w_char>& su1,
+ const std::vector<w_char>& su2);
+ int leftcommonsubstring(const char* s1, const char* s2);
+ int commoncharacterpositions(const char* s1, const char* s2, int* is_swap);
+ void bubblesort(char** rwd, char** rwd2, int* rsc, int n);
+ void lcs(const char* s, const char* s2, int* l1, int* l2, char** result);
+ int lcslen(const char* s, const char* s2);
+ int lcslen(const std::string& s, const std::string& s2);
+ std::string suggest_hentry_gen(hentry* rv, const char* pattern);
+};
+
+#endif
diff --git a/extensions/spellcheck/hunspell/src/w_char.hxx b/extensions/spellcheck/hunspell/src/w_char.hxx
new file mode 100644
index 0000000000..7e71d04680
--- /dev/null
+++ b/extensions/spellcheck/hunspell/src/w_char.hxx
@@ -0,0 +1,72 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * Copyright (C) 2002-2022 Németh László
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
+ *
+ * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
+ * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
+ * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
+ * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
+ * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef W_CHAR_HXX_
+#define W_CHAR_HXX_
+
+#include <string>
+
+#ifndef GCC
+struct w_char {
+#else
+struct __attribute__((packed)) w_char {
+#endif
+ unsigned char l;
+ unsigned char h;
+
+ friend bool operator<(const w_char a, const w_char b) {
+ unsigned short a_idx = (a.h << 8) + a.l;
+ unsigned short b_idx = (b.h << 8) + b.l;
+ return a_idx < b_idx;
+ }
+
+ friend bool operator==(const w_char a, const w_char b) {
+ return (((a).l == (b).l) && ((a).h == (b).h));
+ }
+
+ friend bool operator!=(const w_char a, const w_char b) {
+ return !(a == b);;
+ }
+};
+
+// two character arrays
+struct replentry {
+ std::string pattern;
+ std::string outstrings[4]; // med, ini, fin, isol
+};
+
+#endif