author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000
commit    | 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree      | f435a8308119effd964b339f76abb83a57c29483 /extensions/spellcheck/hunspell
parent    | Initial commit. (diff)
download  | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1. (upstream/124.0.1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
623 files changed, 32597 insertions, 0 deletions
diff --git a/extensions/spellcheck/hunspell/COPYING.MPL b/extensions/spellcheck/hunspell/COPYING.MPL new file mode 100644 index 0000000000..7714141d15 --- /dev/null +++ b/extensions/spellcheck/hunspell/COPYING.MPL @@ -0,0 +1,470 @@ + MOZILLA PUBLIC LICENSE + Version 1.1 + + --------------- + +1. Definitions. + + 1.0.1. "Commercial Use" means distribution or otherwise making the + Covered Code available to a third party. + + 1.1. "Contributor" means each entity that creates or contributes to + the creation of Modifications. + + 1.2. "Contributor Version" means the combination of the Original + Code, prior Modifications used by a Contributor, and the Modifications + made by that particular Contributor. + + 1.3. "Covered Code" means the Original Code or Modifications or the + combination of the Original Code and Modifications, in each case + including portions thereof. + + 1.4. "Electronic Distribution Mechanism" means a mechanism generally + accepted in the software development community for the electronic + transfer of data. + + 1.5. "Executable" means Covered Code in any form other than Source + Code. + + 1.6. "Initial Developer" means the individual or entity identified + as the Initial Developer in the Source Code notice required by Exhibit + A. + + 1.7. "Larger Work" means a work which combines Covered Code or + portions thereof with code not governed by the terms of this License. + + 1.8. "License" means this document. + + 1.8.1. "Licensable" means having the right to grant, to the maximum + extent possible, whether at the time of the initial grant or + subsequently acquired, any and all of the rights conveyed herein. + + 1.9. "Modifications" means any addition to or deletion from the + substance or structure of either the Original Code or any previous + Modifications. When Covered Code is released as a series of files, a + Modification is: + A. Any addition to or deletion from the contents of a file + containing Original Code or previous Modifications. + + B. Any new file that contains any part of the Original Code or + previous Modifications. + + 1.10. "Original Code" means Source Code of computer software code + which is described in the Source Code notice required by Exhibit A as + Original Code, and which, at the time of its release under this + License is not already Covered Code governed by this License. + + 1.10.1. "Patent Claims" means any patent claim(s), now owned or + hereafter acquired, including without limitation, method, process, + and apparatus claims, in any patent Licensable by grantor. + + 1.11. "Source Code" means the preferred form of the Covered Code for + making modifications to it, including all modules it contains, plus + any associated interface definition files, scripts used to control + compilation and installation of an Executable, or source code + differential comparisons against either the Original Code or another + well known, available Covered Code of the Contributor's choice. The + Source Code can be in a compressed or archival form, provided the + appropriate decompression or de-archiving software is widely available + for no charge. + + 1.12. "You" (or "Your") means an individual or a legal entity + exercising rights under, and complying with all of the terms of, this + License or a future version of this License issued under Section 6.1. + For legal entities, "You" includes any entity which controls, is + controlled by, or is under common control with You. 
For purposes of + this definition, "control" means (a) the power, direct or indirect, + to cause the direction or management of such entity, whether by + contract or otherwise, or (b) ownership of more than fifty percent + (50%) of the outstanding shares or beneficial ownership of such + entity. + +2. Source Code License. + + 2.1. The Initial Developer Grant. + The Initial Developer hereby grants You a world-wide, royalty-free, + non-exclusive license, subject to third party intellectual property + claims: + (a) under intellectual property rights (other than patent or + trademark) Licensable by Initial Developer to use, reproduce, + modify, display, perform, sublicense and distribute the Original + Code (or portions thereof) with or without Modifications, and/or + as part of a Larger Work; and + + (b) under Patents Claims infringed by the making, using or + selling of Original Code, to make, have made, use, practice, + sell, and offer for sale, and/or otherwise dispose of the + Original Code (or portions thereof). + + (c) the licenses granted in this Section 2.1(a) and (b) are + effective on the date Initial Developer first distributes + Original Code under the terms of this License. + + (d) Notwithstanding Section 2.1(b) above, no patent license is + granted: 1) for code that You delete from the Original Code; 2) + separate from the Original Code; or 3) for infringements caused + by: i) the modification of the Original Code or ii) the + combination of the Original Code with other software or devices. + + 2.2. Contributor Grant. + Subject to third party intellectual property claims, each Contributor + hereby grants You a world-wide, royalty-free, non-exclusive license + + (a) under intellectual property rights (other than patent or + trademark) Licensable by Contributor, to use, reproduce, modify, + display, perform, sublicense and distribute the Modifications + created by such Contributor (or portions thereof) either on an + unmodified basis, with other Modifications, as Covered Code + and/or as part of a Larger Work; and + + (b) under Patent Claims infringed by the making, using, or + selling of Modifications made by that Contributor either alone + and/or in combination with its Contributor Version (or portions + of such combination), to make, use, sell, offer for sale, have + made, and/or otherwise dispose of: 1) Modifications made by that + Contributor (or portions thereof); and 2) the combination of + Modifications made by that Contributor with its Contributor + Version (or portions of such combination). + + (c) the licenses granted in Sections 2.2(a) and 2.2(b) are + effective on the date Contributor first makes Commercial Use of + the Covered Code. + + (d) Notwithstanding Section 2.2(b) above, no patent license is + granted: 1) for any code that Contributor has deleted from the + Contributor Version; 2) separate from the Contributor Version; + 3) for infringements caused by: i) third party modifications of + Contributor Version or ii) the combination of Modifications made + by that Contributor with other software (except as part of the + Contributor Version) or other devices; or 4) under Patent Claims + infringed by Covered Code in the absence of Modifications made by + that Contributor. + +3. Distribution Obligations. + + 3.1. Application of License. + The Modifications which You create or to which You contribute are + governed by the terms of this License, including without limitation + Section 2.2. 
The Source Code version of Covered Code may be + distributed only under the terms of this License or a future version + of this License released under Section 6.1, and You must include a + copy of this License with every copy of the Source Code You + distribute. You may not offer or impose any terms on any Source Code + version that alters or restricts the applicable version of this + License or the recipients' rights hereunder. However, You may include + an additional document offering the additional rights described in + Section 3.5. + + 3.2. Availability of Source Code. + Any Modification which You create or to which You contribute must be + made available in Source Code form under the terms of this License + either on the same media as an Executable version or via an accepted + Electronic Distribution Mechanism to anyone to whom you made an + Executable version available; and if made available via Electronic + Distribution Mechanism, must remain available for at least twelve (12) + months after the date it initially became available, or at least six + (6) months after a subsequent version of that particular Modification + has been made available to such recipients. You are responsible for + ensuring that the Source Code version remains available even if the + Electronic Distribution Mechanism is maintained by a third party. + + 3.3. Description of Modifications. + You must cause all Covered Code to which You contribute to contain a + file documenting the changes You made to create that Covered Code and + the date of any change. You must include a prominent statement that + the Modification is derived, directly or indirectly, from Original + Code provided by the Initial Developer and including the name of the + Initial Developer in (a) the Source Code, and (b) in any notice in an + Executable version or related documentation in which You describe the + origin or ownership of the Covered Code. + + 3.4. Intellectual Property Matters + (a) Third Party Claims. + If Contributor has knowledge that a license under a third party's + intellectual property rights is required to exercise the rights + granted by such Contributor under Sections 2.1 or 2.2, + Contributor must include a text file with the Source Code + distribution titled "LEGAL" which describes the claim and the + party making the claim in sufficient detail that a recipient will + know whom to contact. If Contributor obtains such knowledge after + the Modification is made available as described in Section 3.2, + Contributor shall promptly modify the LEGAL file in all copies + Contributor makes available thereafter and shall take other steps + (such as notifying appropriate mailing lists or newsgroups) + reasonably calculated to inform those who received the Covered + Code that new knowledge has been obtained. + + (b) Contributor APIs. + If Contributor's Modifications include an application programming + interface and Contributor has knowledge of patent licenses which + are reasonably necessary to implement that API, Contributor must + also include this information in the LEGAL file. + + (c) Representations. + Contributor represents that, except as disclosed pursuant to + Section 3.4(a) above, Contributor believes that Contributor's + Modifications are Contributor's original creation(s) and/or + Contributor has sufficient rights to grant the rights conveyed by + this License. + + 3.5. Required Notices. + You must duplicate the notice in Exhibit A in each file of the Source + Code. 
If it is not possible to put such notice in a particular Source + Code file due to its structure, then You must include such notice in a + location (such as a relevant directory) where a user would be likely + to look for such a notice. If You created one or more Modification(s) + You may add your name as a Contributor to the notice described in + Exhibit A. You must also duplicate this License in any documentation + for the Source Code where You describe recipients' rights or ownership + rights relating to Covered Code. You may choose to offer, and to + charge a fee for, warranty, support, indemnity or liability + obligations to one or more recipients of Covered Code. However, You + may do so only on Your own behalf, and not on behalf of the Initial + Developer or any Contributor. You must make it absolutely clear than + any such warranty, support, indemnity or liability obligation is + offered by You alone, and You hereby agree to indemnify the Initial + Developer and every Contributor for any liability incurred by the + Initial Developer or such Contributor as a result of warranty, + support, indemnity or liability terms You offer. + + 3.6. Distribution of Executable Versions. + You may distribute Covered Code in Executable form only if the + requirements of Section 3.1-3.5 have been met for that Covered Code, + and if You include a notice stating that the Source Code version of + the Covered Code is available under the terms of this License, + including a description of how and where You have fulfilled the + obligations of Section 3.2. The notice must be conspicuously included + in any notice in an Executable version, related documentation or + collateral in which You describe recipients' rights relating to the + Covered Code. You may distribute the Executable version of Covered + Code or ownership rights under a license of Your choice, which may + contain terms different from this License, provided that You are in + compliance with the terms of this License and that the license for the + Executable version does not attempt to limit or alter the recipient's + rights in the Source Code version from the rights set forth in this + License. If You distribute the Executable version under a different + license You must make it absolutely clear that any terms which differ + from this License are offered by You alone, not by the Initial + Developer or any Contributor. You hereby agree to indemnify the + Initial Developer and every Contributor for any liability incurred by + the Initial Developer or such Contributor as a result of any such + terms You offer. + + 3.7. Larger Works. + You may create a Larger Work by combining Covered Code with other code + not governed by the terms of this License and distribute the Larger + Work as a single product. In such a case, You must make sure the + requirements of this License are fulfilled for the Covered Code. + +4. Inability to Comply Due to Statute or Regulation. + + If it is impossible for You to comply with any of the terms of this + License with respect to some or all of the Covered Code due to + statute, judicial order, or regulation then You must: (a) comply with + the terms of this License to the maximum extent possible; and (b) + describe the limitations and the code they affect. Such description + must be included in the LEGAL file described in Section 3.4 and must + be included with all distributions of the Source Code. 
Except to the + extent prohibited by statute or regulation, such description must be + sufficiently detailed for a recipient of ordinary skill to be able to + understand it. + +5. Application of this License. + + This License applies to code to which the Initial Developer has + attached the notice in Exhibit A and to related Covered Code. + +6. Versions of the License. + + 6.1. New Versions. + Netscape Communications Corporation ("Netscape") may publish revised + and/or new versions of the License from time to time. Each version + will be given a distinguishing version number. + + 6.2. Effect of New Versions. + Once Covered Code has been published under a particular version of the + License, You may always continue to use it under the terms of that + version. You may also choose to use such Covered Code under the terms + of any subsequent version of the License published by Netscape. No one + other than Netscape has the right to modify the terms applicable to + Covered Code created under this License. + + 6.3. Derivative Works. + If You create or use a modified version of this License (which you may + only do in order to apply it to code which is not already Covered Code + governed by this License), You must (a) rename Your license so that + the phrases "Mozilla", "MOZILLAPL", "MOZPL", "Netscape", + "MPL", "NPL" or any confusingly similar phrase do not appear in your + license (except to note that your license differs from this License) + and (b) otherwise make it clear that Your version of the license + contains terms which differ from the Mozilla Public License and + Netscape Public License. (Filling in the name of the Initial + Developer, Original Code or Contributor in the notice described in + Exhibit A shall not of themselves be deemed to be modifications of + this License.) + +7. DISCLAIMER OF WARRANTY. + + COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, + WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + WITHOUT LIMITATION, WARRANTIES THAT THE COVERED CODE IS FREE OF + DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. + THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED CODE + IS WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT, + YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE + COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER + OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF + ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. + +8. TERMINATION. + + 8.1. This License and the rights granted hereunder will terminate + automatically if You fail to comply with terms herein and fail to cure + such breach within 30 days of becoming aware of the breach. All + sublicenses to the Covered Code which are properly granted shall + survive any termination of this License. Provisions which, by their + nature, must remain in effect beyond the termination of this License + shall survive. + + 8.2. 
If You initiate litigation by asserting a patent infringement + claim (excluding declatory judgment actions) against Initial Developer + or a Contributor (the Initial Developer or Contributor against whom + You file such action is referred to as "Participant") alleging that: + + (a) such Participant's Contributor Version directly or indirectly + infringes any patent, then any and all rights granted by such + Participant to You under Sections 2.1 and/or 2.2 of this License + shall, upon 60 days notice from Participant terminate prospectively, + unless if within 60 days after receipt of notice You either: (i) + agree in writing to pay Participant a mutually agreeable reasonable + royalty for Your past and future use of Modifications made by such + Participant, or (ii) withdraw Your litigation claim with respect to + the Contributor Version against such Participant. If within 60 days + of notice, a reasonable royalty and payment arrangement are not + mutually agreed upon in writing by the parties or the litigation claim + is not withdrawn, the rights granted by Participant to You under + Sections 2.1 and/or 2.2 automatically terminate at the expiration of + the 60 day notice period specified above. + + (b) any software, hardware, or device, other than such Participant's + Contributor Version, directly or indirectly infringes any patent, then + any rights granted to You by such Participant under Sections 2.1(b) + and 2.2(b) are revoked effective as of the date You first made, used, + sold, distributed, or had made, Modifications made by that + Participant. + + 8.3. If You assert a patent infringement claim against Participant + alleging that such Participant's Contributor Version directly or + indirectly infringes any patent where such claim is resolved (such as + by license or settlement) prior to the initiation of patent + infringement litigation, then the reasonable value of the licenses + granted by such Participant under Sections 2.1 or 2.2 shall be taken + into account in determining the amount or value of any payment or + license. + + 8.4. In the event of termination under Sections 8.1 or 8.2 above, + all end user license agreements (excluding distributors and resellers) + which have been validly granted by You or any distributor hereunder + prior to termination shall survive termination. + +9. LIMITATION OF LIABILITY. + + UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT + (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL + DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED CODE, + OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR + ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY + CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, + WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER + COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN + INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF + LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY + RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW + PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE + EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO + THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. + +10. U.S. GOVERNMENT END USERS. + + The Covered Code is a "commercial item," as that term is defined in + 48 C.F.R. 2.101 (Oct. 
1995), consisting of "commercial computer + software" and "commercial computer software documentation," as such + terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 + C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), + all U.S. Government End Users acquire Covered Code with only those + rights set forth herein. + +11. MISCELLANEOUS. + + This License represents the complete agreement concerning subject + matter hereof. If any provision of this License is held to be + unenforceable, such provision shall be reformed only to the extent + necessary to make it enforceable. This License shall be governed by + California law provisions (except to the extent applicable law, if + any, provides otherwise), excluding its conflict-of-law provisions. + With respect to disputes in which at least one party is a citizen of, + or an entity chartered or registered to do business in the United + States of America, any litigation relating to this License shall be + subject to the jurisdiction of the Federal Courts of the Northern + District of California, with venue lying in Santa Clara County, + California, with the losing party responsible for costs, including + without limitation, court costs and reasonable attorneys' fees and + expenses. The application of the United Nations Convention on + Contracts for the International Sale of Goods is expressly excluded. + Any law or regulation which provides that the language of a contract + shall be construed against the drafter shall not apply to this + License. + +12. RESPONSIBILITY FOR CLAIMS. + + As between Initial Developer and the Contributors, each party is + responsible for claims and damages arising, directly or indirectly, + out of its utilization of rights under this License and You agree to + work with Initial Developer and Contributors to distribute such + responsibility on an equitable basis. Nothing herein is intended or + shall be deemed to constitute any admission of liability. + +13. MULTIPLE-LICENSED CODE. + + Initial Developer may designate portions of the Covered Code as + "Multiple-Licensed". "Multiple-Licensed" means that the Initial + Developer permits you to utilize portions of the Covered Code under + Your choice of the NPL or the alternative licenses, if any, specified + by the Initial Developer in the file described in Exhibit A. + +EXHIBIT A -Mozilla Public License. + + ``The contents of this file are subject to the Mozilla Public License + Version 1.1 (the "License"); you may not use this file except in + compliance with the License. You may obtain a copy of the License at + http://www.mozilla.org/MPL/ + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the + License for the specific language governing rights and limitations + under the License. + + The Original Code is ______________________________________. + + The Initial Developer of the Original Code is ________________________. + Portions created by ______________________ are Copyright (C) ______ + _______________________. All Rights Reserved. + + Contributor(s): ______________________________________. + + Alternatively, the contents of this file may be used under the terms + of the _____ license (the "[___] License"), in which case the + provisions of [______] License are applicable instead of those + above. 
If you wish to allow use of your version of this file only + under the terms of the [____] License and not to allow others to use + your version of this file under the MPL, indicate your decision by + deleting the provisions above and replace them with the notice and + other provisions required by the [___] License. If you do not delete + the provisions above, a recipient may use your version of this file + under either the MPL or the [___] License." + + [NOTE: The text of this Exhibit A may differ slightly from the text of + the notices in the Source Code files of the Original Code. You should + use the text of this Exhibit A rather than the text found in the + Original Code Source Code for Your Modifications.] + diff --git a/extensions/spellcheck/hunspell/glue/RLBoxHunspell.cpp b/extensions/spellcheck/hunspell/glue/RLBoxHunspell.cpp new file mode 100644 index 0000000000..ac6200f8d5 --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/RLBoxHunspell.cpp @@ -0,0 +1,253 @@ +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/Assertions.h" +#include "RLBoxHunspell.h" +#include "mozHunspellRLBoxGlue.h" +#include "mozHunspellRLBoxHost.h" +#include "nsThread.h" + +using namespace rlbox; +using namespace mozilla; + +// Helper function for allocating and copying std::string into sandbox +static tainted_hunspell<char*> allocStrInSandbox( + rlbox_sandbox_hunspell& aSandbox, const std::string& str) { + size_t size = str.size() + 1; + tainted_hunspell<char*> t_str = aSandbox.malloc_in_sandbox<char>(size); + if (t_str) { + rlbox::memcpy(aSandbox, t_str, str.c_str(), size); + } + return t_str; +} + +/* static */ +RLBoxHunspell* RLBoxHunspell::Create(const nsCString& affpath, + const nsCString& dpath) { + MOZ_DIAGNOSTIC_ASSERT(NS_IsMainThread()); + + mozilla::UniquePtr<rlbox_sandbox_hunspell> sandbox( + new rlbox_sandbox_hunspell()); + +#if defined(MOZ_WASM_SANDBOXING_HUNSPELL) && !defined(HAVE_64BIT_BUILD) + // By default, the rlbox sandbox size is smaller on 32-bit builds than the max + // 4GB. We may need to ask for a larger sandbox size for hunspell to + // spellcheck in some locales See Bug 1739669 for more details + + // We first get the size of the dictionary. This is actually the first read we + // try on dpath and it might fail for whatever filesystem reasons (invalid + // path, unaccessible, ...). + Result<int64_t, nsresult> dictSizeResult = + mozHunspellFileMgrHost::GetSize(dpath); + NS_ENSURE_TRUE(dictSizeResult.isOk(), nullptr); + + int64_t dictSize = dictSizeResult.unwrap(); + NS_ENSURE_TRUE(dictSize >= 0, nullptr); + + // Next, we compute the expected memory needed for hunspell spell checking. + // This will vary based on the size of the dictionary file, which varies by + // locale — so we size the sandbox by multiplying the file size by 4.8. This + // allows the 1.5MB en_US dictionary to fit in an 8MB sandbox. See bug 1739669 + // and bug 1739761 for the analysis behind this. 
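+ // Illustrative arithmetic based on the figures in the comment above: 4.8 * 1.5 MB
+ // is roughly 7.2 MB for the en_US dictionary, and the capacity lookup below then
+ // picks a valid wasm2c capacity of at least that size, consistent with the 8 MB
+ // sandbox mentioned above.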
+ const uint64_t expectedMaxMemory = static_cast<uint64_t>(4.8 * dictSize); + + // Get a capacity of at least the expected size + const w2c_mem_capacity capacity = get_valid_wasm2c_memory_capacity( + expectedMaxMemory, true /* wasm's 32-bit memory */); + + bool success = + sandbox->create_sandbox(/* shouldAbortOnFailure = */ false, &capacity); +#elif defined(MOZ_WASM_SANDBOXING_HUNSPELL) + bool success = sandbox->create_sandbox(/* shouldAbortOnFailure = */ false); +#else + sandbox->create_sandbox(); + const bool success = true; +#endif + + NS_ENSURE_TRUE(success, nullptr); + + mozilla::UniquePtr<rlbox_sandbox_hunspell, RLBoxDeleter> sandbox_initialized( + sandbox.release()); + + // Add the aff and dict files to allow list + if (!affpath.IsEmpty()) { + mozHunspellCallbacks::AllowFile(affpath); + } + if (!dpath.IsEmpty()) { + mozHunspellCallbacks::AllowFile(dpath); + } + + // TODO Bug 1788857: Verify error handling in case of inaccessible file + return new RLBoxHunspell(std::move(sandbox_initialized), affpath, dpath); +} + +RLBoxHunspell::RLBoxHunspell( + mozilla::UniquePtr<rlbox_sandbox_hunspell, RLBoxDeleter> aSandbox, + const nsCString& affpath, const nsCString& dpath) + : mSandbox(std::move(aSandbox)), mHandle(nullptr) { + // Register callbacks + mCreateFilemgr = + mSandbox->register_callback(mozHunspellCallbacks::CreateFilemgr); + mGetLine = mSandbox->register_callback(mozHunspellCallbacks::GetLine); + mGetLineNum = mSandbox->register_callback(mozHunspellCallbacks::GetLineNum); + mDestructFilemgr = + mSandbox->register_callback(mozHunspellCallbacks::DestructFilemgr); + mHunspellToUpperCase = + mSandbox->register_callback(mozHunspellCallbacks::ToUpperCase); + mHunspellToLowerCase = + mSandbox->register_callback(mozHunspellCallbacks::ToLowerCase); + mHunspellGetCurrentCS = + mSandbox->register_callback(mozHunspellCallbacks::GetCurrentCS); + + mSandbox->invoke_sandbox_function(RegisterHunspellCallbacks, mCreateFilemgr, + mGetLine, mGetLineNum, mDestructFilemgr, + mHunspellToUpperCase, mHunspellToLowerCase, + mHunspellGetCurrentCS); + + // Copy the affpath and dpath into the sandbox + // These allocations should definitely succeed as these are first allocations + // inside the sandbox. 
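+ // Illustrative note: allocStrInSandbox (defined near the top of this file) returns
+ // a null tainted pointer when malloc_in_sandbox fails, so the MOZ_RELEASE_ASSERT
+ // calls below abort if the freshly created sandbox is somehow already out of memory.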
+ tainted_hunspell<char*> t_affpath = + allocStrInSandbox(*mSandbox, affpath.get()); + MOZ_RELEASE_ASSERT(t_affpath); + + tainted_hunspell<char*> t_dpath = allocStrInSandbox(*mSandbox, dpath.get()); + MOZ_RELEASE_ASSERT(t_dpath); + + // Create handle + mHandle = mSandbox->invoke_sandbox_function( + Hunspell_create, rlbox::sandbox_const_cast<const char*>(t_affpath), + rlbox::sandbox_const_cast<const char*>(t_dpath)); + MOZ_RELEASE_ASSERT(mHandle); + + mSandbox->free_in_sandbox(t_dpath); + mSandbox->free_in_sandbox(t_affpath); + + // Get dictionary encoding + tainted_hunspell<char*> t_enc = + mSandbox->invoke_sandbox_function(Hunspell_get_dic_encoding, mHandle); + t_enc.copy_and_verify_string([&](std::unique_ptr<char[]> enc) { + size_t len = std::strlen(enc.get()); + mDicEncoding = std::string(enc.get(), len); + }); +} + +RLBoxHunspell::~RLBoxHunspell() { + MOZ_DIAGNOSTIC_ASSERT(NS_IsMainThread()); + // Call hunspell's destroy which frees mHandle + mSandbox->invoke_sandbox_function(Hunspell_destroy, mHandle); + mHandle = nullptr; + + // Unregister callbacks + mDestructFilemgr.unregister(); + mGetLineNum.unregister(); + mGetLine.unregister(); + mCreateFilemgr.unregister(); + mHunspellToUpperCase.unregister(); + mHunspellToLowerCase.unregister(); + mHunspellGetCurrentCS.unregister(); + + // Clear any callback data and allow list + mozHunspellCallbacks::Clear(); +} + +// Invoking hunspell with words larger than a certain size will cause the +// Hunspell sandbox to run out of memory. So we pick an arbitrary limit of +// 200000 here to ensure this doesn't happen. +static const size_t gWordSizeLimit = 200000; + +int RLBoxHunspell::spell(const std::string& stdWord) { + MOZ_DIAGNOSTIC_ASSERT(NS_IsMainThread()); + + const int ok = 1; + + if (stdWord.length() >= gWordSizeLimit) { + // Fail gracefully assuming the word is spelt correctly + return ok; + } + + // Copy word into the sandbox + tainted_hunspell<char*> t_word = allocStrInSandbox(*mSandbox, stdWord); + if (!t_word) { + // Ran out of memory in the hunspell sandbox + // Fail gracefully assuming the word is spelt correctly + return ok; + } + + // Check word + int good = mSandbox + ->invoke_sandbox_function( + Hunspell_spell, mHandle, + rlbox::sandbox_const_cast<const char*>(t_word)) + .copy_and_verify([](int good) { return good; }); + mSandbox->free_in_sandbox(t_word); + return good; +} + +const std::string& RLBoxHunspell::get_dict_encoding() const { + return mDicEncoding; +} + +// This function fails gracefully - if we run out of memory in the hunspell +// sandbox, we return empty suggestion list +std::vector<std::string> RLBoxHunspell::suggest(const std::string& stdWord) { + MOZ_DIAGNOSTIC_ASSERT(NS_IsMainThread()); + + if (stdWord.length() >= gWordSizeLimit) { + return {}; + } + + // Copy word into the sandbox + tainted_hunspell<char*> t_word = allocStrInSandbox(*mSandbox, stdWord); + if (!t_word) { + return {}; + } + + // Allocate suggestion list in the sandbox + tainted_hunspell<char***> t_slst = mSandbox->malloc_in_sandbox<char**>(); + if (!t_slst) { + // Free the earlier allocation + mSandbox->free_in_sandbox(t_word); + return {}; + } + + *t_slst = nullptr; + + // Get suggestions + int nr = mSandbox + ->invoke_sandbox_function( + Hunspell_suggest, mHandle, t_slst, + rlbox::sandbox_const_cast<const char*>(t_word)) + .copy_and_verify([](int nr) { + MOZ_RELEASE_ASSERT(nr >= 0); + return nr; + }); + + tainted_hunspell<char**> t_slst_ref = *t_slst; + + std::vector<std::string> suggestions; + if (nr > 0 && t_slst_ref != nullptr) { + // Copy 
suggestions from sandbox + suggestions.reserve(nr); + + for (int i = 0; i < nr; i++) { + tainted_hunspell<char*> t_sug = t_slst_ref[i]; + + if (t_sug) { + t_sug.copy_and_verify_string( + [&](std::string sug) { suggestions.push_back(std::move(sug)); }); + // free the suggestion string allocated by the sandboxed hunspell + mSandbox->free_in_sandbox(t_sug); + } + } + + // free the suggestion list allocated by the sandboxed hunspell + mSandbox->free_in_sandbox(t_slst_ref); + } + + mSandbox->free_in_sandbox(t_word); + mSandbox->free_in_sandbox(t_slst); + return suggestions; +} diff --git a/extensions/spellcheck/hunspell/glue/RLBoxHunspell.h b/extensions/spellcheck/hunspell/glue/RLBoxHunspell.h new file mode 100644 index 0000000000..2e5a11d936 --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/RLBoxHunspell.h @@ -0,0 +1,66 @@ +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef EXTENSIONS_SPELLCHECK_HUNSPELL_GLUE_RLBOXHUNSPELL_H_ +#define EXTENSIONS_SPELLCHECK_HUNSPELL_GLUE_RLBOXHUNSPELL_H_ + +#include "RLBoxHunspellTypes.h" + +// Load general firefox configuration of RLBox +#include "mozilla/rlbox/rlbox_config.h" + +#ifdef MOZ_WASM_SANDBOXING_HUNSPELL +// Include the generated header file so that we are able to resolve the symbols +// in the wasm binary +# include "rlbox.wasm.h" +# define RLBOX_USE_STATIC_CALLS() rlbox_wasm2c_sandbox_lookup_symbol +# include "mozilla/rlbox/rlbox_wasm2c_sandbox.hpp" +#else +# define RLBOX_USE_STATIC_CALLS() rlbox_noop_sandbox_lookup_symbol +# include "mozilla/rlbox/rlbox_noop_sandbox.hpp" +#endif + +#include "mozilla/rlbox/rlbox.hpp" + +#include <hunspell.h> +#include "mozHunspellRLBoxGlue.h" + +class RLBoxHunspell { + public: + static RLBoxHunspell* Create(const nsCString& affpath, + const nsCString& dpath); + + ~RLBoxHunspell(); + + int spell(const std::string& stdWord); + const std::string& get_dict_encoding() const; + + std::vector<std::string> suggest(const std::string& word); + + private: + struct RLBoxDeleter { + void operator()(rlbox_sandbox_hunspell* sandbox) { + sandbox->destroy_sandbox(); + delete sandbox; + } + }; + + RLBoxHunspell( + mozilla::UniquePtr<rlbox_sandbox_hunspell, RLBoxDeleter> aSandbox, + const nsCString& affpath, const nsCString& dpath); + + mozilla::UniquePtr<rlbox_sandbox_hunspell, RLBoxDeleter> mSandbox; + sandbox_callback_hunspell<hunspell_create_filemgr_t*> mCreateFilemgr; + sandbox_callback_hunspell<hunspell_get_line_t*> mGetLine; + sandbox_callback_hunspell<hunspell_get_line_num_t*> mGetLineNum; + sandbox_callback_hunspell<hunspell_destruct_filemgr_t*> mDestructFilemgr; + sandbox_callback_hunspell<hunspell_ToUpperCase_t*> mHunspellToUpperCase; + sandbox_callback_hunspell<hunspell_ToLowerCase_t*> mHunspellToLowerCase; + sandbox_callback_hunspell<hunspell_get_current_cs_t*> mHunspellGetCurrentCS; + tainted_hunspell<Hunhandle*> mHandle; + std::string mDicEncoding; +}; + +#endif diff --git a/extensions/spellcheck/hunspell/glue/RLBoxHunspellTypes.h b/extensions/spellcheck/hunspell/glue/RLBoxHunspellTypes.h new file mode 100644 index 0000000000..d05703ad0f --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/RLBoxHunspellTypes.h @@ -0,0 +1,27 @@ +/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the 
Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef EXTENSIONS_SPELLCHECK_HUNSPELL_GLUE_RLBOXHUNSPELLTYPES_H_ +#define EXTENSIONS_SPELLCHECK_HUNSPELL_GLUE_RLBOXHUNSPELLTYPES_H_ + +#include <stddef.h> +#include "mozilla/rlbox/rlbox_types.hpp" +#include "hunspell_csutil.hxx" + +#ifdef MOZ_WASM_SANDBOXING_HUNSPELL +RLBOX_DEFINE_BASE_TYPES_FOR(hunspell, wasm2c) +#else +RLBOX_DEFINE_BASE_TYPES_FOR(hunspell, noop) +#endif + +#define sandbox_fields_reflection_hunspell_class_cs_info(f, g, ...) \ + f(unsigned char, ccase, FIELD_NORMAL, ##__VA_ARGS__) g() \ + f(unsigned char, clower, FIELD_NORMAL, ##__VA_ARGS__) g() \ + f(unsigned char, cupper, FIELD_NORMAL, ##__VA_ARGS__) g() + +#define sandbox_fields_reflection_hunspell_allClasses(f, ...) \ + f(cs_info, hunspell, ##__VA_ARGS__) + +#endif diff --git a/extensions/spellcheck/hunspell/glue/common.mozbuild b/extensions/spellcheck/hunspell/glue/common.mozbuild new file mode 100644 index 0000000000..471dd0c325 --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/common.mozbuild @@ -0,0 +1,10 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +@template +def HunspellIncludes(): + ForceInclude("hunspell_alloc_hooks.h") diff --git a/extensions/spellcheck/hunspell/glue/components.conf b/extensions/spellcheck/hunspell/glue/components.conf new file mode 100644 index 0000000000..ea9fcad75f --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/components.conf @@ -0,0 +1,13 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Classes = [ + { + 'cid': '{56c778e4-1bee-45f3-a689-886692a97fe7}', + 'contract_ids': ['@mozilla.org/spellchecker/engine;1'], + 'type': 'mozHunspell', + }, +] diff --git a/extensions/spellcheck/hunspell/glue/hunspell_alloc_hooks.h b/extensions/spellcheck/hunspell/glue/hunspell_alloc_hooks.h new file mode 100644 index 0000000000..27a719d788 --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/hunspell_alloc_hooks.h @@ -0,0 +1,59 @@ +/******* BEGIN LICENSE BLOCK ******* + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Initial Developers of the Original Code is Mozilla Foundation. + * Portions created by the Initial Developers + * are Copyright (C) 2011 the Initial Developers. All Rights Reserved. 
+ * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + ******* END LICENSE BLOCK *******/ + +#ifndef alloc_hooks_h__ +#define alloc_hooks_h__ + +/** + * This file is force-included in hunspell code. Its purpose is to add memory + * reporting to hunspell without modifying its code, in order to ease future + * upgrades. + * + * Currently, the memory allocated using operator new/new[] is not being + * tracked, but that's OK, since almost all of the memory used by Hunspell is + * allocated using C memory allocation functions. + */ + +#include "mozHunspellAllocator.h" + +// Ensure that malloc is imported before we set our malloc-counting hooks below. +// Otherwise, if malloc is imported afterwards, its source will be trampled +// over by the "#define"s. +#include "mozmemory.h" + +#define malloc(size) HunspellAllocator::CountingMalloc(size) +#define calloc(count, size) HunspellAllocator::CountingCalloc(count, size) +#define free(ptr) HunspellAllocator::CountingFree(ptr) +#define realloc(ptr, size) HunspellAllocator::CountingRealloc(ptr, size) + +#endif diff --git a/extensions/spellcheck/hunspell/glue/hunspell_csutil.cxx b/extensions/spellcheck/hunspell/glue/hunspell_csutil.cxx new file mode 100644 index 0000000000..0f5565a12c --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/hunspell_csutil.cxx @@ -0,0 +1,160 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2017 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. 
+ * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include "hunspell_csutil.hxx" +#include "mozilla/Encoding.h" +#include "mozilla/Span.h" +#include "nsUnicharUtils.h" + +/* This is a copy of get_current_cs from the hunspell csutil.cxx file. + */ +struct cs_info* hunspell_get_current_cs(const std::string& es) { + struct cs_info* ccs = new cs_info[256]; + // Initialze the array with dummy data so that we wouldn't need + // to return null in case of failures. 
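+ // Illustrative example: with an ISO-8859-1 dictionary, byte 0xC9 ('É') should end up
+ // with cupper = 0xC9, clower = 0xE9 ('é') and ccase set, while bytes the encoding
+ // cannot round-trip keep the identity mapping set up here.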
+ for (int i = 0; i <= 0xff; ++i) { + ccs[i].ccase = false; + ccs[i].clower = i; + ccs[i].cupper = i; + } + + auto encoding = mozilla::Encoding::ForLabelNoReplacement(es); + if (!encoding) { + return ccs; + } + auto encoder = encoding->NewEncoder(); + auto decoder = encoding->NewDecoderWithoutBOMHandling(); + + for (unsigned int i = 0; i <= 0xff; ++i) { + bool success = false; + // We want to find the upper/lowercase equivalents of each byte + // in this 1-byte character encoding. Call our encoding/decoding + // APIs separately for each byte since they may reject some of the + // bytes, and we want to handle errors separately for each byte. + uint8_t lower, upper; + do { + if (i == 0) break; + uint8_t source = uint8_t(i); + char16_t uni[2]; + char16_t uniCased; + uint8_t destination[4]; + auto src1 = mozilla::Span(&source, 1); + auto dst1 = mozilla::Span(uni); + auto src2 = mozilla::Span(&uniCased, 1); + auto dst2 = mozilla::Span(destination); + + uint32_t result; + size_t read; + size_t written; + std::tie(result, read, written) = + decoder->DecodeToUTF16WithoutReplacement(src1, dst1, true); + if (result != mozilla::kInputEmpty || read != 1 || written != 1) { + break; + } + + uniCased = ToLowerCase(uni[0]); + std::tie(result, read, written) = + encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true); + if (result != mozilla::kInputEmpty || read != 1 || written != 1) { + break; + } + lower = destination[0]; + + uniCased = ToUpperCase(uni[0]); + std::tie(result, read, written) = + encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true); + if (result != mozilla::kInputEmpty || read != 1 || written != 1) { + break; + } + upper = destination[0]; + + success = true; + } while (0); + + encoding->NewEncoderInto(*encoder); + encoding->NewDecoderWithoutBOMHandlingInto(*decoder); + + if (success) { + ccs[i].cupper = upper; + ccs[i].clower = lower; + } else { + ccs[i].cupper = i; + ccs[i].clower = i; + } + + if (ccs[i].clower != (unsigned char)i) + ccs[i].ccase = true; + else + ccs[i].ccase = false; + } + + return ccs; +} diff --git a/extensions/spellcheck/hunspell/glue/hunspell_csutil.hxx b/extensions/spellcheck/hunspell/glue/hunspell_csutil.hxx new file mode 100644 index 0000000000..00dd89a964 --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/hunspell_csutil.hxx @@ -0,0 +1,87 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2017 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. 
+ * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#ifndef EXTENSIONS_SPELLCHECK_HUNSPELL_GLUE_HUNSPELL_CSUTIL_H_ +#define EXTENSIONS_SPELLCHECK_HUNSPELL_GLUE_HUNSPELL_CSUTIL_H_ + +/* We need get_current_cs from hunspell's csutil to live outside the RLBox + * sandbox (since it relies on a Gecko encoding bits) and then expose it to the + * sandboxed hunspell. 
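+ * Illustrative cross-reference: RLBoxHunspell's constructor (above) registers
+ * mozHunspellCallbacks::GetCurrentCS as the mHunspellGetCurrentCS callback, which is
+ * presumably how the sandboxed library reaches this host-side helper.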
+ */ + +struct cs_info { + unsigned char ccase; + unsigned char clower; + unsigned char cupper; +}; + +struct cs_info* hunspell_get_current_cs(const std::string& es); + +#endif diff --git a/extensions/spellcheck/hunspell/glue/moz.build b/extensions/spellcheck/hunspell/glue/moz.build new file mode 100644 index 0000000000..da6585b66e --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/moz.build @@ -0,0 +1,31 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +UNIFIED_SOURCES += [ + "hunspell_csutil.cxx", + "mozHunspell.cpp", + "mozHunspellRLBoxHost.cpp", + "RLBoxHunspell.cpp", +] + +DEFINES["HUNSPELL_STATIC"] = True + +FINAL_LIBRARY = "xul" + +LOCAL_INCLUDES += [ + "!/security/rlbox", + "../src", + "/extensions/spellcheck/src", +] + +include("/ipc/chromium/chromium-config.mozbuild") +include("common.mozbuild") + +HunspellIncludes() + +XPCOM_MANIFESTS += [ + "components.conf", +] diff --git a/extensions/spellcheck/hunspell/glue/mozHunspell.cpp b/extensions/spellcheck/hunspell/glue/mozHunspell.cpp new file mode 100644 index 0000000000..a69dcd6b05 --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/mozHunspell.cpp @@ -0,0 +1,594 @@ +/******* BEGIN LICENSE BLOCK ******* + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Initial Developers of the Original Code are Kevin Hendricks (MySpell) + * and László Németh (Hunspell). Portions created by the Initial Developers + * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved. + * + * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca) + * David Einstein (deinst@world.std.com) + * Michiel van Leeuwen (mvl@exedo.nl) + * Caolan McNamara (cmc@openoffice.org) + * László Németh (nemethl@gyorsposta.hu) + * Davide Prina + * Giuseppe Modugno + * Gianluca Turconi + * Simon Brouwer + * Noll Janos + * Biro Arpad + * Goldman Eleonora + * Sarlos Tamas + * Bencsath Boldizsar + * Halacsy Peter + * Dvornik Laszlo + * Gefferth Andras + * Nagy Viktor + * Varga Daniel + * Chris Halls + * Rene Engelhard + * Bram Moolenaar + * Dafydd Jones + * Harri Pitkanen + * Andras Timar + * Tor Lillqvist + * Jesper Kristensen (mail@jesperkristensen.dk) + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. 
If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + ******* END LICENSE BLOCK *******/ + +#include "mozHunspell.h" +#include "nsReadableUtils.h" +#include "nsString.h" +#include "nsIObserverService.h" +#include "nsIDirectoryEnumerator.h" +#include "nsIFile.h" +#include "nsUnicharUtils.h" +#include "nsCRT.h" +#include "mozInlineSpellChecker.h" +#include "nsIPrefBranch.h" +#include "nsIPrefService.h" +#include "nsNetUtil.h" +#include "mozilla/dom/ContentParent.h" +#include "mozilla/Components.h" +#include "mozilla/Services.h" + +#include <stdlib.h> +#include <tuple> + +using mozilla::dom::ContentParent; +using namespace mozilla; + +NS_IMPL_CYCLE_COLLECTING_ADDREF(mozHunspell) +NS_IMPL_CYCLE_COLLECTING_RELEASE(mozHunspell) + +NS_INTERFACE_MAP_BEGIN(mozHunspell) + NS_INTERFACE_MAP_ENTRY(mozISpellCheckingEngine) + NS_INTERFACE_MAP_ENTRY(nsIObserver) + NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference) + NS_INTERFACE_MAP_ENTRY(nsIMemoryReporter) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, mozISpellCheckingEngine) + NS_INTERFACE_MAP_ENTRIES_CYCLE_COLLECTION(mozHunspell) +NS_INTERFACE_MAP_END + +NS_IMPL_CYCLE_COLLECTION_WEAK(mozHunspell, mPersonalDictionary) + +NS_IMPL_COMPONENT_FACTORY(mozHunspell) { + auto hunspell = MakeRefPtr<mozHunspell>(); + if (NS_SUCCEEDED(hunspell->Init())) { + return hunspell.forget().downcast<mozISpellCheckingEngine>(); + } + return nullptr; +} + +mozHunspell::mozHunspell() { +#ifdef DEBUG + // There must be only one instance of this class: it reports memory based on + // a single static count in HunspellAllocator. + static bool hasRun = false; + MOZ_ASSERT(!hasRun); + hasRun = true; +#endif +} + +nsresult mozHunspell::Init() { + LoadDictionaryList(false); + + nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService(); + if (obs) { + obs->AddObserver(this, "profile-do-change", true); + obs->AddObserver(this, "profile-after-change", true); + } + + mozilla::RegisterWeakMemoryReporter(this); + + return NS_OK; +} + +mozHunspell::~mozHunspell() { + mozilla::UnregisterWeakMemoryReporter(this); + + mPersonalDictionary = nullptr; + mHunspells.Clear(); +} + +NS_IMETHODIMP +mozHunspell::GetDictionaries(nsTArray<nsCString>& aDictionaries) { + MOZ_ASSERT(aDictionaries.IsEmpty()); + for (auto iter = mHunspells.ConstIter(); !iter.Done(); iter.Next()) { + if (iter.Data().mEnabled) { + aDictionaries.AppendElement(iter.Key()); + } + } + return NS_OK; +} + +/* Set the Dictionaries. + * This also Loads the dictionaries and initializes the converter using the + * dictionaries converter + */ +NS_IMETHODIMP +mozHunspell::SetDictionaries(const nsTArray<nsCString>& aDictionaries) { + if (aDictionaries.IsEmpty()) { + mHunspells.Clear(); + return NS_OK; + } + + // Disable any dictionaries we've already loaded that we're not + // going to use. 
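+ // Illustrative example: if en-US and de-DE are loaded and aDictionaries only names
+ // en-US, the de-DE entry stays cached in mHunspells with mEnabled set to false; it is
+ // only evicted by the size check near the end of this method once more than ten
+ // dictionaries are held.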
+ for (auto iter = mHunspells.Iter(); !iter.Done(); iter.Next()) { + if (!aDictionaries.Contains(iter.Key())) { + iter.Data().mEnabled = false; + } + } + + bool firstDictionary = true; + for (const auto& dictionary : aDictionaries) { + NS_ConvertUTF8toUTF16 dict(dictionary); + nsIURI* affFile = mDictionaries.GetWeak(dict); + if (!affFile) { + return NS_ERROR_FILE_NOT_FOUND; + } + + nsAutoCString affFileName; + nsresult rv = affFile->GetSpec(affFileName); + NS_ENSURE_SUCCESS(rv, rv); + + if (auto entry = mHunspells.Lookup(dictionary)) { + if (entry.Data().mAffixFileName == affFileName) { + entry.Data().mEnabled = true; + continue; + } + } + + DictionaryData dictionaryData; + dictionaryData.mAffixFileName = affFileName; + + // Load the first dictionary now, we'll load the others lazily during + // checking. + if (firstDictionary) { + rv = dictionaryData.LoadIfNecessary(); + NS_ENSURE_SUCCESS(rv, rv); + firstDictionary = false; + } + + mHunspells.InsertOrUpdate(dictionary, std::move(dictionaryData)); + } + + // If we have a large number of dictionaries loaded, try freeing any disabled + // dictionaries to limit memory use. + if (mHunspells.Count() > 10) { + mHunspells.RemoveIf([](const auto& iter) { return !iter.Data().mEnabled; }); + } + + return NS_OK; +} + +NS_IMETHODIMP mozHunspell::GetPersonalDictionary( + mozIPersonalDictionary** aPersonalDictionary) { + *aPersonalDictionary = mPersonalDictionary; + NS_IF_ADDREF(*aPersonalDictionary); + return NS_OK; +} + +NS_IMETHODIMP mozHunspell::SetPersonalDictionary( + mozIPersonalDictionary* aPersonalDictionary) { + mPersonalDictionary = aPersonalDictionary; + return NS_OK; +} + +NS_IMETHODIMP mozHunspell::GetDictionaryList( + nsTArray<nsCString>& aDictionaries) { + MOZ_ASSERT(aDictionaries.IsEmpty()); + for (const auto& key : mDictionaries.Keys()) { + aDictionaries.AppendElement(NS_ConvertUTF16toUTF8(key)); + } + + return NS_OK; +} + +void mozHunspell::LoadDictionaryList(bool aNotifyChildProcesses) { + mDictionaries.Clear(); + + nsresult rv; + + // find built in dictionaries, or dictionaries specified in + // spellchecker.dictionary_path in prefs + nsCOMPtr<nsIFile> dictDir; + + // check preferences first + nsCOMPtr<nsIPrefBranch> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID)); + if (prefs) { + nsAutoCString extDictPath; + rv = prefs->GetCharPref("spellchecker.dictionary_path", extDictPath); + if (NS_SUCCEEDED(rv)) { + // set the spellchecker.dictionary_path + rv = NS_NewNativeLocalFile(extDictPath, true, getter_AddRefs(dictDir)); + } + if (dictDir) { + LoadDictionariesFromDir(dictDir); + } + } + + // find dictionaries in DICPATH + char* dicEnv = PR_GetEnv("DICPATH"); + if (dicEnv) { + // do a two-pass dance so dictionaries are loaded right-to-left as + // preference + nsTArray<nsCOMPtr<nsIFile>> dirs; + nsAutoCString env(dicEnv); // assume dicEnv is UTF-8 + + char* currPath = nullptr; + char* nextPaths = env.BeginWriting(); + while ((currPath = NS_strtok(":", &nextPaths))) { + nsCOMPtr<nsIFile> dir; + rv = + NS_NewNativeLocalFile(nsCString(currPath), true, getter_AddRefs(dir)); + if (NS_SUCCEEDED(rv)) { + dirs.AppendElement(dir); + } + } + + // load them in reverse order so they override each other properly + for (int32_t i = dirs.Length() - 1; i >= 0; i--) { + LoadDictionariesFromDir(dirs[i]); + } + } + + // find dictionaries from restartless extensions + for (int32_t i = 0; i < mDynamicDirectories.Count(); i++) { + LoadDictionariesFromDir(mDynamicDirectories[i]); + } + + for (const auto& dictionaryEntry : mDynamicDictionaries) { + 
mDictionaries.InsertOrUpdate(dictionaryEntry.GetKey(), + dictionaryEntry.GetData()); + } + + DictionariesChanged(aNotifyChildProcesses); +} + +void mozHunspell::DictionariesChanged(bool aNotifyChildProcesses) { + // Now we have finished updating the list of dictionaries, update the current + // dictionary and any editors which may use it. + mozInlineSpellChecker::UpdateCanEnableInlineSpellChecking(); + + if (aNotifyChildProcesses) { + ContentParent::NotifyUpdatedDictionaries(); + } + + // Check if the current dictionaries are still available. + // If not, try to replace it with other dictionaries of the same language. + if (!mHunspells.IsEmpty()) { + nsTArray<nsCString> dictionaries; + for (auto iter = mHunspells.ConstIter(); !iter.Done(); iter.Next()) { + if (iter.Data().mEnabled) { + dictionaries.AppendElement(iter.Key()); + } + } + nsresult rv = SetDictionaries(dictionaries); + if (NS_SUCCEEDED(rv)) return; + } + + // If the current dictionaries are gone, and we don't have a good replacement, + // set no current dictionary. + if (!mHunspells.IsEmpty()) { + nsTArray<nsCString> empty; + SetDictionaries(empty); + } +} + +NS_IMETHODIMP +mozHunspell::LoadDictionariesFromDir(nsIFile* aDir) { + nsresult rv; + + bool check = false; + rv = aDir->Exists(&check); + if (NS_FAILED(rv) || !check) return NS_ERROR_UNEXPECTED; + + rv = aDir->IsDirectory(&check); + if (NS_FAILED(rv) || !check) return NS_ERROR_UNEXPECTED; + + nsCOMPtr<nsIDirectoryEnumerator> files; + rv = aDir->GetDirectoryEntries(getter_AddRefs(files)); + if (NS_FAILED(rv)) return NS_ERROR_UNEXPECTED; + + nsCOMPtr<nsIFile> file; + while (NS_SUCCEEDED(files->GetNextFile(getter_AddRefs(file))) && file) { + nsAutoString leafName; + file->GetLeafName(leafName); + if (!StringEndsWith(leafName, u".dic"_ns)) continue; + + nsAutoString dict(leafName); + dict.SetLength(dict.Length() - 4); // magic length of ".dic" + + // check for the presence of the .aff file + leafName = dict; + leafName.AppendLiteral(".aff"); + file->SetLeafName(leafName); + rv = file->Exists(&check); + if (NS_FAILED(rv) || !check) continue; + + // Replace '_' separator with '-' + dict.ReplaceChar('_', '-'); + + nsCOMPtr<nsIURI> uri; + rv = NS_NewFileURI(getter_AddRefs(uri), file); + NS_ENSURE_SUCCESS(rv, rv); + + mDictionaries.InsertOrUpdate(dict, uri); + } + + return NS_OK; +} + +nsresult mozHunspell::DictionaryData::ConvertCharset(const nsAString& aStr, + std::string& aDst) { + if (NS_WARN_IF(!mEncoder)) { + return NS_ERROR_NOT_INITIALIZED; + } + + auto src = Span(aStr.BeginReading(), aStr.Length()); + CheckedInt<size_t> needed = + mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(src.Length()); + if (!needed.isValid()) { + return NS_ERROR_OUT_OF_MEMORY; + } + + aDst.resize(needed.value()); + + char* dstPtr = &aDst[0]; + auto dst = Span(reinterpret_cast<uint8_t*>(dstPtr), needed.value()); + + uint32_t result; + size_t written; + std::tie(result, std::ignore, written) = + mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, true); + MOZ_ASSERT(result != kOutputFull); + if (result != kInputEmpty) { + return NS_ERROR_UENC_NOMAPPING; + } + aDst.resize(written); + mEncoder->Encoding()->NewEncoderInto(*mEncoder); + return NS_OK; +} + +nsresult mozHunspell::DictionaryData::LoadIfNecessary() { + if (mHunspell && mEncoder && mDecoder) { + return NS_OK; + } + + if (mLoadFailed) { + return NS_ERROR_FAILURE; + } + + nsCString dictFileName = mAffixFileName; + int32_t dotPos = dictFileName.RFindChar('.'); + if (dotPos == -1) { + mLoadFailed = true; + return NS_ERROR_FAILURE; + } + 
dictFileName.SetLength(dotPos); + dictFileName.AppendLiteral(".dic"); + + UniquePtr<RLBoxHunspell> hunspell( + RLBoxHunspell::Create(mAffixFileName, dictFileName)); + if (!hunspell) { + mLoadFailed = true; + // TODO Bug 1788857: Verify error propagation in case of inaccessible file + return NS_ERROR_OUT_OF_MEMORY; + } + mHunspell = std::move(hunspell); + auto encoding = + Encoding::ForLabelNoReplacement(mHunspell->get_dict_encoding()); + if (!encoding) { + mLoadFailed = true; + return NS_ERROR_UCONV_NOCONV; + } + mEncoder = encoding->NewEncoder(); + mDecoder = encoding->NewDecoderWithoutBOMHandling(); + return NS_OK; +} + +NS_IMETHODIMP +mozHunspell::CollectReports(nsIHandleReportCallback* aHandleReport, + nsISupports* aData, bool aAnonymize) { + MOZ_COLLECT_REPORT("explicit/spell-check", KIND_HEAP, UNITS_BYTES, + HunspellAllocator::MemoryAllocated(), + "Memory used by the spell-checking engine."); + + return NS_OK; +} + +NS_IMETHODIMP +mozHunspell::Check(const nsAString& aWord, bool* aResult) { + if (NS_WARN_IF(!aResult)) { + return NS_ERROR_INVALID_ARG; + } + + if (NS_WARN_IF(mHunspells.IsEmpty())) { + return NS_ERROR_FAILURE; + } + + *aResult = true; + for (auto iter = mHunspells.Iter(); !iter.Done(); iter.Next()) { + if (!iter.Data().mEnabled) { + continue; + } + + nsresult rv = iter.Data().LoadIfNecessary(); + if (NS_FAILED(rv)) { + continue; + } + + std::string charsetWord; + rv = iter.Data().ConvertCharset(aWord, charsetWord); + if (NS_FAILED(rv)) { + continue; + } + + // Depending upon the encoding, we might end up with a string that begins + // with the null byte. Since the hunspell interface uses C-style strings, + // this appears like an empty string, and hunspell marks empty strings as + // spelled correctly. Skip these cases to allow another dictionary to have + // the chance to spellcheck them. 
+ if (charsetWord.empty() || charsetWord[0] == 0) { + continue; + } + + *aResult = iter.Data().mHunspell->spell(charsetWord); + if (*aResult) { + break; + } + } + + if (!*aResult && mPersonalDictionary) { + return mPersonalDictionary->Check(aWord, aResult); + } + + return NS_OK; +} + +NS_IMETHODIMP +mozHunspell::Suggest(const nsAString& aWord, nsTArray<nsString>& aSuggestions) { + if (NS_WARN_IF(mHunspells.IsEmpty())) { + return NS_ERROR_FAILURE; + } + + MOZ_ASSERT(aSuggestions.IsEmpty()); + + for (auto iter = mHunspells.Iter(); !iter.Done(); iter.Next()) { + if (!iter.Data().mEnabled) { + continue; + } + + nsresult rv = iter.Data().LoadIfNecessary(); + if (NS_FAILED(rv)) { + continue; + } + + std::string charsetWord; + rv = iter.Data().ConvertCharset(aWord, charsetWord); + NS_ENSURE_SUCCESS(rv, rv); + + std::vector<std::string> suggestions = + iter.Data().mHunspell->suggest(charsetWord); + if (!suggestions.empty()) { + aSuggestions.SetCapacity(aSuggestions.Length() + suggestions.size()); + for (Span<const char> charSrc : suggestions) { + // Convert the suggestion to utf16 + auto src = AsBytes(charSrc); + nsresult rv = + iter.Data().mDecoder->Encoding()->DecodeWithoutBOMHandling( + src, *aSuggestions.AppendElement()); + NS_ENSURE_SUCCESS(rv, rv); + iter.Data().mDecoder->Encoding()->NewDecoderWithoutBOMHandlingInto( + *iter.Data().mDecoder); + } + } + } + + return NS_OK; +} + +NS_IMETHODIMP +mozHunspell::Observe(nsISupports* aSubj, const char* aTopic, + const char16_t* aData) { + NS_ASSERTION(!strcmp(aTopic, "profile-do-change") || + !strcmp(aTopic, "profile-after-change"), + "Unexpected observer topic"); + + LoadDictionaryList(false); + + return NS_OK; +} + +NS_IMETHODIMP mozHunspell::AddDirectory(nsIFile* aDir) { + mDynamicDirectories.AppendObject(aDir); + LoadDictionaryList(true); + return NS_OK; +} + +NS_IMETHODIMP mozHunspell::RemoveDirectory(nsIFile* aDir) { + mDynamicDirectories.RemoveObject(aDir); + LoadDictionaryList(true); + +#ifdef MOZ_THUNDERBIRD + /* + * This notification is needed for Thunderbird. Thunderbird derives the + * dictionary from the document's "lang" attribute. If a dictionary is + * removed, we need to change the "lang" attribute. 
+ */ + nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService(); + if (obs) { + obs->NotifyObservers(nullptr, SPELLCHECK_DICTIONARY_REMOVE_NOTIFICATION, + nullptr); + } +#endif + return NS_OK; +} + +NS_IMETHODIMP mozHunspell::AddDictionary(const nsAString& aLang, + nsIURI* aFile) { + NS_ENSURE_TRUE(aFile, NS_ERROR_INVALID_ARG); + + mDynamicDictionaries.InsertOrUpdate(aLang, aFile); + mDictionaries.InsertOrUpdate(aLang, aFile); + DictionariesChanged(true); + return NS_OK; +} + +NS_IMETHODIMP mozHunspell::RemoveDictionary(const nsAString& aLang, + nsIURI* aFile, bool* aRetVal) { + NS_ENSURE_TRUE(aFile, NS_ERROR_INVALID_ARG); + *aRetVal = false; + + nsCOMPtr<nsIURI> file = mDynamicDictionaries.Get(aLang); + bool equal; + if (file && NS_SUCCEEDED(file->Equals(aFile, &equal)) && equal) { + mDynamicDictionaries.Remove(aLang); + LoadDictionaryList(true); + *aRetVal = true; + } + return NS_OK; +} diff --git a/extensions/spellcheck/hunspell/glue/mozHunspell.h b/extensions/spellcheck/hunspell/glue/mozHunspell.h new file mode 100644 index 0000000000..89a38d9e49 --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/mozHunspell.h @@ -0,0 +1,145 @@ +/******* BEGIN LICENSE BLOCK ******* + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Initial Developers of the Original Code are Kevin Hendricks (MySpell) + * and László Németh (Hunspell). Portions created by the Initial Developers + * are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved. + * + * Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca) + * David Einstein (deinst@world.std.com) + * Michiel van Leeuwen (mvl@exedo.nl) + * Caolan McNamara (cmc@openoffice.org) + * László Németh (nemethl@gyorsposta.hu) + * Davide Prina + * Giuseppe Modugno + * Gianluca Turconi + * Simon Brouwer + * Noll Janos + * Biro Arpad + * Goldman Eleonora + * Sarlos Tamas + * Bencsath Boldizsar + * Halacsy Peter + * Dvornik Laszlo + * Gefferth Andras + * Nagy Viktor + * Varga Daniel + * Chris Halls + * Rene Engelhard + * Bram Moolenaar + * Dafydd Jones + * Harri Pitkanen + * Andras Timar + * Tor Lillqvist + * Jesper Kristensen (mail@jesperkristensen.dk) + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ * + ******* END LICENSE BLOCK *******/ + +#ifndef mozHunspell_h__ +#define mozHunspell_h__ + +#include "RLBoxHunspell.h" +#include "mozISpellCheckingEngine.h" +#include "mozIPersonalDictionary.h" +#include "nsString.h" +#include "nsCOMPtr.h" +#include "nsCOMArray.h" +#include "nsHashKeys.h" +#include "nsIMemoryReporter.h" +#include "nsIObserver.h" +#include "nsIURI.h" +#include "mozilla/Encoding.h" +#include "mozilla/UniquePtr.h" +#include "nsInterfaceHashtable.h" +#include "nsWeakReference.h" +#include "nsTHashMap.h" +#include "nsCycleCollectionParticipant.h" +#include "mozHunspellAllocator.h" + +#define MOZ_HUNSPELL_CONTRACTID "@mozilla.org/spellchecker/engine;1" +#define MOZ_HUNSPELL_CID \ + /* 56c778e4-1bee-45f3-a689-886692a97fe7 */ \ + { \ + 0x56c778e4, 0x1bee, 0x45f3, { \ + 0xa6, 0x89, 0x88, 0x66, 0x92, 0xa9, 0x7f, 0xe7 \ + } \ + } + +class mozHunspell final : public mozISpellCheckingEngine, + public nsIObserver, + public nsSupportsWeakReference, + public nsIMemoryReporter { + public: + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + NS_DECL_MOZISPELLCHECKINGENGINE + NS_DECL_NSIOBSERVER + NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(mozHunspell, mozISpellCheckingEngine) + + mozHunspell(); + + nsresult Init(); + + void LoadDictionaryList(bool aNotifyChildProcesses); + + NS_DECL_NSIMEMORYREPORTER + + protected: + virtual ~mozHunspell(); + + void DictionariesChanged(bool aNotifyChildProcesses); + + nsCOMPtr<mozIPersonalDictionary> mPersonalDictionary; + + // Hashtable matches dictionary name to .aff file + nsInterfaceHashtable<nsStringHashKey, nsIURI> mDictionaries; + + // dynamic dirs used to search for dictionaries + nsCOMArray<nsIFile> mDynamicDirectories; + nsInterfaceHashtable<nsStringHashKey, nsIURI> mDynamicDictionaries; + + struct DictionaryData { + // keep track of whether the dictionary is currently in use or not + bool mEnabled = true; + + // keep track of whether we've failed loading this dictionary before. + // if set, we don't try loading it again. + bool mLoadFailed = false; + + mozilla::UniquePtr<mozilla::Encoder> mEncoder; + mozilla::UniquePtr<mozilla::Decoder> mDecoder; + mozilla::UniquePtr<RLBoxHunspell> mHunspell; + nsCString mAffixFileName; + + // helper method for converting a word to the charset of the dictionary + nsresult ConvertCharset(const nsAString& aStr, std::string& aDst); + + // helper method to load the dictionary if it is not already loaded + nsresult LoadIfNecessary(); + }; + + nsTHashMap<nsCStringHashKey, DictionaryData> mHunspells; +}; + +#endif diff --git a/extensions/spellcheck/hunspell/glue/mozHunspellAllocator.h b/extensions/spellcheck/hunspell/glue/mozHunspellAllocator.h new file mode 100644 index 0000000000..ddcec3e28c --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/mozHunspellAllocator.h @@ -0,0 +1,15 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozHunspellAllocator_h__ +#define mozHunspellAllocator_h__ + +#include "mozilla/CountingAllocatorBase.h" + +class HunspellAllocator + : public mozilla::CountingAllocatorBase<HunspellAllocator> {}; + +#endif diff --git a/extensions/spellcheck/hunspell/glue/mozHunspellRLBoxGlue.h b/extensions/spellcheck/hunspell/glue/mozHunspellRLBoxGlue.h new file mode 100644 index 0000000000..d84a5ba739 --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/mozHunspellRLBoxGlue.h @@ -0,0 +1,45 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozHunspellRLBoxGlue_h +#define mozHunspellRLBoxGlue_h + +#include <stdint.h> + +#if defined(__cplusplus) +extern "C" { +#endif + +typedef uint32_t(hunspell_create_filemgr_t)(const char* aFilename); +typedef bool(hunspell_get_line_t)(uint32_t aFd, char** aLinePtr); +typedef int(hunspell_get_line_num_t)(uint32_t aFd); +typedef void(hunspell_destruct_filemgr_t)(uint32_t aFd); +typedef uint32_t(hunspell_ToUpperCase_t)(uint32_t aChar); +typedef uint32_t(hunspell_ToLowerCase_t)(uint32_t aChar); +typedef struct cs_info*(hunspell_get_current_cs_t)(const char* es); + +void RegisterHunspellCallbacks( + hunspell_create_filemgr_t* aHunspellCreateFilemgr, + hunspell_get_line_t* aHunspellGetLine, + hunspell_get_line_num_t* aHunspellGetLine_num, + hunspell_destruct_filemgr_t* aHunspellDestructFilemgr, + hunspell_ToUpperCase_t* aHunspellToUpperCase, + hunspell_ToLowerCase_t* aHunspellToLowerCase, + hunspell_get_current_cs_t* aHunspellGetCurrentCS); + +extern hunspell_create_filemgr_t* moz_glue_hunspell_create_filemgr; +extern hunspell_get_line_t* moz_glue_hunspell_get_line; +extern hunspell_get_line_num_t* moz_glue_hunspell_get_line_num; +extern hunspell_destruct_filemgr_t* moz_glue_hunspell_destruct_filemgr; +extern hunspell_ToUpperCase_t* moz_hunspell_ToUpperCase; +extern hunspell_ToLowerCase_t* moz_hunspell_ToLowerCase; +extern hunspell_get_current_cs_t* moz_hunspell_GetCurrentCS; + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/extensions/spellcheck/hunspell/glue/mozHunspellRLBoxHost.cpp b/extensions/spellcheck/hunspell/glue/mozHunspellRLBoxHost.cpp new file mode 100644 index 0000000000..61d2f50887 --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/mozHunspellRLBoxHost.cpp @@ -0,0 +1,229 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include <limits> + +#include "mozHunspellRLBoxHost.h" +#include "mozilla/DebugOnly.h" +#include "mozilla/Try.h" +#include "nsContentUtils.h" +#include "nsILoadInfo.h" +#include "nsNetUtil.h" +#include "nsUnicharUtils.h" + +#include "hunspell_csutil.hxx" + +using namespace mozilla; + +mozHunspellFileMgrHost::mozHunspellFileMgrHost(const nsCString& aFilename) { + nsCOMPtr<nsIChannel> channel; + DebugOnly<Result<Ok, nsresult>> result = Open(aFilename, channel, mStream); + NS_WARNING_ASSERTION(result.value.isOk(), "Failed to open Hunspell file"); +} + +/* static */ +Result<Ok, nsresult> mozHunspellFileMgrHost::Open( + const nsCString& aPath, nsCOMPtr<nsIChannel>& aChannel, + nsCOMPtr<nsIInputStream>& aStream) { + nsCOMPtr<nsIURI> uri; + MOZ_TRY(NS_NewURI(getter_AddRefs(uri), aPath)); + + MOZ_TRY(NS_NewChannel( + getter_AddRefs(aChannel), uri, nsContentUtils::GetSystemPrincipal(), + nsILoadInfo::SEC_REQUIRE_SAME_ORIGIN_INHERITS_SEC_CONTEXT, + nsIContentPolicy::TYPE_OTHER)); + + MOZ_TRY(aChannel->Open(getter_AddRefs(aStream))); + return Ok(); +} + +Result<Ok, nsresult> mozHunspellFileMgrHost::ReadLine(nsACString& aLine) { + if (!mStream) { + return Err(NS_ERROR_NOT_INITIALIZED); + } + + bool ok; + MOZ_TRY(NS_ReadLine(mStream.get(), &mLineBuffer, aLine, &ok)); + if (!ok) { + mStream = nullptr; + } + + mLineNum++; + return Ok(); +} + +/* static */ +Result<int64_t, nsresult> mozHunspellFileMgrHost::GetSize( + const nsCString& aFilename) { + int64_t ret = -1; + + nsCOMPtr<nsIChannel> channel; + nsCOMPtr<nsIInputStream> stream; + MOZ_TRY(Open(aFilename, channel, stream)); + + channel->GetContentLength(&ret); + return ret; +} + +bool mozHunspellFileMgrHost::GetLine(nsACString& aResult) { + return !ReadLine(aResult).isErr(); +} + +/* static */ +uint32_t mozHunspellCallbacks::sCurrentFreshId = 0; +/* static */ +mozilla::StaticRWLock mozHunspellCallbacks::sFileMgrMapLock; +/* static */ +std::map<uint32_t, std::unique_ptr<mozHunspellFileMgrHost>> + mozHunspellCallbacks::sFileMgrMap; +/* static */ +std::set<nsCString> mozHunspellCallbacks::sFileMgrAllowList; + +/* static */ +void mozHunspellCallbacks::AllowFile(const nsCString& aFilename) { + mozilla::StaticAutoWriteLock lock(sFileMgrMapLock); + sFileMgrAllowList.insert(aFilename); +} + +/* static */ +void mozHunspellCallbacks::Clear() { + mozilla::StaticAutoWriteLock lock(sFileMgrMapLock); + sCurrentFreshId = 0; + sFileMgrMap.clear(); + sFileMgrAllowList.clear(); +} + +/* static */ +tainted_hunspell<uint32_t> mozHunspellCallbacks::CreateFilemgr( + rlbox_sandbox_hunspell& aSandbox, + tainted_hunspell<const char*> t_aFilename) { + mozilla::StaticAutoWriteLock lock(sFileMgrMapLock); + + return t_aFilename.copy_and_verify_string( + [&](std::unique_ptr<char[]> aFilename) { + nsCString cFilename = nsDependentCString(aFilename.get()); + + // Ensure that the filename is in the allowlist + auto it = sFileMgrAllowList.find(cFilename); + MOZ_RELEASE_ASSERT(it != sFileMgrAllowList.end()); + + // Get new id + uint32_t freshId = GetFreshId(); + // Save mapping of id to file manager + sFileMgrMap[freshId] = std::unique_ptr<mozHunspellFileMgrHost>( + new mozHunspellFileMgrHost(cFilename)); + + return freshId; + }); +} + +/* static */ +uint32_t mozHunspellCallbacks::GetFreshId() { + // i is uint64_t to prevent overflow during loop increment which would cause + // an infinite loop + for (uint64_t i = sCurrentFreshId; i < std::numeric_limits<uint32_t>::max(); + i++) { + auto it = sFileMgrMap.find(i); + if (it == sFileMgrMap.end()) { + // set sCurrentFreshId to the 
next (possibly) available id + sCurrentFreshId = i + 1; + return static_cast<uint32_t>(i); + } + } + + MOZ_CRASH("Ran out of unique file ids for hunspell dictionaries"); +} + +/* static */ +mozHunspellFileMgrHost& mozHunspellCallbacks::GetMozHunspellFileMgrHost( + tainted_hunspell<uint32_t> t_aFd) { + mozilla::StaticAutoReadLock lock(sFileMgrMapLock); + uint32_t aFd = t_aFd.copy_and_verify([](uint32_t aFd) { return aFd; }); + auto iter = sFileMgrMap.find(aFd); + MOZ_RELEASE_ASSERT(iter != sFileMgrMap.end()); + return *(iter->second.get()); +} + +/* static */ +tainted_hunspell<bool> mozHunspellCallbacks::GetLine( + rlbox_sandbox_hunspell& aSandbox, tainted_hunspell<uint32_t> t_aFd, + tainted_hunspell<char**> t_aLinePtr) { + mozHunspellFileMgrHost& inst = + mozHunspellCallbacks::GetMozHunspellFileMgrHost(t_aFd); + nsAutoCString line; + bool ok = inst.GetLine(line); + // If the getline fails, return a null which is "graceful" failure + if (ok) { + // Copy the line into the sandbox. This memory is eventually freed by + // hunspell. + size_t size = line.Length() + 1; + tainted_hunspell<char*> t_line = aSandbox.malloc_in_sandbox<char>(size); + + if (t_line == nullptr) { + // If malloc fails, we should go to "graceful" failure path + ok = false; + } else { + rlbox::memcpy(aSandbox, t_line, line.get(), size); + } + *t_aLinePtr = t_line; + } else { + *t_aLinePtr = nullptr; + } + return ok; +} + +/* static */ +tainted_hunspell<int> mozHunspellCallbacks::GetLineNum( + rlbox_sandbox_hunspell& aSandbox, tainted_hunspell<uint32_t> t_aFd) { + mozHunspellFileMgrHost& inst = + mozHunspellCallbacks::GetMozHunspellFileMgrHost(t_aFd); + int num = inst.GetLineNum(); + return num; +} + +/* static */ +void mozHunspellCallbacks::DestructFilemgr(rlbox_sandbox_hunspell& aSandbox, + tainted_hunspell<uint32_t> t_aFd) { + mozilla::StaticAutoWriteLock lock(sFileMgrMapLock); + uint32_t aFd = t_aFd.copy_and_verify([](uint32_t aFd) { return aFd; }); + + auto iter = sFileMgrMap.find(aFd); + if (iter != sFileMgrMap.end()) { + sFileMgrMap.erase(iter); + } +} + +// Callbacks for using Firefox's encoding instead of hunspell's + +/* static */ +tainted_hunspell<uint32_t> mozHunspellCallbacks::ToUpperCase( + rlbox_sandbox_hunspell& aSandbox, tainted_hunspell<uint32_t> t_aChar) { + uint32_t aChar = + t_aChar.copy_and_verify([](uint32_t aChar) { return aChar; }); + return ::ToUpperCase(aChar); +} + +/* static */ +tainted_hunspell<uint32_t> mozHunspellCallbacks::ToLowerCase( + rlbox_sandbox_hunspell& aSandbox, tainted_hunspell<uint32_t> t_aChar) { + uint32_t aChar = + t_aChar.copy_and_verify([](uint32_t aChar) { return aChar; }); + return ::ToLowerCase(aChar); +} + +/* static */ tainted_hunspell<struct cs_info*> +mozHunspellCallbacks::GetCurrentCS(rlbox_sandbox_hunspell& aSandbox, + tainted_hunspell<const char*> t_es) { + tainted_hunspell<struct cs_info*> t_ccs = + aSandbox.malloc_in_sandbox<struct cs_info>(256); + MOZ_RELEASE_ASSERT(t_ccs); + return t_es.copy_and_verify_string([&](std::unique_ptr<char[]> es) { + struct cs_info* ccs = hunspell_get_current_cs(es.get()); + rlbox::memcpy(aSandbox, t_ccs, ccs, sizeof(struct cs_info) * 256); + delete[] ccs; + return t_ccs; + }); +} diff --git a/extensions/spellcheck/hunspell/glue/mozHunspellRLBoxHost.h b/extensions/spellcheck/hunspell/glue/mozHunspellRLBoxHost.h new file mode 100644 index 0000000000..f521ce4f71 --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/mozHunspellRLBoxHost.h @@ -0,0 +1,122 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozHunspellRLBoxHost_h +#define mozHunspellRLBoxHost_h + +#include <map> +#include <memory> +#include <mutex> +#include <set> +#include <stdint.h> +#include <string> + +#include "RLBoxHunspell.h" +#include "mozilla/Result.h" +#include "mozilla/ResultExtensions.h" +#include "mozilla/RWLock.h" +#include "nsIChannel.h" +#include "nsIInputStream.h" +#include "nsReadLine.h" + +namespace mozilla { + +class mozHunspellFileMgrHost final { + public: + /** + * aFilename must be a local file/jar URI for the file to load. + */ + explicit mozHunspellFileMgrHost(const nsCString& aFilename); + ~mozHunspellFileMgrHost() = default; + + bool GetLine(nsACString& aResult); + int GetLineNum() const { return mLineNum; } + + static Result<int64_t, nsresult> GetSize(const nsCString& aFilename); + + private: + static mozilla::Result<mozilla::Ok, nsresult> Open( + const nsCString& aPath, nsCOMPtr<nsIChannel>& aChannel, + nsCOMPtr<nsIInputStream>& aStream); + + mozilla::Result<mozilla::Ok, nsresult> ReadLine(nsACString& aLine); + + int mLineNum = 0; + nsCOMPtr<nsIInputStream> mStream; + nsLineBuffer<char> mLineBuffer; +}; + +class mozHunspellCallbacks { + public: + // APIs invoked by the sandboxed hunspell file manager + static tainted_hunspell<uint32_t> CreateFilemgr( + rlbox_sandbox_hunspell& aSandbox, + tainted_hunspell<const char*> t_aFilename); + static tainted_hunspell<bool> GetLine(rlbox_sandbox_hunspell& aSandbox, + tainted_hunspell<uint32_t> t_aFd, + tainted_hunspell<char**> t_aLinePtr); + static tainted_hunspell<int> GetLineNum(rlbox_sandbox_hunspell& aSandbox, + tainted_hunspell<uint32_t> t_aFd); + static void DestructFilemgr(rlbox_sandbox_hunspell& aSandbox, + tainted_hunspell<uint32_t> t_aFd); + + // APIs necessary for hunspell UTF encoding + static tainted_hunspell<uint32_t> ToUpperCase( + rlbox_sandbox_hunspell& aSandbox, tainted_hunspell<uint32_t> t_aChar); + static tainted_hunspell<uint32_t> ToLowerCase( + rlbox_sandbox_hunspell& aSandbox, tainted_hunspell<uint32_t> t_aChar); + static tainted_hunspell<struct cs_info*> GetCurrentCS( + rlbox_sandbox_hunspell& aSandbox, tainted_hunspell<const char*> t_es); + + protected: + // API called by RLBox + + /** + * Add filename to allow list. + */ + static void AllowFile(const nsCString& aFilename); + friend RLBoxHunspell* RLBoxHunspell::Create(const nsCString& affpath, + const nsCString& dpath); + /** + * Clear allow list and map of hunspell file managers. 
+ */ + static void Clear(); + friend RLBoxHunspell::~RLBoxHunspell(); + + private: + /** + * sFileMgrMap holds a map between unique uint32_t + * integers and mozHunspellFileMgrHost instances + */ + static std::map<uint32_t, std::unique_ptr<mozHunspellFileMgrHost>> + sFileMgrMap; + + /** + * sFileMgrAllowList contains the filenames of the dictionary files hunspell + * is allowed to open + */ + static std::set<nsCString> sFileMgrAllowList; + /** + * Reader-writer lock for the sFileMgrMap + */ + static mozilla::StaticRWLock sFileMgrMapLock; + /** + * Tracks the next possibly unused id for sFileMgrMap + */ + static uint32_t sCurrentFreshId; + /** + * Returns an unused id for sFileMgrMap + */ + static uint32_t GetFreshId(); + /** + * Returns the mozHunspellFileMgrHost for the given uint32_t id + */ + static mozHunspellFileMgrHost& GetMozHunspellFileMgrHost( + tainted_hunspell<uint32_t> t_aFd); +}; +} // namespace mozilla + +#endif diff --git a/extensions/spellcheck/hunspell/glue/mozHunspellRLBoxSandbox.cpp b/extensions/spellcheck/hunspell/glue/mozHunspellRLBoxSandbox.cpp new file mode 100644 index 0000000000..f913b4bc83 --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/mozHunspellRLBoxSandbox.cpp @@ -0,0 +1,54 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozHunspellRLBoxSandbox.h" +#include "mozHunspellRLBoxGlue.h" + +FileMgr::FileMgr(const char* aFilename, const char* aKey) : mFd(0) { + // The key is not used in firefox + mFd = moz_glue_hunspell_create_filemgr(aFilename); +} + +bool FileMgr::getline(std::string& aResult) { + char* line = nullptr; + bool ok = moz_glue_hunspell_get_line(mFd, &line); + if (ok && line) { + aResult = line; + free(line); + } + return ok; +} + +int FileMgr::getlinenum() const { return moz_glue_hunspell_get_line_num(mFd); } + +FileMgr::~FileMgr() { moz_glue_hunspell_destruct_filemgr(mFd); } + +// Glue code to set global callbacks + +hunspell_create_filemgr_t* moz_glue_hunspell_create_filemgr = nullptr; +hunspell_get_line_t* moz_glue_hunspell_get_line = nullptr; +hunspell_get_line_num_t* moz_glue_hunspell_get_line_num = nullptr; +hunspell_destruct_filemgr_t* moz_glue_hunspell_destruct_filemgr = nullptr; +hunspell_ToUpperCase_t* moz_hunspell_ToUpperCase = nullptr; +hunspell_ToLowerCase_t* moz_hunspell_ToLowerCase = nullptr; +hunspell_get_current_cs_t* moz_hunspell_GetCurrentCS = nullptr; + +void RegisterHunspellCallbacks( + hunspell_create_filemgr_t* aHunspellCreateFilemgr, + hunspell_get_line_t* aHunspellGetLine, + hunspell_get_line_num_t* aHunspellGetLine_num, + hunspell_destruct_filemgr_t* aHunspellDestructFilemgr, + hunspell_ToUpperCase_t* aHunspellToUpperCase, + hunspell_ToLowerCase_t* aHunspellToLowerCase, + hunspell_get_current_cs_t* aHunspellGetCurrentCS) { + moz_glue_hunspell_create_filemgr = aHunspellCreateFilemgr; + moz_glue_hunspell_get_line = aHunspellGetLine; + moz_glue_hunspell_get_line_num = aHunspellGetLine_num; + moz_glue_hunspell_destruct_filemgr = aHunspellDestructFilemgr; + moz_hunspell_ToUpperCase = aHunspellToUpperCase; + moz_hunspell_ToLowerCase = aHunspellToLowerCase; + moz_hunspell_GetCurrentCS = aHunspellGetCurrentCS; +} diff --git a/extensions/spellcheck/hunspell/glue/mozHunspellRLBoxSandbox.h 
b/extensions/spellcheck/hunspell/glue/mozHunspellRLBoxSandbox.h new file mode 100644 index 0000000000..9850408807 --- /dev/null +++ b/extensions/spellcheck/hunspell/glue/mozHunspellRLBoxSandbox.h @@ -0,0 +1,36 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozHunspellRLBoxSandbox_h +#define mozHunspellRLBoxSandbox_h + +#include <string> +#include <stdint.h> + +// Note: This class name and lack of namespacing are terrible, but are necessary +// for Hunspell compatibility. +class FileMgr final { + public: + /** + * aFilename must be a local file/jar URI for the file to load. + * + * aKey is the decryption key for encrypted Hunzip files, and is + * unsupported. The argument is there solely for compatibility. + */ + explicit FileMgr(const char* aFilename, const char* aKey = nullptr); + ~FileMgr(); + + // Note: The nonstandard naming conventions of these methods are necessary for + // Hunspell compatibility. + bool getline(std::string& aLine); + int getlinenum() const; + + private: + // opaque file descriptor obtained from the host application + uint32_t mFd; +}; + +#endif // mozHunspellRLBoxSandbox_h diff --git a/extensions/spellcheck/hunspell/moz.build b/extensions/spellcheck/hunspell/moz.build new file mode 100644 index 0000000000..784b49339c --- /dev/null +++ b/extensions/spellcheck/hunspell/moz.build @@ -0,0 +1,13 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +DIRS += ["glue"] + +if not CONFIG["MOZ_WASM_SANDBOXING_HUNSPELL"]: + DIRS += ["src"] + +if CONFIG["ENABLE_TESTS"]: + XPCSHELL_TESTS_MANIFESTS += ["tests/unit/xpcshell.toml"] diff --git a/extensions/spellcheck/hunspell/moz.yaml b/extensions/spellcheck/hunspell/moz.yaml new file mode 100644 index 0000000000..0d4f221ebd --- /dev/null +++ b/extensions/spellcheck/hunspell/moz.yaml @@ -0,0 +1,60 @@ +schema: 1 + +bugzilla: + product: Core + component: "Spelling checker" + +origin: + name: Hunspell + description: Spell checker + + url: http://hunspell.github.io + + release: 1180421f50f211984211e968eb6801ffd3390b8f (2022-08-22T13:53:31Z). 
+ revision: 1180421f50f211984211e968eb6801ffd3390b8f + license: MPL-1.1 + license-file: COPYING.MPL + +vendoring: + url: https://github.com/hunspell/hunspell + source-hosting: github + tracking: commit + + exclude: + - "**" + + keep: + - glue + - tests + - COPYING.MPL + - src/moz.build + - src/sources.mozbuild + - src/license.hunspell + - src/license.myspell + - src/README.md + + include: + - src/hunspell + + patches: + - patches/bug1410214.patch + - patches/bug1653659.patch + - patches/bug1739761.patch + - patches/bug1838113.patch + + update-actions: + - action: move-dir + from: '{vendor_dir}/src/hunspell' + to: '{vendor_dir}/src' + - action: delete-path + path: "src/Makefile.am" + - action: delete-path + path: "src/filemgr.cxx" + - action: delete-path + path: "src/hunvisapi.h.in" + - action: delete-path + path: "src/hunzip.cxx" + - action: delete-path + path: "src/hunzip.hxx" + - action: delete-path + path: "src/utf_info.hxx" diff --git a/extensions/spellcheck/hunspell/patches/bug1410214.patch b/extensions/spellcheck/hunspell/patches/bug1410214.patch new file mode 100644 index 0000000000..4aeab7d64b --- /dev/null +++ b/extensions/spellcheck/hunspell/patches/bug1410214.patch @@ -0,0 +1,37 @@ +diff --git a/src/filemgr.hxx b/src/filemgr.hxx +--- a/src/filemgr.hxx ++++ b/src/filemgr.hxx +@@ -67,32 +67,11 @@ + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + /* file manager class - read lines of files [filename] OR [filename.hz] */ + #ifndef FILEMGR_HXX_ + #define FILEMGR_HXX_ + +-#include "hunzip.hxx" +-#include <stdio.h> +-#include <string> +-#include <fstream> +- +-class FileMgr { +- private: +- FileMgr(const FileMgr&); +- FileMgr& operator=(const FileMgr&); ++#include "mozHunspellRLBoxSandbox.h" + +- protected: +- std::ifstream fin; +- Hunzip* hin; +- char in[BUFSIZE + 50]; // input buffer +- int fail(const char* err, const char* par); +- int linenum; +- +- public: +- FileMgr(const char* filename, const char* key = NULL); +- ~FileMgr(); +- bool getline(std::string&); +- int getlinenum(); +-}; + #endif diff --git a/extensions/spellcheck/hunspell/patches/bug1653659.patch b/extensions/spellcheck/hunspell/patches/bug1653659.patch new file mode 100644 index 0000000000..554844225b --- /dev/null +++ b/extensions/spellcheck/hunspell/patches/bug1653659.patch @@ -0,0 +1,190 @@ +diff --git a/src/csutil.cxx b/src/csutil.cxx +--- a/src/csutil.cxx ++++ b/src/csutil.cxx +@@ -90,21 +90,17 @@ + #else + #ifndef MOZILLA_CLIENT + #include "utf_info.hxx" + #define UTF_LST_LEN (sizeof(utf_lst) / (sizeof(unicode_info))) + #endif + #endif + + #ifdef MOZILLA_CLIENT +-#include "nsCOMPtr.h" +-#include "nsUnicharUtils.h" +-#include "mozilla/Encoding.h" +- +-using namespace mozilla; ++#include "mozHunspellRLBoxGlue.h" + #endif + + struct unicode_info2 { + char cletter; + unsigned short cupper; + unsigned short clower; + }; + +@@ -2277,101 +2273,18 @@ struct cs_info* get_current_cs(const std + "error: unknown encoding %s: using %s as fallback\n", es.c_str(), + encds[0].enc_name); + ccs = encds[0].cs_table; + } + + return ccs; + } + #else +-// XXX This function was rewritten for mozilla. Instead of storing the +-// conversion tables static in this file, create them when needed +-// with help the mozilla backend. + struct cs_info* get_current_cs(const std::string& es) { +- struct cs_info* ccs = new cs_info[256]; +- // Initialze the array with dummy data so that we wouldn't need +- // to return null in case of failures. 
+- for (int i = 0; i <= 0xff; ++i) { +- ccs[i].ccase = false; +- ccs[i].clower = i; +- ccs[i].cupper = i; +- } +- +- auto encoding = Encoding::ForLabelNoReplacement(es); +- if (!encoding) { +- return ccs; +- } +- auto encoder = encoding->NewEncoder(); +- auto decoder = encoding->NewDecoderWithoutBOMHandling(); +- +- for (unsigned int i = 0; i <= 0xff; ++i) { +- bool success = false; +- // We want to find the upper/lowercase equivalents of each byte +- // in this 1-byte character encoding. Call our encoding/decoding +- // APIs separately for each byte since they may reject some of the +- // bytes, and we want to handle errors separately for each byte. +- uint8_t lower, upper; +- do { +- if (i == 0) +- break; +- uint8_t source = uint8_t(i); +- char16_t uni[2]; +- char16_t uniCased; +- uint8_t destination[4]; +- auto src1 = MakeSpan(&source, 1); +- auto dst1 = MakeSpan(uni); +- auto src2 = MakeSpan(&uniCased, 1); +- auto dst2 = MakeSpan(destination); +- +- uint32_t result; +- size_t read; +- size_t written; +- Tie(result, read, written) = +- decoder->DecodeToUTF16WithoutReplacement(src1, dst1, true); +- if (result != kInputEmpty || read != 1 || written != 1) { +- break; +- } +- +- uniCased = ToLowerCase(uni[0]); +- Tie(result, read, written) = +- encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true); +- if (result != kInputEmpty || read != 1 || written != 1) { +- break; +- } +- lower = destination[0]; +- +- uniCased = ToUpperCase(uni[0]); +- Tie(result, read, written) = +- encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true); +- if (result != kInputEmpty || read != 1 || written != 1) { +- break; +- } +- upper = destination[0]; +- +- success = true; +- } while (0); +- +- encoding->NewEncoderInto(*encoder); +- encoding->NewDecoderWithoutBOMHandlingInto(*decoder); +- +- if (success) { +- ccs[i].cupper = upper; +- ccs[i].clower = lower; +- } else { +- ccs[i].cupper = i; +- ccs[i].clower = i; +- } +- +- if (ccs[i].clower != (unsigned char)i) +- ccs[i].ccase = true; +- else +- ccs[i].ccase = false; +- } +- +- return ccs; ++ return moz_hunspell_GetCurrentCS(es.c_str()); + } + #endif + + // primitive isalpha() replacement for tokenization + std::string get_casechars(const char* enc) { + struct cs_info* csconv = get_current_cs(enc); + std::string expw; + for (int i = 0; i <= 255; ++i) { +@@ -2455,34 +2368,34 @@ unsigned short unicodetoupper(unsigned s + // There are a dotless lower case i pair of upper `I', + // and an upper I with dot pair of lower `i'. + if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh))) + return 0x0130; + #ifdef OPENOFFICEORG + return static_cast<unsigned short>(u_toupper(c)); + #else + #ifdef MOZILLA_CLIENT +- return ToUpperCase((char16_t)c); ++ return moz_hunspell_ToUpperCase((char16_t)c); + #else + return (utf_tbl) ? utf_tbl[c].cupper : c; + #endif + #endif + } + + unsigned short unicodetolower(unsigned short c, int langnum) { + // In Azeri and Turkish, I and i dictinct letters: + // There are a dotless lower case i pair of upper `I', + // and an upper I with dot pair of lower `i'. + if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh))) + return 0x0131; + #ifdef OPENOFFICEORG + return static_cast<unsigned short>(u_tolower(c)); + #else + #ifdef MOZILLA_CLIENT +- return ToLowerCase((char16_t)c); ++ return moz_hunspell_ToLowerCase((char16_t)c); + #else + return (utf_tbl) ? 
utf_tbl[c].clower : c; + #endif + #endif + } + + int unicodeisalpha(unsigned short c) { + #ifdef OPENOFFICEORG +diff --git a/src/csutil.hxx b/src/csutil.hxx +--- a/src/csutil.hxx ++++ b/src/csutil.hxx +@@ -77,20 +77,16 @@ + + #include <fstream> + #include <string> + #include <vector> + #include <string.h> + #include "w_char.hxx" + #include "htypes.hxx" + +-#ifdef MOZILLA_CLIENT +-#include "nscore.h" // for mozalloc headers +-#endif +- + // casing + #define NOCAP 0 + #define INITCAP 1 + #define ALLCAP 2 + #define HUHCAP 3 + #define HUHINITCAP 4 + + // default encoding and keystring diff --git a/extensions/spellcheck/hunspell/patches/bug1739761.patch b/extensions/spellcheck/hunspell/patches/bug1739761.patch new file mode 100644 index 0000000000..d471d627f0 --- /dev/null +++ b/extensions/spellcheck/hunspell/patches/bug1739761.patch @@ -0,0 +1,615 @@ +diff --git a/src/hashmgr.cxx b/src/hashmgr.cxx +--- a/src/hashmgr.cxx ++++ b/src/hashmgr.cxx +@@ -63,16 +63,17 @@ + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + ++#include <assert.h> + #include <stdlib.h> + #include <string.h> + #include <stdio.h> + #include <ctype.h> + #include <limits> + #include <sstream> + + #include "hashmgr.hxx" +@@ -118,52 +119,54 @@ HashMgr::~HashMgr() { + // go through column by column of the table + for (int i = 0; i < tablesize; i++) { + struct hentry* pt = tableptr[i]; + struct hentry* nt = NULL; + while (pt) { + nt = pt->next; + if (pt->astr && + (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) +- free(pt->astr); +- free(pt); ++ arena_free(pt->astr); ++ arena_free(pt); + pt = nt; + } + } + free(tableptr); + } + tablesize = 0; + + if (aliasf) { + for (int j = 0; j < (numaliasf); j++) +- free(aliasf[j]); +- free(aliasf); ++ arena_free(aliasf[j]); ++ arena_free(aliasf); + aliasf = NULL; + if (aliasflen) { +- free(aliasflen); ++ arena_free(aliasflen); + aliasflen = NULL; + } + } + if (aliasm) { + for (int j = 0; j < (numaliasm); j++) +- free(aliasm[j]); +- free(aliasm); ++ arena_free(aliasm[j]); ++ arena_free(aliasm); + aliasm = NULL; + } + + #ifndef OPENOFFICEORG + #ifndef MOZILLA_CLIENT + if (utf8) + free_utf_tbl(); + #endif + #endif + + #ifdef MOZILLA_CLIENT + delete[] csconv; + #endif ++ ++ assert(outstanding_arena_allocations == 0); + } + + // lookup a root word in the hashtable + + struct hentry* HashMgr::lookup(const char* word) const { + struct hentry* dp; + if (tableptr) { + dp = tableptr[hash(word)]; +@@ -222,17 +225,17 @@ int HashMgr::add_word(const std::string& + + word = word_copy; + } + + bool upcasehomonym = false; + int descl = desc ? (aliasm ? 
sizeof(char*) : desc->size() + 1) : 0; + // variable-length hash record with word and optional fields + struct hentry* hp = +- (struct hentry*)malloc(sizeof(struct hentry) + word->size() + descl); ++ (struct hentry*)arena_alloc(sizeof(struct hentry) + word->size() + descl); + if (!hp) { + delete desc_copy; + delete word_copy; + return 1; + } + + char* hpw = hp->word; + strcpy(hpw, word->c_str()); +@@ -366,57 +369,57 @@ int HashMgr::add_word(const std::string& + delete word_copy; + return 0; + } + while (dp->next != NULL) { + if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) { + // remove hidden onlyupcase homonym + if (!onlyupcase) { + if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { +- free(dp->astr); ++ arena_free(dp->astr); + dp->astr = hp->astr; + dp->alen = hp->alen; +- free(hp); ++ arena_free(hp); + delete desc_copy; + delete word_copy; + return 0; + } else { + dp->next_homonym = hp; + } + } else { + upcasehomonym = true; + } + } + dp = dp->next; + } + if (strcmp(hp->word, dp->word) == 0) { + // remove hidden onlyupcase homonym + if (!onlyupcase) { + if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { +- free(dp->astr); ++ arena_free(dp->astr); + dp->astr = hp->astr; + dp->alen = hp->alen; +- free(hp); ++ arena_free(hp); + delete desc_copy; + delete word_copy; + return 0; + } else { + dp->next_homonym = hp; + } + } else { + upcasehomonym = true; + } + } + if (!upcasehomonym) { + dp->next = hp; + } else { + // remove hidden onlyupcase homonym + if (hp->astr) +- free(hp->astr); +- free(hp); ++ arena_free(hp->astr); ++ arena_free(hp); + } + + delete desc_copy; + delete word_copy; + return 0; + } + + int HashMgr::add_hidden_capitalized_word(const std::string& word, +@@ -430,17 +433,17 @@ int HashMgr::add_hidden_capitalized_word + + // add inner capitalized forms to handle the following allcap forms: + // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG + // Allcaps with suffixes: CIA's -> CIA'S + if (((captype == HUHCAP) || (captype == HUHINITCAP) || + ((captype == ALLCAP) && (flagslen != 0))) && + !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) { + unsigned short* flags2 = +- (unsigned short*)malloc(sizeof(unsigned short) * (flagslen + 1)); ++ (unsigned short*)arena_alloc(sizeof(unsigned short) * (flagslen + 1)); + if (!flags2) + return 1; + if (flagslen) + memcpy(flags2, flags, flagslen * sizeof(unsigned short)); + flags2[flagslen] = ONLYUPCASEFLAG; + if (utf8) { + std::string st; + std::vector<w_char> w; +@@ -479,23 +482,23 @@ int HashMgr::get_clen_and_captype(const + } + + // remove word (personal dictionary function for standalone applications) + int HashMgr::remove(const std::string& word) { + struct hentry* dp = lookup(word.c_str()); + while (dp) { + if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) { + unsigned short* flags = +- (unsigned short*)malloc(sizeof(unsigned short) * (dp->alen + 1)); ++ (unsigned short*)arena_alloc(sizeof(unsigned short) * (dp->alen + 1)); + if (!flags) + return 1; + for (int i = 0; i < dp->alen; i++) + flags[i] = dp->astr[i]; + flags[dp->alen] = forbiddenword; +- free(dp->astr); ++ arena_free(dp->astr); + dp->astr = flags; + dp->alen++; + std::sort(flags, flags + dp->alen); + } + dp = dp->next_homonym; + } + return 0; + } +@@ -533,17 +536,17 @@ int HashMgr::add_with_affix(const std::s + remove_forbidden_flag(word); + if (dp && dp->astr) { + int captype; + int wcl = get_clen_and_captype(word, &captype); + if (aliasf) { + add_word(word, wcl, dp->astr, dp->alen, NULL, false, captype); + } 
else { + unsigned short* flags = +- (unsigned short*)malloc(dp->alen * sizeof(unsigned short)); ++ (unsigned short*) arena_alloc(dp->alen * sizeof(unsigned short)); + if (flags) { + memcpy((void*)flags, (void*)dp->astr, + dp->alen * sizeof(unsigned short)); + add_word(word, wcl, flags, dp->alen, NULL, false, captype); + } else + return 1; + } + return add_hidden_capitalized_word(word, wcl, dp->astr, +@@ -668,17 +671,17 @@ int HashMgr::load_tables(const char* tpa + if (aliasf) { + int index = atoi(ap.c_str()); + al = get_aliasf(index, &flags, dict); + if (!al) { + HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", + dict->getlinenum()); + } + } else { +- al = decode_flags(&flags, ap.c_str(), dict); ++ al = decode_flags(&flags, ap.c_str(), dict, /* arena = */ true); + if (al == -1) { + HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); + delete dict; + return 6; + } + std::sort(flags, flags + al); + } + } else { +@@ -709,47 +712,48 @@ int HashMgr::hash(const char* word) cons + hv = (hv << 8) | (*word++); + while (*word != 0) { + ROTATE(hv, ROTATE_LEN); + hv ^= (*word++); + } + return (unsigned long)hv % tablesize; + } + +-int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const { ++int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const { ++ auto alloc = [arena, this](int n) { return arena ? this->arena_alloc(n) : malloc(n); }; + int len; + if (flags.empty()) { + *result = NULL; + return 0; + } + switch (flag_mode) { + case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) + len = flags.size(); + if (len % 2 == 1) + HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", + af->getlinenum()); + len /= 2; +- *result = (unsigned short*)malloc(len * sizeof(unsigned short)); ++ *result = (unsigned short*)alloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + for (int i = 0; i < len; i++) { + (*result)[i] = ((unsigned short)((unsigned char)flags[i * 2]) << 8) + + (unsigned char)flags[i * 2 + 1]; + } + break; + } + case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 + // 23 233) + len = 1; + unsigned short* dest; + for (size_t i = 0; i < flags.size(); ++i) { + if (flags[i] == ',') + len++; + } +- *result = (unsigned short*)malloc(len * sizeof(unsigned short)); ++ *result = (unsigned short*)alloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + dest = *result; + const char* src = flags.c_str(); + for (const char* p = src; *p; p++) { + if (*p == ',') { + int i = atoi(src); + if (i >= DEFAULTFLAGS) +@@ -774,26 +778,26 @@ int HashMgr::decode_flags(unsigned short + HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", + af->getlinenum()); + break; + } + case FLAG_UNI: { // UTF-8 characters + std::vector<w_char> w; + u8_u16(w, flags); + len = w.size(); +- *result = (unsigned short*)malloc(len * sizeof(unsigned short)); ++ *result = (unsigned short*)alloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + memcpy(*result, w.data(), len * sizeof(short)); + break; + } + default: { // Ispell's one-character flags (erfg -> e r f g) + unsigned short* dest; + len = flags.size(); +- *result = (unsigned short*)malloc(len * sizeof(unsigned short)); ++ *result = (unsigned short*)alloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + dest = *result; + for (size_t i = 0; i < flags.size(); ++i) { + *dest = (unsigned char)flags[i]; + dest++; + } + } +@@ -890,16 +894,18 @@ unsigned short HashMgr::decode_flag(cons + 
default: + s = *(unsigned char*)f; + } + if (s == 0) + HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); + return s; + } + ++// This function is only called by external consumers, and so using the default ++// allocator with mystrdup is correct. + char* HashMgr::encode_flag(unsigned short f) const { + if (f == 0) + return mystrdup("(NULL)"); + std::string ch; + if (flag_mode == FLAG_LONG) { + ch.push_back((unsigned char)(f >> 8)); + ch.push_back((unsigned char)(f - ((f >> 8) << 8))); + } else if (flag_mode == FLAG_NUM) { +@@ -1070,42 +1076,42 @@ bool HashMgr::parse_aliasf(const std::st + numaliasf = 0; + aliasf = NULL; + aliasflen = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return false; + } + aliasf = +- (unsigned short**)malloc(numaliasf * sizeof(unsigned short*)); ++ (unsigned short**)arena_alloc(numaliasf * sizeof(unsigned short*)); + aliasflen = +- (unsigned short*)malloc(numaliasf * sizeof(unsigned short)); ++ (unsigned short*)arena_alloc(numaliasf * sizeof(unsigned short)); + if (!aliasf || !aliasflen) { + numaliasf = 0; + if (aliasf) +- free(aliasf); ++ arena_free(aliasf); + if (aliasflen) +- free(aliasflen); ++ arena_free(aliasflen); + aliasf = NULL; + aliasflen = NULL; + return false; + } + np++; + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np != 2) { + numaliasf = 0; +- free(aliasf); +- free(aliasflen); ++ arena_free(aliasf); ++ arena_free(aliasflen); + aliasf = NULL; + aliasflen = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return false; + } + + /* now parse the numaliasf lines to read in the remainder of the table */ +@@ -1126,33 +1132,33 @@ bool HashMgr::parse_aliasf(const std::st + errored = true; + break; + } + break; + } + case 1: { + std::string piece(start_piece, iter); + aliasflen[j] = +- (unsigned short)decode_flags(&(aliasf[j]), piece, af); ++ (unsigned short)decode_flags(&(aliasf[j]), piece, af, /* arena = */ true); + std::sort(aliasf[j], aliasf[j] + aliasflen[j]); + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(nl, iter); + } + } + if (!aliasf[j]) { + for (int k = 0; k < j; ++k) { +- free(aliasf[k]); ++ arena_free(aliasf[k]); + } +- free(aliasf); +- free(aliasflen); ++ arena_free(aliasf); ++ arena_free(aliasflen); + aliasf = NULL; + aliasflen = NULL; + numaliasf = 0; + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return false; + } + } +@@ -1193,33 +1199,33 @@ bool HashMgr::parse_aliasm(const std::st + } + case 1: { + numaliasm = atoi(std::string(start_piece, iter).c_str()); + if (numaliasm < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return false; + } +- aliasm = (char**)malloc(numaliasm * sizeof(char*)); ++ aliasm = (char**)arena_alloc(numaliasm * sizeof(char*)); + if (!aliasm) { + numaliasm = 0; + return false; + } + np++; + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np != 2) { + numaliasm = 0; +- free(aliasm); ++ arena_free(aliasm); + aliasm = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return false; + } + + /* now parse the numaliasm lines to read in the remainder of the table */ + for (int j = 0; j < numaliasm; j++) { +@@ -1245,32 +1251,36 @@ bool HashMgr::parse_aliasm(const std::st + std::string::const_iterator end = nl.end(); + std::string chunk(start_piece, end); + if (complexprefixes) { + if (utf8) + 
reverseword_utf(chunk); + else + reverseword(chunk); + } +- aliasm[j] = mystrdup(chunk.c_str()); ++ size_t sl = chunk.length() + 1; ++ aliasm[j] = (char*)arena_alloc(sl); ++ if (aliasm[j]) { ++ memcpy(aliasm[j], chunk.c_str(), sl); ++ } + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(nl, iter); + } + } + if (!aliasm[j]) { + numaliasm = 0; + for (int k = 0; k < j; ++k) { +- free(aliasm[k]); ++ arena_free(aliasm[k]); + } +- free(aliasm); ++ arena_free(aliasm); + aliasm = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return false; + } + } + return true; + } +@@ -1379,8 +1389,27 @@ bool HashMgr::parse_reptable(const std:: + } + return true; + } + + // return replacing table + const std::vector<replentry>& HashMgr::get_reptable() const { + return reptable; + } ++ ++void* HashMgr::arena_alloc(int num_bytes) { ++ static const int MIN_CHUNK_SIZE = 4096; ++ if (arena.empty() || (current_chunk_size - current_chunk_offset < num_bytes)) { ++ current_chunk_size = std::max(MIN_CHUNK_SIZE, num_bytes); ++ arena.push_back(std::make_unique<uint8_t[]>(current_chunk_size)); ++ current_chunk_offset = 0; ++ } ++ ++ uint8_t* ptr = &arena.back()[current_chunk_offset]; ++ current_chunk_offset += num_bytes; ++ outstanding_arena_allocations++; ++ return ptr; ++} ++ ++void HashMgr::arena_free(void* ptr) { ++ --outstanding_arena_allocations; ++ assert(outstanding_arena_allocations >= 0); ++} +diff --git a/src/hashmgr.hxx b/src/hashmgr.hxx +--- a/src/hashmgr.hxx ++++ b/src/hashmgr.hxx +@@ -67,16 +67,18 @@ + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + #ifndef HASHMGR_HXX_ + #define HASHMGR_HXX_ + + #include <stdio.h> ++#include <stdint.h> ++#include <memory> + #include <string> + #include <vector> + + #include "htypes.hxx" + #include "filemgr.hxx" + #include "w_char.hxx" + + enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI }; +@@ -116,17 +118,23 @@ class HashMgr { + + struct hentry* lookup(const char*) const; + int hash(const char*) const; + struct hentry* walk_hashtable(int& col, struct hentry* hp) const; + + int add(const std::string& word); + int add_with_affix(const std::string& word, const std::string& pattern); + int remove(const std::string& word); +- int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const; ++private: ++ // Only internal consumers are allowed to arena-allocate. 
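The hunk above adds HashMgr::arena_alloc()/arena_free(): flag vectors, alias tables and aliasm strings that live as long as the HashMgr are carved out of growing chunks instead of individual malloc() calls, and arena_free() only decrements a counter so that alloc/free bookkeeping can be asserted. A rough standalone sketch of that bump-allocation pattern (illustrative only; the class name BumpArena and its members are invented here, and this is not the patched Hunspell code):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <memory>
    #include <vector>

    // Minimal bump arena: memory is only reclaimed when the arena itself is
    // destroyed, so free() merely balances the bookkeeping. Alignment is
    // ignored; only the bump strategy matters for this sketch.
    class BumpArena {
     public:
      void* alloc(std::size_t num_bytes) {
        static const std::size_t kMinChunkSize = 4096;
        if (chunks_.empty() || chunk_size_ - chunk_offset_ < num_bytes) {
          chunk_size_ = std::max(kMinChunkSize, num_bytes);
          chunks_.push_back(std::make_unique<uint8_t[]>(chunk_size_));
          chunk_offset_ = 0;
        }
        uint8_t* ptr = chunks_.back().get() + chunk_offset_;
        chunk_offset_ += num_bytes;
        ++outstanding_;
        return ptr;
      }

      // Nothing is actually released here; the counter only checks that
      // alloc() and free() calls stay balanced, like the patch's arena_free().
      void free(void* /*ptr*/) {
        --outstanding_;
        assert(outstanding_ >= 0);
      }

     private:
      std::vector<std::unique_ptr<uint8_t[]>> chunks_;
      std::size_t chunk_size_ = 0;
      std::size_t chunk_offset_ = 0;
      long outstanding_ = 0;
    };

Since nothing is handed back until the arena is destroyed, the scheme trades a little over-retention for far less heap churn while the dictionary tables are loaded, which is the motivation stated in the hashmgr.hxx comment below; it also explains why decode_flags() grows an extra arena parameter, so callers can say whether the result is arena-owned or must stay free()-able.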
++ int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const; ++public: ++ int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const { ++ return decode_flags(result, flags, af, /* arena = */ false); ++ } + bool decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const; + unsigned short decode_flag(const char* flag) const; + char* encode_flag(unsigned short flag) const; + int is_aliasf() const; + int get_aliasf(int index, unsigned short** fvec, FileMgr* af) const; + int is_aliasm() const; + char* get_aliasm(int index) const; + const std::vector<replentry>& get_reptable() const; +@@ -148,11 +156,27 @@ class HashMgr { + int wcl, + unsigned short* flags, + int al, + const std::string* dp, + int captype); + bool parse_aliasm(const std::string& line, FileMgr* af); + bool parse_reptable(const std::string& line, FileMgr* af); + int remove_forbidden_flag(const std::string& word); ++ ++ // Our Mozilla fork uses a simple arena allocator for certain strings which ++ // persist for the lifetime of the HashMgr in order to avoid heap fragmentation. ++ // It's a simple bump-allocator, so we can't actually free() memory midway ++ // through the lifecycle, but we have a dummy free() implementation to ensure ++ // that our calls to arena_alloc() and arena_free() are balanced. ++ void* arena_alloc(int num_bytes); ++ void* arena_alloc(int num_bytes) const { ++ return const_cast<HashMgr*>(this)->arena_alloc(num_bytes); ++ } ++ void arena_free(void* ptr); ++ ++ std::vector<std::unique_ptr<uint8_t[]>> arena; ++ int current_chunk_size = 0; ++ int current_chunk_offset = 0; ++ int outstanding_arena_allocations = 0; + }; + + #endif diff --git a/extensions/spellcheck/hunspell/patches/bug1838113.patch b/extensions/spellcheck/hunspell/patches/bug1838113.patch new file mode 100644 index 0000000000..727349d20a --- /dev/null +++ b/extensions/spellcheck/hunspell/patches/bug1838113.patch @@ -0,0 +1,20 @@ +diff --git a/src/csutil.cxx b/src/csutil.cxx +index 48e58ff4b2677..39a54d38023c8 100644 +--- a/src/csutil.cxx ++++ b/src/csutil.cxx +@@ -108,6 +108,7 @@ static struct unicode_info2* utf_tbl = NULL; + static int utf_tbl_count = + 0; // utf_tbl can be used by multiple Hunspell instances + ++#ifndef MOZILLA_CLIENT + void myopen(std::ifstream& stream, const char* path, std::ios_base::openmode mode) + { + #if defined(_WIN32) && defined(_MSC_VER) +@@ -127,6 +128,7 @@ void myopen(std::ifstream& stream, const char* path, std::ios_base::openmode mod + #endif + stream.open(path, mode); + } ++#endif + + std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) { + dest.clear(); diff --git a/extensions/spellcheck/hunspell/src/README.md b/extensions/spellcheck/hunspell/src/README.md new file mode 100644 index 0000000000..90ef880e86 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/README.md @@ -0,0 +1,325 @@ +# About Hunspell + +Hunspell is a free spell checker and morphological analyzer library +and command-line tool, licensed under LGPL/GPL/MPL tri-license. + +Hunspell is used by LibreOffice office suite, free browsers, like +Mozilla Firefox and Google Chrome, and other tools and OSes, like +Linux distributions and macOS. It is also a command-line tool for +Linux, Unix-like and other OSes. + +It is designed for quick and high quality spell checking and +correcting for languages with word-level writing system, +including languages with rich morphology, complex word compounding +and character encoding. 
+ +Hunspell interfaces: Ispell-like terminal interface using Curses +library, Ispell pipe interface, C++/C APIs and shared library, also +with existing language bindings for other programming languages. + +Hunspell's code base comes from OpenOffice.org's MySpell library, +developed by Kevin Hendricks (originally a C++ reimplementation of +spell checking and affixation of Geoff Kuenning's International +Ispell from scratch, later extended with eg. n-gram suggestions), +see http://lingucomponent.openoffice.org/MySpell-3.zip, and +its README, CONTRIBUTORS and license.readme (here: license.myspell) files. + +Main features of Hunspell library, developed by László Németh: + + - Unicode support + - Highly customizable suggestions: word-part replacement tables and + stem-level phonetic and other alternative transcriptions to recognize + and fix all typical misspellings, don't suggest offensive words etc. + - Complex morphology: dictionary and affix homonyms; twofold affix + stripping to handle inflectional and derivational morpheme groups for + agglutinative languages, like Azeri, Basque, Estonian, Finnish, Hungarian, + Turkish; 64 thousand affix classes with arbitrary number of affixes; + conditional affixes, circumfixes, fogemorphemes, zero morphemes, + virtual dictionary stems, forbidden words to avoid overgeneration etc. + - Handling complex compounds (for example, for Finno-Ugric, German and + Indo-Aryan languages): recognizing compounds made of arbitrary + number of words, handle affixation within compounds etc. + - Custom dictionaries with affixation + - Stemming + - Morphological analysis (in custom item and arrangement style) + - Morphological generation + - SPELLML XML API over plain spell() API function for easier integration + of stemming, morpological generation and custom dictionaries with affixation + - Language specific algorithms, like special casing of Azeri or Turkish + dotted i and German sharp s, and special compound rules of Hungarian. + +Main features of Hunspell command line tool, developed by László Németh: + + - Reimplementation of quick interactive interface of Geoff Kuenning's Ispell + - Parsing formats: text, OpenDocument, TeX/LaTeX, HTML/SGML/XML, nroff/troff + - Custom dictionaries with optional affixation, specified by a model word + - Multiple dictionary usage (for example hunspell -d en_US,de_DE,de_medical) + - Various filtering options (bad or good words/lines) + - Morphological analysis (option -m) + - Stemming (option -s) + +See man hunspell, man 3 hunspell, man 5 hunspell for complete manual. + +Translations: Hunspell has been translated into several languages already. If your language is missing or incomplete, please use [Weblate](https://hosted.weblate.org/engage/hunspell/) to help translate Hunspell. + +<a href="https://hosted.weblate.org/engage/hunspell/"> +<img src="https://hosted.weblate.org/widgets/hunspell/-/translations/horizontal-auto.svg" alt="Stanje prijevoda" /> +</a> + +# Dependencies + +Build only dependencies: + + g++ make autoconf automake autopoint libtool + +Runtime dependencies: + +| | Mandatory | Optional | +|---------------|------------------|------------------| +|libhunspell | | | +|hunspell tool | libiconv gettext | ncurses readline | + +# Compiling on GNU/Linux and Unixes + +We first need to download the dependencies. On Linux, `gettext` and +`libiconv` are part of the standard library. On other Unixes we +need to manually install them. 
+ +For Ubuntu: + + sudo apt install autoconf automake autopoint libtool + +Then run the following commands: + + autoreconf -vfi + ./configure + make + sudo make install + sudo ldconfig + +For dictionary development, use the `--with-warnings` option of +configure. + +For interactive user interface of Hunspell executable, use the +`--with-ui` option. + +Optional developer packages: + + - ncurses (need for --with-ui), eg. libncursesw5 for UTF-8 + - readline (for fancy input line editing, configure parameter: + --with-readline) + +In Ubuntu, the packages are: + + libncurses5-dev libreadline-dev + +# Compiling on OSX and macOS + +On macOS for compiler always use `clang` and not `g++` because Homebrew +dependencies are build with that. + + brew install autoconf automake libtool gettext + brew link gettext --force + +Then run: + + autoreconf -vfi + ./configure + make + +# Compiling on Windows + +## Compiling with Mingw64 and MSYS2 + +Download Msys2, update everything and install the following + packages: + + pacman -S base-devel mingw-w64-x86_64-toolchain mingw-w64-x86_64-libtool + +Open Mingw-w64 Win64 prompt and compile the same way as on Linux, see +above. + +## Compiling in Cygwin environment + +Download and install Cygwin environment for Windows with the following +extra packages: + + - make + - automake + - autoconf + - libtool + - gcc-g++ development package + - ncurses, readline (for user interface) + - iconv (character conversion) + +Then compile the same way as on Linux. Cygwin builds depend on +Cygwin1.dll. + +# Debugging + +It is recommended to install a debug build of the standard library: + + libstdc++6-6-dbg + +For debugging we need to create a debug build and then we need to start +`gdb`. + + ./configure CXXFLAGS='-g -O0 -Wall -Wextra' + make + ./libtool --mode=execute gdb src/tools/hunspell + +You can also pass the `CXXFLAGS` directly to `make` without calling +`./configure`, but we don't recommend this way during long development +sessions. 
+ +If you like to develop and debug with an IDE, see documentation at +https://github.com/hunspell/hunspell/wiki/IDE-Setup + +# Testing + +Testing Hunspell (see tests in tests/ subdirectory): + + make check + +or with Valgrind debugger: + + make check + VALGRIND=[Valgrind_tool] make check + +For example: + + make check + VALGRIND=memcheck make check + +# Documentation + +features and dictionary format: + + man 5 hunspell + man hunspell + hunspell -h + +http://hunspell.github.io/ + +# Usage + +After compiling and installing (see INSTALL) you can run the Hunspell +spell checker (compiled with user interface) with a Hunspell or Myspell +dictionary: + + hunspell -d en_US text.txt + +or without interface: + + hunspell + hunspell -d en_GB -l <text.txt + +Dictionaries consist of an affix (.aff) and dictionary (.dic) file, for +example, download American English dictionary files of LibreOffice +(older version, but with stemming and morphological generation) with + + wget -O en_US.aff https://cgit.freedesktop.org/libreoffice/dictionaries/plain/en/en_US.aff?id=a4473e06b56bfe35187e302754f6baaa8d75e54f + wget -O en_US.dic https://cgit.freedesktop.org/libreoffice/dictionaries/plain/en/en_US.dic?id=a4473e06b56bfe35187e302754f6baaa8d75e54f + +and with command line input and output, it's possible to check its work quickly, +for example with the input words "example", "examples", "teached" and +"verybaaaaaaaaaaaaaaaaaaaaaad": + + $ hunspell -d en_US + Hunspell 1.7.0 + example + * + + examples + + example + + teached + & teached 9 0: taught, teased, reached, teaches, teacher, leached, beached + + verybaaaaaaaaaaaaaaaaaaaaaad + # verybaaaaaaaaaaaaaaaaaaaaaad 0 + +Where in the output, `*` and `+` mean correct (accepted) words (`*` = dictionary stem, +`+` = affixed forms of the following dictionary stem), and +`&` and `#` mean bad (rejected) words (`&` = with suggestions, `#` = without suggestions) +(see man hunspell). + +Example for stemming: + + $ hunspell -d en_US -s + mice + mice mouse + +Example for morphological analysis (very limited with this English dictionary): + + $ hunspell -d en_US -m + mice + mice st:mouse ts:Ns + + cats + cats st:cat ts:0 is:Ns + cats st:cat ts:0 is:Vs + +# Other executables + +The src/tools directory contains the following executables after compiling. + + - The main executable: + - hunspell: main program for spell checking and others (see + manual) + - Example tools: + - analyze: example of spell checking, stemming and morphological + analysis + - chmorph: example of automatic morphological generation and + conversion + - example: example of spell checking and suggestion + - Tools for dictionary development: + - affixcompress: dictionary generation from large (millions of + words) vocabularies + - makealias: alias compression (Hunspell only, not back compatible + with MySpell) + - wordforms: word generation (Hunspell version of unmunch) + - hunzip: decompressor of hzip format + - hzip: compressor of hzip format + - munch (DEPRECATED, use affixcompress): dictionary generation + from vocabularies (it needs an affix file, too). 
+ - unmunch (DEPRECATED, use wordforms): list all recognized words + of a MySpell dictionary + +Example for morphological generation: + + $ ~/hunspell/src/tools/analyze en_US.aff en_US.dic /dev/stdin + cat mice + generate(cat, mice) = cats + mouse cats + generate(mouse, cats) = mice + generate(mouse, cats) = mouses + +# Using Hunspell library with GCC + +Including in your program: + + #include <hunspell.hxx> + +Linking with Hunspell static library: + + g++ -lhunspell-1.7 example.cxx + # or better, use pkg-config + g++ $(pkg-config --cflags --libs hunspell) example.cxx + +## Dictionaries + +Hunspell (MySpell) dictionaries: + + - https://wiki.documentfoundation.org/Language_support_of_LibreOffice + - http://cgit.freedesktop.org/libreoffice/dictionaries + - http://extensions.libreoffice.org + - https://extensions.openoffice.org + - https://wiki.openoffice.org/wiki/Dictionaries + +Aspell dictionaries (conversion: man 5 hunspell): + + - ftp://ftp.gnu.org/gnu/aspell/dict + +László Németh, nemeth at numbertext org + diff --git a/extensions/spellcheck/hunspell/src/affentry.cxx b/extensions/spellcheck/hunspell/src/affentry.cxx new file mode 100644 index 0000000000..2cf4f4671f --- /dev/null +++ b/extensions/spellcheck/hunspell/src/affentry.cxx @@ -0,0 +1,983 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <ctype.h> + +#include "affentry.hxx" +#include "csutil.hxx" + +AffEntry::~AffEntry() { + if (opts & aeLONGCOND) + free(c.l.conds2); + if (morphcode && !(opts & aeALIASM)) + free(morphcode); + if (contclass && !(opts & aeALIASF)) + free(contclass); +} + +PfxEntry::PfxEntry(AffixMgr* pmgr) + // register affix manager + : pmyMgr(pmgr), + next(NULL), + nexteq(NULL), + nextne(NULL), + flgnxt(NULL) { +} + +// add prefix to this word assuming conditions hold +std::string PfxEntry::add(const char* word, size_t len) { + std::string result; + if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) && + (len >= numconds) && test_condition(word) && + (!strip.size() || (strncmp(word, strip.c_str(), strip.size()) == 0))) { + /* we have a match so add prefix */ + result.assign(appnd); + result.append(word + strip.size()); + } + return result; +} + +inline char* PfxEntry::nextchar(char* p) { + if (p) { + p++; + if (opts & aeLONGCOND) { + // jump to the 2nd part of the condition + if (p == c.conds + MAXCONDLEN_1) + return c.l.conds2; + // end of the MAXCONDLEN length condition + } else if (p == c.conds + MAXCONDLEN) + return NULL; + return *p ? p : NULL; + } + return NULL; +} + +inline int PfxEntry::test_condition(const char* st) { + const char* pos = NULL; // group with pos input position + bool neg = false; // complementer + bool ingroup = false; // character in the group + if (numconds == 0) + return 1; + char* p = c.conds; + while (1) { + switch (*p) { + case '\0': + return 1; + case '[': { + neg = false; + ingroup = false; + p = nextchar(p); + pos = st; + break; + } + case '^': { + p = nextchar(p); + neg = true; + break; + } + case ']': { + if (bool(neg) == bool(ingroup)) + return 0; + pos = NULL; + p = nextchar(p); + // skip the next character + if (!ingroup && *st) + for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++) + ; + if (*st == '\0' && p) + return 0; // word <= condition + break; + } + case '.': + if (!pos) { // dots are not metacharacters in groups: [.] 
+ p = nextchar(p); + // skip the next character + for (st++; (opts & aeUTF8) && (*st & 0xc0) == 0x80; st++) + ; + if (*st == '\0' && p) + return 0; // word <= condition + break; + } + /* FALLTHROUGH */ + default: { + if (*st == *p) { + st++; + p = nextchar(p); + if ((opts & aeUTF8) && (*(st - 1) & 0x80)) { // multibyte + while (p && (*p & 0xc0) == 0x80) { // character + if (*p != *st) { + if (!pos) + return 0; + st = pos; + break; + } + p = nextchar(p); + st++; + } + if (pos && st != pos) { + ingroup = true; + while (p && *p != ']' && ((p = nextchar(p)) != NULL)) { + } + } + } else if (pos) { + ingroup = true; + while (p && *p != ']' && ((p = nextchar(p)) != NULL)) { + } + } + } else if (pos) { // group + p = nextchar(p); + } else + return 0; + } + } + if (!p) + return 1; + } +} + +// check if this prefix entry matches +struct hentry* PfxEntry::checkword(const char* word, + int len, + char in_compound, + const FLAG needflag) { + struct hentry* he; // hash entry of root word or NULL + + // on entry prefix is 0 length or already matches the beginning of the word. + // So if the remaining root word has positive length + // and if there are enough chars in root word and added back strip chars + // to meet the number of characters conditions, then test it + + int tmpl = len - appnd.size(); // length of tmpword + + if (tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) { + // generate new root word by removing prefix and adding + // back any characters that would have been stripped + + std::string tmpword(strip); + tmpword.append(word + appnd.size(), tmpl); + + // now make sure all of the conditions on characters + // are met. Please see the appendix at the end of + // this file for more info on exactly what is being + // tested + + // if all conditions are met then check if resulting + // root word in the dictionary + + if (test_condition(tmpword.c_str())) { + tmpl += strip.size(); + if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) { + do { + if (TESTAFF(he->astr, aflag, he->alen) && + // forbid single prefixes with needaffix flag + !TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) && + // needflag + ((!needflag) || TESTAFF(he->astr, needflag, he->alen) || + (contclass && TESTAFF(contclass, needflag, contclasslen)))) + return he; + he = he->next_homonym; // check homonyms + } while (he); + } + + // prefix matched but no root word was found + // if aeXPRODUCT is allowed, try again but now + // ross checked combined with a suffix + + // if ((opts & aeXPRODUCT) && in_compound) { + if ((opts & aeXPRODUCT)) { + he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, aeXPRODUCT, this, + FLAG_NULL, needflag, in_compound); + if (he) + return he; + } + } + } + return NULL; +} + +// check if this prefix entry matches +struct hentry* PfxEntry::check_twosfx(const char* word, + int len, + char in_compound, + const FLAG needflag) { + // on entry prefix is 0 length or already matches the beginning of the word. 
+ // So if the remaining root word has positive length + // and if there are enough chars in root word and added back strip chars + // to meet the number of characters conditions, then test it + + int tmpl = len - appnd.size(); // length of tmpword + + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + strip.size() >= numconds)) { + // generate new root word by removing prefix and adding + // back any characters that would have been stripped + + std::string tmpword(strip); + tmpword.append(word + appnd.size()); + + // now make sure all of the conditions on characters + // are met. Please see the appendix at the end of + // this file for more info on exactly what is being + // tested + + // if all conditions are met then check if resulting + // root word in the dictionary + + if (test_condition(tmpword.c_str())) { + tmpl += strip.size(); + + // prefix matched but no root word was found + // if aeXPRODUCT is allowed, try again but now + // cross checked combined with a suffix + + if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) { + // hash entry of root word or NULL + struct hentry* he = pmyMgr->suffix_check_twosfx(tmpword.c_str(), tmpl, aeXPRODUCT, this, + needflag); + if (he) + return he; + } + } + } + return NULL; +} + +// check if this prefix entry matches +std::string PfxEntry::check_twosfx_morph(const char* word, + int len, + char in_compound, + const FLAG needflag) { + std::string result; + // on entry prefix is 0 length or already matches the beginning of the word. + // So if the remaining root word has positive length + // and if there are enough chars in root word and added back strip chars + // to meet the number of characters conditions, then test it + int tmpl = len - appnd.size(); // length of tmpword + + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + strip.size() >= numconds)) { + // generate new root word by removing prefix and adding + // back any characters that would have been stripped + + std::string tmpword(strip); + tmpword.append(word + appnd.size()); + + // now make sure all of the conditions on characters + // are met. Please see the appendix at the end of + // this file for more info on exactly what is being + // tested + + // if all conditions are met then check if resulting + // root word in the dictionary + + if (test_condition(tmpword.c_str())) { + tmpl += strip.size(); + + // prefix matched but no root word was found + // if aeXPRODUCT is allowed, try again but now + // ross checked combined with a suffix + + if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) { + result = pmyMgr->suffix_check_twosfx_morph(tmpword.c_str(), tmpl, + aeXPRODUCT, + this, needflag); + } + } + } + return result; +} + +// check if this prefix entry matches +std::string PfxEntry::check_morph(const char* word, + int len, + char in_compound, + const FLAG needflag) { + std::string result; + + // on entry prefix is 0 length or already matches the beginning of the word. 
+ // So if the remaining root word has positive length + // and if there are enough chars in root word and added back strip chars + // to meet the number of characters conditions, then test it + + int tmpl = len - appnd.size(); // length of tmpword + + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + strip.size() >= numconds)) { + // generate new root word by removing prefix and adding + // back any characters that would have been stripped + + std::string tmpword(strip); + tmpword.append(word + appnd.size()); + + // now make sure all of the conditions on characters + // are met. Please see the appendix at the end of + // this file for more info on exactly what is being + // tested + + // if all conditions are met then check if resulting + // root word in the dictionary + + if (test_condition(tmpword.c_str())) { + tmpl += strip.size(); + struct hentry* he; // hash entry of root word or NULL + if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) { + do { + if (TESTAFF(he->astr, aflag, he->alen) && + // forbid single prefixes with needaffix flag + !TESTAFF(contclass, pmyMgr->get_needaffix(), contclasslen) && + // needflag + ((!needflag) || TESTAFF(he->astr, needflag, he->alen) || + (contclass && TESTAFF(contclass, needflag, contclasslen)))) { + if (morphcode) { + result.push_back(MSEP_FLD); + result.append(morphcode); + } else + result.append(getKey()); + if (!HENTRY_FIND(he, MORPH_STEM)) { + result.push_back(MSEP_FLD); + result.append(MORPH_STEM); + result.append(HENTRY_WORD(he)); + } + // store the pointer of the hash entry + if (HENTRY_DATA(he)) { + result.push_back(MSEP_FLD); + result.append(HENTRY_DATA2(he)); + } else { + // return with debug information + char* flag = pmyMgr->encode_flag(getFlag()); + result.push_back(MSEP_FLD); + result.append(MORPH_FLAG); + result.append(flag); + free(flag); + } + result.push_back(MSEP_REC); + } + he = he->next_homonym; + } while (he); + } + + // prefix matched but no root word was found + // if aeXPRODUCT is allowed, try again but now + // ross checked combined with a suffix + + if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) { + std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, aeXPRODUCT, this, + FLAG_NULL, needflag); + if (!st.empty()) { + result.append(st); + } + } + } + } + + return result; +} + +SfxEntry::SfxEntry(AffixMgr* pmgr) + : pmyMgr(pmgr) // register affix manager + , + next(NULL), + nexteq(NULL), + nextne(NULL), + flgnxt(NULL), + l_morph(NULL), + r_morph(NULL), + eq_morph(NULL) { +} + +// add suffix to this word assuming conditions hold +std::string SfxEntry::add(const char* word, size_t len) { + std::string result; + /* make sure all conditions match */ + if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) && + (len >= numconds) && test_condition(word + len, word) && + (!strip.size() || + (strcmp(word + len - strip.size(), strip.c_str()) == 0))) { + result.assign(word); + /* we have a match so add suffix */ + result.replace(len - strip.size(), std::string::npos, appnd); + } + return result; +} + +inline char* SfxEntry::nextchar(char* p) { + if (p) { + p++; + if (opts & aeLONGCOND) { + // jump to the 2nd part of the condition + if (p == c.l.conds1 + MAXCONDLEN_1) + return c.l.conds2; + // end of the MAXCONDLEN length condition + } else if (p == c.conds + MAXCONDLEN) + return NULL; + return *p ? 
p : NULL; + } + return NULL; +} + +inline int SfxEntry::test_condition(const char* st, const char* beg) { + const char* pos = NULL; // group with pos input position + bool neg = false; // complementer + bool ingroup = false; // character in the group + if (numconds == 0) + return 1; + char* p = c.conds; + st--; + int i = 1; + while (1) { + switch (*p) { + case '\0': + return 1; + case '[': + p = nextchar(p); + pos = st; + break; + case '^': + p = nextchar(p); + neg = true; + break; + case ']': + if (!neg && !ingroup) + return 0; + i++; + // skip the next character + if (!ingroup) { + for (; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; st--) + ; + st--; + } + pos = NULL; + neg = false; + ingroup = false; + p = nextchar(p); + if (st < beg && p) + return 0; // word <= condition + break; + case '.': + if (!pos) { + // dots are not metacharacters in groups: [.] + p = nextchar(p); + // skip the next character + for (st--; (opts & aeUTF8) && (st >= beg) && (*st & 0xc0) == 0x80; + st--) + ; + if (st < beg) { // word <= condition + if (p) + return 0; + else + return 1; + } + if ((opts & aeUTF8) && (*st & 0x80)) { // head of the UTF-8 character + st--; + if (st < beg) { // word <= condition + if (p) + return 0; + else + return 1; + } + } + break; + } + /* FALLTHROUGH */ + default: { + if (*st == *p) { + p = nextchar(p); + if ((opts & aeUTF8) && (*st & 0x80)) { + st--; + while (p && (st >= beg)) { + if (*p != *st) { + if (!pos) + return 0; + st = pos; + break; + } + // first byte of the UTF-8 multibyte character + if ((*p & 0xc0) != 0x80) + break; + p = nextchar(p); + st--; + } + if (pos && st != pos) { + if (neg) + return 0; + else if (i == numconds) + return 1; + ingroup = true; + while (p && *p != ']' && ((p = nextchar(p)) != NULL)) { + } + st--; + } + if (p && *p != ']') + p = nextchar(p); + } else if (pos) { + if (neg) + return 0; + else if (i == numconds) + return 1; + ingroup = true; + while (p && *p != ']' && ((p = nextchar(p)) != NULL)) { + } + // if (p && *p != ']') p = nextchar(p); + st--; + } + if (!pos) { + i++; + st--; + } + if (st < beg && p && *p != ']') + return 0; // word <= condition + } else if (pos) { // group + p = nextchar(p); + } else + return 0; + } + } + if (!p) + return 1; + } +} + +// see if this suffix is present in the word +struct hentry* SfxEntry::checkword(const char* word, + int len, + int optflags, + PfxEntry* ppfx, + const FLAG cclass, + const FLAG needflag, + const FLAG badflag) { + struct hentry* he; // hash entry pointer + PfxEntry* ep = ppfx; + + // if this suffix is being cross checked with a prefix + // but it does not support cross products skip it + + if (((optflags & aeXPRODUCT) != 0) && ((opts & aeXPRODUCT) == 0)) + return NULL; + + // upon entry suffix is 0 length or already matches the end of the word. 
+ // So if the remaining root word has positive length + // and if there are enough chars in root word and added back strip chars + // to meet the number of characters conditions, then test it + + int tmpl = len - appnd.size(); // length of tmpword + // the second condition is not enough for UTF-8 strings + // it checked in test_condition() + + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + strip.size() >= numconds)) { + // generate new root word by removing suffix and adding + // back any characters that would have been stripped or + // or null terminating the shorter string + + std::string tmpstring(word, tmpl); + if (strip.size()) { + tmpstring.append(strip); + } + + const char* tmpword = tmpstring.c_str(); + const char* endword = tmpword + tmpstring.size(); + + // now make sure all of the conditions on characters + // are met. Please see the appendix at the end of + // this file for more info on exactly what is being + // tested + + // if all conditions are met then check if resulting + // root word in the dictionary + + if (test_condition(endword, tmpword)) { +#ifdef SZOSZABLYA_POSSIBLE_ROOTS + fprintf(stdout, "%s %s %c\n", word, tmpword, aflag); +#endif + if ((he = pmyMgr->lookup(tmpword)) != NULL) { + do { + // check conditional suffix (enabled by prefix) + if ((TESTAFF(he->astr, aflag, he->alen) || + (ep && ep->getCont() && + TESTAFF(ep->getCont(), aflag, ep->getContLen()))) && + (((optflags & aeXPRODUCT) == 0) || + (ep && TESTAFF(he->astr, ep->getFlag(), he->alen)) || + // enabled by prefix + ((contclass) && + (ep && TESTAFF(contclass, ep->getFlag(), contclasslen)))) && + // handle cont. class + ((!cclass) || + ((contclass) && TESTAFF(contclass, cclass, contclasslen))) && + // check only in compound homonyms (bad flags) + (!badflag || !TESTAFF(he->astr, badflag, he->alen)) && + // handle required flag + ((!needflag) || + (TESTAFF(he->astr, needflag, he->alen) || + ((contclass) && TESTAFF(contclass, needflag, contclasslen))))) + return he; + he = he->next_homonym; // check homonyms + } while (he); + } + } + } + return NULL; +} + +// see if two-level suffix is present in the word +struct hentry* SfxEntry::check_twosfx(const char* word, + int len, + int optflags, + PfxEntry* ppfx, + const FLAG needflag) { + PfxEntry* ep = ppfx; + + // if this suffix is being cross checked with a prefix + // but it does not support cross products skip it + + if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0) + return NULL; + + // upon entry suffix is 0 length or already matches the end of the word. + // So if the remaining root word has positive length + // and if there are enough chars in root word and added back strip chars + // to meet the number of characters conditions, then test it + + int tmpl = len - appnd.size(); // length of tmpword + + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + strip.size() >= numconds)) { + // generate new root word by removing suffix and adding + // back any characters that would have been stripped or + // or null terminating the shorter string + + std::string tmpword(word); + tmpword.resize(tmpl); + tmpword.append(strip); + tmpl += strip.size(); + + const char* beg = tmpword.c_str(); + const char* end = beg + tmpl; + + // now make sure all of the conditions on characters + // are met. 
Please see the appendix at the end of + // this file for more info on exactly what is being + // tested + + // if all conditions are met then recall suffix_check + + if (test_condition(end, beg)) { + struct hentry* he; // hash entry pointer + if (ppfx) { + // handle conditional suffix + if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) + he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL, + (FLAG)aflag, needflag, IN_CPD_NOT); + else + he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, optflags, ppfx, + (FLAG)aflag, needflag, IN_CPD_NOT); + } else { + he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL, + (FLAG)aflag, needflag, IN_CPD_NOT); + } + if (he) + return he; + } + } + return NULL; +} + +// see if two-level suffix is present in the word +std::string SfxEntry::check_twosfx_morph(const char* word, + int len, + int optflags, + PfxEntry* ppfx, + const FLAG needflag) { + PfxEntry* ep = ppfx; + + std::string result; + + // if this suffix is being cross checked with a prefix + // but it does not support cross products skip it + + if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0) + return result; + + // upon entry suffix is 0 length or already matches the end of the word. + // So if the remaining root word has positive length + // and if there are enough chars in root word and added back strip chars + // to meet the number of characters conditions, then test it + + int tmpl = len - appnd.size(); // length of tmpword + + if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) && + (tmpl + strip.size() >= numconds)) { + // generate new root word by removing suffix and adding + // back any characters that would have been stripped or + // or null terminating the shorter string + + std::string tmpword(word); + tmpword.resize(tmpl); + tmpword.append(strip); + tmpl += strip.size(); + + const char* beg = tmpword.c_str(); + const char* end = beg + tmpl; + + // now make sure all of the conditions on characters + // are met. Please see the appendix at the end of + // this file for more info on exactly what is being + // tested + + // if all conditions are met then recall suffix_check + + if (test_condition(end, beg)) { + if (ppfx) { + // handle conditional suffix + if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) { + std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag, + needflag); + if (!st.empty()) { + if (ppfx->getMorph()) { + result.append(ppfx->getMorph()); + result.push_back(MSEP_FLD); + } + result.append(st); + mychomp(result); + } + } else { + std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, optflags, ppfx, aflag, + needflag); + if (!st.empty()) { + result.append(st); + mychomp(result); + } + } + } else { + std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag, needflag); + if (!st.empty()) { + result.append(st); + mychomp(result); + } + } + } + } + return result; +} + +// get next homonym with same affix +struct hentry* SfxEntry::get_next_homonym(struct hentry* he, + int optflags, + PfxEntry* ppfx, + const FLAG cclass, + const FLAG needflag) { + PfxEntry* ep = ppfx; + FLAG eFlag = ep ? 
ep->getFlag() : FLAG_NULL; + + while (he->next_homonym) { + he = he->next_homonym; + if ((TESTAFF(he->astr, aflag, he->alen) || + (ep && ep->getCont() && + TESTAFF(ep->getCont(), aflag, ep->getContLen()))) && + ((optflags & aeXPRODUCT) == 0 || TESTAFF(he->astr, eFlag, he->alen) || + // handle conditional suffix + ((contclass) && TESTAFF(contclass, eFlag, contclasslen))) && + // handle cont. class + ((!cclass) || + ((contclass) && TESTAFF(contclass, cclass, contclasslen))) && + // handle required flag + ((!needflag) || + (TESTAFF(he->astr, needflag, he->alen) || + ((contclass) && TESTAFF(contclass, needflag, contclasslen))))) + return he; + } + return NULL; +} + +void SfxEntry::initReverseWord() { + rappnd = appnd; + reverseword(rappnd); +} + +#if 0 + +Appendix: Understanding Affix Code + + +An affix is either a prefix or a suffix attached to root words to make +other words. + +Basically a Prefix or a Suffix is set of AffEntry objects +which store information about the prefix or suffix along +with supporting routines to check if a word has a particular +prefix or suffix or a combination. + +The structure affentry is defined as follows: + +struct affentry +{ + unsigned short aflag; // ID used to represent the affix + std::string strip; // string to strip before adding affix + std::string appnd; // the affix string to add + char numconds; // the number of conditions that must be met + char opts; // flag: aeXPRODUCT- combine both prefix and suffix + char conds[SETSIZE]; // array which encodes the conditions to be met +}; + + +Here is a suffix borrowed from the en_US.aff file. This file +is whitespace delimited. + +SFX D Y 4 +SFX D 0 e d +SFX D y ied [^aeiou]y +SFX D 0 ed [^ey] +SFX D 0 ed [aeiou]y + +This information can be interpreted as follows: + +In the first line has 4 fields + +Field +----- +1 SFX - indicates this is a suffix +2 D - is the name of the character flag which represents this suffix +3 Y - indicates it can be combined with prefixes (cross product) +4 4 - indicates that sequence of 4 affentry structures are needed to + properly store the affix information + +The remaining lines describe the unique information for the 4 SfxEntry +objects that make up this affix. Each line can be interpreted +as follows: (note fields 1 and 2 are as a check against line 1 info) + +Field +----- +1 SFX - indicates this is a suffix +2 D - is the name of the character flag for this affix +3 y - the string of chars to strip off before adding affix + (a 0 here indicates the NULL string) +4 ied - the string of affix characters to add +5 [^aeiou]y - the conditions which must be met before the affix + can be applied + +Field 5 is interesting. Since this is a suffix, field 5 tells us that +there are 2 conditions that must be met. The first condition is that +the next to the last character in the word must *NOT* be any of the +following "a", "e", "i", "o" or "u". The second condition is that +the last character of the word must end in "y". + +So how can we encode this information concisely and be able to +test for both conditions in a fast manner? The answer is found +but studying the wonderful ispell code of Geoff Kuenning, et.al. +(now available under a normal BSD license). + +If we set up a conds array of 256 bytes indexed (0 to 255) and access it +using a character (cast to an unsigned char) of a string, we have 8 bits +of information we can store about that character. 
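As a concrete, hedged illustration of this table (an ASCII-only sketch with invented names like CondSketch, not the actual code in this file), here is how the "[^aeiou]y" condition of the SFX D example above could be encoded and tested:

    #include <cstddef>
    #include <string>

    // 256-entry byte table: bit i of conds[c] says whether character c is
    // allowed at condition position i (counted towards the end for suffixes).
    struct CondSketch {
      unsigned char conds[256] = {0};
      int numconds = 0;
    };

    // Encode "[^aeiou]y": next-to-last char must not be a vowel (bit 0),
    // last char must be 'y' (bit 1).
    static CondSketch make_not_vowel_then_y() {
      CondSketch c;
      c.numconds = 2;
      for (int ch = 1; ch < 256; ++ch) {
        bool vowel = ch == 'a' || ch == 'e' || ch == 'i' || ch == 'o' || ch == 'u';
        if (!vowel)
          c.conds[ch] |= 1 << 0;
      }
      c.conds[(unsigned char)'y'] |= 1 << 1;
      return c;
    }

    // Check the last numconds characters of word against the table.
    static bool test_condition_sketch(const CondSketch& c, const std::string& word) {
      if ((int)word.size() < c.numconds)
        return false;
      std::size_t start = word.size() - c.numconds;
      for (int i = 0; i < c.numconds; ++i) {
        unsigned char ch = (unsigned char)word[start + i];
        if (!(c.conds[ch] & (1 << i)))
          return false;
      }
      return true;
    }

With this table, test_condition_sketch() accepts "try" (so the D suffix can produce "tried") and rejects "play", matching the "SFX D y ied [^aeiou]y" line.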
Specifically we +could use each bit to say if that character is allowed in any of the +last (or first for prefixes) 8 characters of the word. + +Basically, each character at one end of the word (up to the number +of conditions) is used to index into the conds array and the resulting +value found there says whether the that character is valid for a +specific character position in the word. + +For prefixes, it does this by setting bit 0 if that char is valid +in the first position, bit 1 if valid in the second position, and so on. + +If a bit is not set, then that char is not valid for that postion in the +word. + +If working with suffixes bit 0 is used for the character closest +to the front, bit 1 for the next character towards the end, ..., +with bit numconds-1 representing the last char at the end of the string. + +Note: since entries in the conds[] are 8 bits, only 8 conditions +(read that only 8 character positions) can be examined at one +end of a word (the beginning for prefixes and the end for suffixes. + +So to make this clearer, lets encode the conds array values for the +first two affentries for the suffix D described earlier. + + + For the first affentry: + numconds = 1 (only examine the last character) + + conds['e'] = (1 << 0) (the word must end in an E) + all others are all 0 + + For the second affentry: + numconds = 2 (only examine the last two characters) + + conds[X] = conds[X] | (1 << 0) (aeiou are not allowed) + where X is all characters *but* a, e, i, o, or u + + + conds['y'] = (1 << 1) (the last char must be a y) + all other bits for all other entries in the conds array are zero + +#endif diff --git a/extensions/spellcheck/hunspell/src/affentry.hxx b/extensions/spellcheck/hunspell/src/affentry.hxx new file mode 100644 index 0000000000..b736bf0350 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/affentry.hxx @@ -0,0 +1,223 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. 
If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef AFFIX_HXX_ +#define AFFIX_HXX_ + +#include "atypes.hxx" +#include "baseaffix.hxx" +#include "affixmgr.hxx" + +/* A Prefix Entry */ + +class PfxEntry : public AffEntry { + private: + PfxEntry(const PfxEntry&); + PfxEntry& operator=(const PfxEntry&); + + private: + AffixMgr* pmyMgr; + + PfxEntry* next; + PfxEntry* nexteq; + PfxEntry* nextne; + PfxEntry* flgnxt; + + public: + explicit PfxEntry(AffixMgr* pmgr); + + bool allowCross() const { return ((opts & aeXPRODUCT) != 0); } + struct hentry* checkword(const char* word, + int len, + char in_compound, + const FLAG needflag = FLAG_NULL); + + struct hentry* check_twosfx(const char* word, + int len, + char in_compound, + const FLAG needflag = FLAG_NULL); + + std::string check_morph(const char* word, + int len, + char in_compound, + const FLAG needflag = FLAG_NULL); + + std::string check_twosfx_morph(const char* word, + int len, + char in_compound, + const FLAG needflag = FLAG_NULL); + + FLAG getFlag() { return aflag; } + const char* getKey() { return appnd.c_str(); } + std::string add(const char* word, size_t len); + + inline short getKeyLen() { return appnd.size(); } + + inline const char* getMorph() { return morphcode; } + + inline const unsigned short* getCont() { return contclass; } + inline short getContLen() { return contclasslen; } + + inline PfxEntry* getNext() { return next; } + inline PfxEntry* getNextNE() { return nextne; } + inline PfxEntry* getNextEQ() { return nexteq; } + inline PfxEntry* getFlgNxt() { return flgnxt; } + + inline void setNext(PfxEntry* ptr) { next = ptr; } + inline void setNextNE(PfxEntry* ptr) { nextne = ptr; } + inline void setNextEQ(PfxEntry* ptr) { nexteq = ptr; } + inline void setFlgNxt(PfxEntry* ptr) { flgnxt = ptr; } + + inline char* nextchar(char* p); + inline int test_condition(const char* st); +}; + +/* A Suffix Entry */ + +class SfxEntry : public AffEntry { + private: + SfxEntry(const SfxEntry&); + SfxEntry& operator=(const SfxEntry&); + + private: + AffixMgr* pmyMgr; + std::string rappnd; + + SfxEntry* next; + SfxEntry* nexteq; + SfxEntry* nextne; + SfxEntry* flgnxt; + + SfxEntry* l_morph; + SfxEntry* r_morph; + SfxEntry* eq_morph; + + public: + explicit SfxEntry(AffixMgr* pmgr); + + bool allowCross() const { return ((opts & aeXPRODUCT) != 0); } + struct hentry* checkword(const char* word, + int len, + int optflags, + PfxEntry* ppfx, + const FLAG cclass, + const FLAG needflag, + const FLAG badflag); + + struct hentry* check_twosfx(const char* word, + int len, + int optflags, + PfxEntry* ppfx, + const FLAG needflag = FLAG_NULL); + + std::string check_twosfx_morph(const char* word, + int len, + int optflags, + PfxEntry* ppfx, + const FLAG needflag = FLAG_NULL); + struct hentry* get_next_homonym(struct hentry* he); + struct hentry* get_next_homonym(struct hentry* word, + int optflags, + PfxEntry* ppfx, + const FLAG cclass, + const FLAG needflag); + + FLAG getFlag() { return aflag; } + const char* getKey() { return rappnd.c_str(); } + std::string add(const char* word, size_t len); + + inline const char* getMorph() { return morphcode; } + + inline const unsigned short* getCont() { return contclass; } + inline short getContLen() { return contclasslen; } + inline const char* getAffix() { return appnd.c_str(); } + + inline short getKeyLen() { return appnd.size(); } + + inline SfxEntry* getNext() { return next; } + inline SfxEntry* getNextNE() { return nextne; } + inline SfxEntry* getNextEQ() { return nexteq; } + + inline SfxEntry* getLM() { return l_morph; } + inline SfxEntry* getRM() { return 
r_morph; } + inline SfxEntry* getEQM() { return eq_morph; } + inline SfxEntry* getFlgNxt() { return flgnxt; } + + inline void setNext(SfxEntry* ptr) { next = ptr; } + inline void setNextNE(SfxEntry* ptr) { nextne = ptr; } + inline void setNextEQ(SfxEntry* ptr) { nexteq = ptr; } + inline void setFlgNxt(SfxEntry* ptr) { flgnxt = ptr; } + void initReverseWord(); + + inline char* nextchar(char* p); + inline int test_condition(const char* st, const char* begin); +}; + +#endif diff --git a/extensions/spellcheck/hunspell/src/affixmgr.cxx b/extensions/spellcheck/hunspell/src/affixmgr.cxx new file mode 100644 index 0000000000..adb750dba1 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/affixmgr.cxx @@ -0,0 +1,4875 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <ctype.h> +#include <time.h> + +#include <algorithm> +#include <limits> +#include <string> +#include <vector> + +#include "affixmgr.hxx" +#include "affentry.hxx" +#include "langnum.hxx" + +#include "csutil.hxx" + +AffixMgr::AffixMgr(const char* affpath, + const std::vector<HashMgr*>& ptr, + const char* key) + : alldic(ptr) + , pHMgr(ptr[0]) { + + // register hash manager and load affix data from aff file + csconv = NULL; + utf8 = 0; + complexprefixes = 0; + parsedmaptable = false; + parsedbreaktable = false; + iconvtable = NULL; + oconvtable = NULL; + // allow simplified compound forms (see 3rd field of CHECKCOMPOUNDPATTERN) + simplifiedcpd = 0; + parsedcheckcpd = false; + parseddefcpd = false; + phone = NULL; + compoundflag = FLAG_NULL; // permits word in compound forms + compoundbegin = FLAG_NULL; // may be first word in compound forms + compoundmiddle = FLAG_NULL; // may be middle word in compound forms + compoundend = FLAG_NULL; // may be last word in compound forms + compoundroot = FLAG_NULL; // compound word signing flag + compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word + compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word + compoundmoresuffixes = 0; // allow more suffixes within compound words + checkcompounddup = 0; // forbid double words in compounds + checkcompoundrep = 0; // forbid bad compounds (may be non-compound word with + // a REP substitution) + checkcompoundcase = + 0; // forbid upper and lowercase combinations at word bounds + checkcompoundtriple = 0; // forbid compounds with triple letters + simplifiedtriple = 0; // allow simplified triple letters in compounds + // (Schiff+fahrt -> Schiffahrt) + forbiddenword = FORBIDDENWORD; // forbidden word signing flag + nosuggest = FLAG_NULL; // don't suggest words signed with NOSUGGEST flag + nongramsuggest = FLAG_NULL; + langnum = 0; // language code (see http://l10n.openoffice.org/languages.html) + needaffix = FLAG_NULL; // forbidden root, allowed only with suffixes + cpdwordmax = -1; // default: unlimited wordcount in compound words + cpdmin = -1; // undefined + cpdmaxsyllable = 0; // default: unlimited syllablecount in compound words + pfxappnd = NULL; // previous prefix for counting syllables of the prefix BUG + sfxappnd = NULL; // previous suffix for counting syllables of the suffix BUG + sfxextra = 0; // modifier for syllable count of sfxappnd BUG + checknum = 0; // checking numbers, and word with numbers + havecontclass = 0; // flags of possible continuing classes (double affix) + // LEMMA_PRESENT: not put root into the morphological output. 
Lemma presents + // in morhological description in dictionary file. It's often combined with + // PSEUDOROOT. + lemma_present = FLAG_NULL; + circumfix = FLAG_NULL; + onlyincompound = FLAG_NULL; + maxngramsugs = -1; // undefined + maxdiff = -1; // undefined + onlymaxdiff = 0; + maxcpdsugs = -1; // undefined + nosplitsugs = 0; + sugswithdots = 0; + keepcase = 0; + forceucase = 0; + warn = 0; + forbidwarn = 0; + checksharps = 0; + substandard = FLAG_NULL; + fullstrip = 0; + + sfx = NULL; + pfx = NULL; + + for (int i = 0; i < SETSIZE; i++) { + pStart[i] = NULL; + sStart[i] = NULL; + pFlag[i] = NULL; + sFlag[i] = NULL; + } + + for (int j = 0; j < CONTSIZE; j++) { + contclasses[j] = 0; + } + + if (parse_file(affpath, key)) { + HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n", affpath); + } + + if (cpdmin == -1) + cpdmin = MINCPDLEN; +} + +AffixMgr::~AffixMgr() { + // pass through linked prefix entries and clean up + for (int i = 0; i < SETSIZE; i++) { + pFlag[i] = NULL; + PfxEntry* ptr = pStart[i]; + PfxEntry* nptr = NULL; + while (ptr) { + nptr = ptr->getNext(); + delete (ptr); + ptr = nptr; + nptr = NULL; + } + } + + // pass through linked suffix entries and clean up + for (int j = 0; j < SETSIZE; j++) { + sFlag[j] = NULL; + SfxEntry* ptr = sStart[j]; + SfxEntry* nptr = NULL; + while (ptr) { + nptr = ptr->getNext(); + delete (ptr); + ptr = nptr; + nptr = NULL; + } + sStart[j] = NULL; + } + + delete iconvtable; + delete oconvtable; + delete phone; + + FREE_FLAG(compoundflag); + FREE_FLAG(compoundbegin); + FREE_FLAG(compoundmiddle); + FREE_FLAG(compoundend); + FREE_FLAG(compoundpermitflag); + FREE_FLAG(compoundforbidflag); + FREE_FLAG(compoundroot); + FREE_FLAG(forbiddenword); + FREE_FLAG(nosuggest); + FREE_FLAG(nongramsuggest); + FREE_FLAG(needaffix); + FREE_FLAG(lemma_present); + FREE_FLAG(circumfix); + FREE_FLAG(onlyincompound); + + cpdwordmax = 0; + pHMgr = NULL; + cpdmin = 0; + cpdmaxsyllable = 0; + free_utf_tbl(); + checknum = 0; +#ifdef MOZILLA_CLIENT + delete[] csconv; +#endif +} + +void AffixMgr::finishFileMgr(FileMgr* afflst) { + delete afflst; + + // convert affix trees to sorted list + process_pfx_tree_to_list(); + process_sfx_tree_to_list(); +} + +// read in aff file and build up prefix and suffix entry objects +int AffixMgr::parse_file(const char* affpath, const char* key) { + + // checking flag duplication + char dupflags[CONTSIZE]; + char dupflags_ini = 1; + + // first line indicator for removing byte order mark + int firstline = 1; + + // open the affix file + FileMgr* afflst = new FileMgr(affpath, key); + if (!afflst) { + HUNSPELL_WARNING( + stderr, "error: could not open affix description file %s\n", affpath); + return 1; + } + + // step one is to parse the affix file building up the internal + // affix data structures + + // read in each line ignoring any that do not + // start with a known line type indicator + std::string line; + while (afflst->getline(line)) { + mychomp(line); + + /* remove byte order mark */ + if (firstline) { + firstline = 0; + // Affix file begins with byte order mark: possible incompatibility with + // old Hunspell versions + if (line.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) { + line.erase(0, 3); + } + } + + /* parse in the keyboard string */ + if (line.compare(0, 3, "KEY", 3) == 0) { + if (!parse_string(line, keystring, afflst->getlinenum())) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the try string */ + if (line.compare(0, 3, "TRY", 3) == 0) { + if (!parse_string(line, trystring, afflst->getlinenum())) { + 
finishFileMgr(afflst); + return 1; + } + } + + /* parse in the name of the character set used by the .dict and .aff */ + if (line.compare(0, 3, "SET", 3) == 0) { + if (!parse_string(line, encoding, afflst->getlinenum())) { + finishFileMgr(afflst); + return 1; + } + if (encoding == "UTF-8") { + utf8 = 1; +#ifndef OPENOFFICEORG +#ifndef MOZILLA_CLIENT + initialize_utf_tbl(); +#endif +#endif + } + } + + /* parse COMPLEXPREFIXES for agglutinative languages with right-to-left + * writing system */ + if (line.compare(0, 15, "COMPLEXPREFIXES", 15) == 0) + complexprefixes = 1; + + /* parse in the flag used by the controlled compound words */ + if (line.compare(0, 12, "COMPOUNDFLAG", 12) == 0) { + if (!parse_flag(line, &compoundflag, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by compound words */ + if (line.compare(0, 13, "COMPOUNDBEGIN", 13) == 0) { + if (complexprefixes) { + if (!parse_flag(line, &compoundend, afflst)) { + finishFileMgr(afflst); + return 1; + } + } else { + if (!parse_flag(line, &compoundbegin, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + } + + /* parse in the flag used by compound words */ + if (line.compare(0, 14, "COMPOUNDMIDDLE", 14) == 0) { + if (!parse_flag(line, &compoundmiddle, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by compound words */ + if (line.compare(0, 11, "COMPOUNDEND", 11) == 0) { + if (complexprefixes) { + if (!parse_flag(line, &compoundbegin, afflst)) { + finishFileMgr(afflst); + return 1; + } + } else { + if (!parse_flag(line, &compoundend, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + } + + /* parse in the data used by compound_check() method */ + if (line.compare(0, 15, "COMPOUNDWORDMAX", 15) == 0) { + if (!parse_num(line, &cpdwordmax, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag sign compounds in dictionary */ + if (line.compare(0, 12, "COMPOUNDROOT", 12) == 0) { + if (!parse_flag(line, &compoundroot, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by compound_check() method */ + if (line.compare(0, 18, "COMPOUNDPERMITFLAG", 18) == 0) { + if (!parse_flag(line, &compoundpermitflag, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by compound_check() method */ + if (line.compare(0, 18, "COMPOUNDFORBIDFLAG", 18) == 0) { + if (!parse_flag(line, &compoundforbidflag, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + if (line.compare(0, 20, "COMPOUNDMORESUFFIXES", 20) == 0) { + compoundmoresuffixes = 1; + } + + if (line.compare(0, 16, "CHECKCOMPOUNDDUP", 16) == 0) { + checkcompounddup = 1; + } + + if (line.compare(0, 16, "CHECKCOMPOUNDREP", 16) == 0) { + checkcompoundrep = 1; + } + + if (line.compare(0, 19, "CHECKCOMPOUNDTRIPLE", 19) == 0) { + checkcompoundtriple = 1; + } + + if (line.compare(0, 16, "SIMPLIFIEDTRIPLE", 16) == 0) { + simplifiedtriple = 1; + } + + if (line.compare(0, 17, "CHECKCOMPOUNDCASE", 17) == 0) { + checkcompoundcase = 1; + } + + if (line.compare(0, 9, "NOSUGGEST", 9) == 0) { + if (!parse_flag(line, &nosuggest, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + if (line.compare(0, 14, "NONGRAMSUGGEST", 14) == 0) { + if (!parse_flag(line, &nongramsuggest, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by forbidden words */ + if (line.compare(0, 13, "FORBIDDENWORD", 13) == 0) { + if (!parse_flag(line, &forbiddenword, afflst)) { + finishFileMgr(afflst); + return 1; + } 
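Every directive handler in this parsing loop follows the same shape: match the line's leading keyword with std::string::compare(0, n, "KEYWORD", n), then hand the remainder to parse_string, parse_flag or parse_num. The standalone sketch below reproduces only that dispatch pattern; the sample directives and their values ("UTF-8", 3, "X") are hypothetical and not taken from any shipped .aff file.

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main() {
  // Toy stand-ins for lines read from an .aff file.
  const std::vector<std::string> aff = {
      "SET UTF-8",        // character set of the .aff/.dic pair
      "COMPOUNDMIN 3",    // minimal length of a word inside a compound
      "COMPOUNDFLAG X",   // flag marking words allowed in compounds
  };
  for (const std::string& line : aff) {
    if (line.compare(0, 3, "SET", 3) == 0) {
      std::cout << "encoding: " << line.substr(4) << '\n';
    } else if (line.compare(0, 11, "COMPOUNDMIN", 11) == 0) {
      int cpdmin = 0;
      std::istringstream in(line.substr(11));
      in >> cpdmin;
      if (cpdmin < 1)
        cpdmin = 1;  // same lower clamp the COMPOUNDMIN handler applies
      std::cout << "cpdmin: " << cpdmin << '\n';
    } else if (line.compare(0, 12, "COMPOUNDFLAG", 12) == 0) {
      std::cout << "compoundflag: " << line.substr(13) << '\n';
    }
  }
}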
+ } + + /* parse in the flag used by forbidden words (is deprecated) */ + if (line.compare(0, 13, "LEMMA_PRESENT", 13) == 0) { + if (!parse_flag(line, &lemma_present, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by circumfixes */ + if (line.compare(0, 9, "CIRCUMFIX", 9) == 0) { + if (!parse_flag(line, &circumfix, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by fogemorphemes */ + if (line.compare(0, 14, "ONLYINCOMPOUND", 14) == 0) { + if (!parse_flag(line, &onlyincompound, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by `needaffixs' (is deprecated) */ + if (line.compare(0, 10, "PSEUDOROOT", 10) == 0) { + if (!parse_flag(line, &needaffix, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by `needaffixs' */ + if (line.compare(0, 9, "NEEDAFFIX", 9) == 0) { + if (!parse_flag(line, &needaffix, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the minimal length for words in compounds */ + if (line.compare(0, 11, "COMPOUNDMIN", 11) == 0) { + if (!parse_num(line, &cpdmin, afflst)) { + finishFileMgr(afflst); + return 1; + } + if (cpdmin < 1) + cpdmin = 1; + } + + /* parse in the max. words and syllables in compounds */ + if (line.compare(0, 16, "COMPOUNDSYLLABLE", 16) == 0) { + if (!parse_cpdsyllable(line, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by compound_check() method */ + if (line.compare(0, 11, "SYLLABLENUM", 11) == 0) { + if (!parse_string(line, cpdsyllablenum, afflst->getlinenum())) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by the controlled compound words */ + if (line.compare(0, 8, "CHECKNUM", 8) == 0) { + checknum = 1; + } + + /* parse in the extra word characters */ + if (line.compare(0, 9, "WORDCHARS", 9) == 0) { + if (!parse_array(line, wordchars, wordchars_utf16, + utf8, afflst->getlinenum())) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the ignored characters (for example, Arabic optional diacretics + * charachters */ + if (line.compare(0, 6, "IGNORE", 6) == 0) { + if (!parse_array(line, ignorechars, ignorechars_utf16, + utf8, afflst->getlinenum())) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the input conversion table */ + if (line.compare(0, 5, "ICONV", 5) == 0) { + if (!parse_convtable(line, afflst, &iconvtable, "ICONV")) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the output conversion table */ + if (line.compare(0, 5, "OCONV", 5) == 0) { + if (!parse_convtable(line, afflst, &oconvtable, "OCONV")) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the phonetic translation table */ + if (line.compare(0, 5, "PHONE", 5) == 0) { + if (!parse_phonetable(line, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the checkcompoundpattern table */ + if (line.compare(0, 20, "CHECKCOMPOUNDPATTERN", 20) == 0) { + if (!parse_checkcpdtable(line, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the defcompound table */ + if (line.compare(0, 12, "COMPOUNDRULE", 12) == 0) { + if (!parse_defcpdtable(line, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the related character map table */ + if (line.compare(0, 3, "MAP", 3) == 0) { + if (!parse_maptable(line, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the word breakpoints table */ + if (line.compare(0, 5, "BREAK", 5) == 0) { + if 
(!parse_breaktable(line, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the language for language specific codes */ + if (line.compare(0, 4, "LANG", 4) == 0) { + if (!parse_string(line, lang, afflst->getlinenum())) { + finishFileMgr(afflst); + return 1; + } + langnum = get_lang_num(lang); + } + + if (line.compare(0, 7, "VERSION", 7) == 0) { + size_t startpos = line.find_first_not_of(" \t", 7); + if (startpos != std::string::npos) { + version = line.substr(startpos); + } + } + + if (line.compare(0, 12, "MAXNGRAMSUGS", 12) == 0) { + if (!parse_num(line, &maxngramsugs, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + if (line.compare(0, 11, "ONLYMAXDIFF", 11) == 0) + onlymaxdiff = 1; + + if (line.compare(0, 7, "MAXDIFF", 7) == 0) { + if (!parse_num(line, &maxdiff, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + if (line.compare(0, 10, "MAXCPDSUGS", 10) == 0) { + if (!parse_num(line, &maxcpdsugs, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + if (line.compare(0, 11, "NOSPLITSUGS", 11) == 0) { + nosplitsugs = 1; + } + + if (line.compare(0, 9, "FULLSTRIP", 9) == 0) { + fullstrip = 1; + } + + if (line.compare(0, 12, "SUGSWITHDOTS", 12) == 0) { + sugswithdots = 1; + } + + /* parse in the flag used by forbidden words */ + if (line.compare(0, 8, "KEEPCASE", 8) == 0) { + if (!parse_flag(line, &keepcase, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by `forceucase' */ + if (line.compare(0, 10, "FORCEUCASE", 10) == 0) { + if (!parse_flag(line, &forceucase, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + /* parse in the flag used by `warn' */ + if (line.compare(0, 4, "WARN", 4) == 0) { + if (!parse_flag(line, &warn, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + if (line.compare(0, 10, "FORBIDWARN", 10) == 0) { + forbidwarn = 1; + } + + /* parse in the flag used by the affix generator */ + if (line.compare(0, 11, "SUBSTANDARD", 11) == 0) { + if (!parse_flag(line, &substandard, afflst)) { + finishFileMgr(afflst); + return 1; + } + } + + if (line.compare(0, 11, "CHECKSHARPS", 11) == 0) { + checksharps = 1; + } + + /* parse this affix: P - prefix, S - suffix */ + // affix type + char ft = ' '; + if (line.compare(0, 3, "PFX", 3) == 0) + ft = complexprefixes ? 'S' : 'P'; + if (line.compare(0, 3, "SFX", 3) == 0) + ft = complexprefixes ? 'P' : 'S'; + if (ft != ' ') { + if (dupflags_ini) { + memset(dupflags, 0, sizeof(dupflags)); + dupflags_ini = 0; + } + if (!parse_affix(line, ft, afflst, dupflags)) { + finishFileMgr(afflst); + return 1; + } + } + } + + finishFileMgr(afflst); + // affix trees are sorted now + + // now we can speed up performance greatly taking advantage of the + // relationship between the affixes and the idea of "subsets". + + // View each prefix as a potential leading subset of another and view + // each suffix (reversed) as a potential trailing subset of another. + + // To illustrate this relationship if we know the prefix "ab" is found in the + // word to examine, only prefixes that "ab" is a leading subset of need be + // examined. + // Furthermore is "ab" is not present then none of the prefixes that "ab" is + // is a subset need be examined. + // The same argument goes for suffix string that are reversed. + + // Then to top this off why not examine the first char of the word to quickly + // limit the set of prefixes to examine (i.e. 
the prefixes to examine must + // be leading supersets of the first character of the word (if they exist) + + // To take advantage of this "subset" relationship, we need to add two links + // from entry. One to take next if the current prefix is found (call it + // nexteq) + // and one to take next if the current prefix is not found (call it nextne). + + // Since we have built ordered lists, all that remains is to properly + // initialize + // the nextne and nexteq pointers that relate them + + process_pfx_order(); + process_sfx_order(); + + /* get encoding for CHECKCOMPOUNDCASE */ + if (!utf8) { + csconv = get_current_cs(get_encoding()); + for (int i = 0; i <= 255; i++) { + if ((csconv[i].cupper != csconv[i].clower) && + (wordchars.find((char)i) == std::string::npos)) { + wordchars.push_back((char)i); + } + } + + } + + // default BREAK definition + if (!parsedbreaktable) { + breaktable.push_back("-"); + breaktable.push_back("^-"); + breaktable.push_back("-$"); + parsedbreaktable = true; + } + return 0; +} + +// we want to be able to quickly access prefix information +// both by prefix flag, and sorted by prefix string itself +// so we need to set up two indexes + +int AffixMgr::build_pfxtree(PfxEntry* pfxptr) { + PfxEntry* ptr; + PfxEntry* pptr; + PfxEntry* ep = pfxptr; + + // get the right starting points + const char* key = ep->getKey(); + const unsigned char flg = (unsigned char)(ep->getFlag() & 0x00FF); + + // first index by flag which must exist + ptr = pFlag[flg]; + ep->setFlgNxt(ptr); + pFlag[flg] = ep; + + // handle the special case of null affix string + if (strlen(key) == 0) { + // always inset them at head of list at element 0 + ptr = pStart[0]; + ep->setNext(ptr); + pStart[0] = ep; + return 0; + } + + // now handle the normal case + ep->setNextEQ(NULL); + ep->setNextNE(NULL); + + unsigned char sp = *((const unsigned char*)key); + ptr = pStart[sp]; + + // handle the first insert + if (!ptr) { + pStart[sp] = ep; + return 0; + } + + // otherwise use binary tree insertion so that a sorted + // list can easily be generated later + pptr = NULL; + for (;;) { + pptr = ptr; + if (strcmp(ep->getKey(), ptr->getKey()) <= 0) { + ptr = ptr->getNextEQ(); + if (!ptr) { + pptr->setNextEQ(ep); + break; + } + } else { + ptr = ptr->getNextNE(); + if (!ptr) { + pptr->setNextNE(ep); + break; + } + } + } + return 0; +} + +// we want to be able to quickly access suffix information +// both by suffix flag, and sorted by the reverse of the +// suffix string itself; so we need to set up two indexes +int AffixMgr::build_sfxtree(SfxEntry* sfxptr) { + + sfxptr->initReverseWord(); + + SfxEntry* ptr; + SfxEntry* pptr; + SfxEntry* ep = sfxptr; + + /* get the right starting point */ + const char* key = ep->getKey(); + const unsigned char flg = (unsigned char)(ep->getFlag() & 0x00FF); + + // first index by flag which must exist + ptr = sFlag[flg]; + ep->setFlgNxt(ptr); + sFlag[flg] = ep; + + // next index by affix string + + // handle the special case of null affix string + if (strlen(key) == 0) { + // always inset them at head of list at element 0 + ptr = sStart[0]; + ep->setNext(ptr); + sStart[0] = ep; + return 0; + } + + // now handle the normal case + ep->setNextEQ(NULL); + ep->setNextNE(NULL); + + unsigned char sp = *((const unsigned char*)key); + ptr = sStart[sp]; + + // handle the first insert + if (!ptr) { + sStart[sp] = ep; + return 0; + } + + // otherwise use binary tree insertion so that a sorted + // list can easily be generated later + pptr = NULL; + for (;;) { + pptr = ptr; + if 
(strcmp(ep->getKey(), ptr->getKey()) <= 0) { + ptr = ptr->getNextEQ(); + if (!ptr) { + pptr->setNextEQ(ep); + break; + } + } else { + ptr = ptr->getNextNE(); + if (!ptr) { + pptr->setNextNE(ep); + break; + } + } + } + return 0; +} + +// convert from binary tree to sorted list +int AffixMgr::process_pfx_tree_to_list() { + for (int i = 1; i < SETSIZE; i++) { + pStart[i] = process_pfx_in_order(pStart[i], NULL); + } + return 0; +} + +PfxEntry* AffixMgr::process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr) { + if (ptr) { + nptr = process_pfx_in_order(ptr->getNextNE(), nptr); + ptr->setNext(nptr); + nptr = process_pfx_in_order(ptr->getNextEQ(), ptr); + } + return nptr; +} + +// convert from binary tree to sorted list +int AffixMgr::process_sfx_tree_to_list() { + for (int i = 1; i < SETSIZE; i++) { + sStart[i] = process_sfx_in_order(sStart[i], NULL); + } + return 0; +} + +SfxEntry* AffixMgr::process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr) { + if (ptr) { + nptr = process_sfx_in_order(ptr->getNextNE(), nptr); + ptr->setNext(nptr); + nptr = process_sfx_in_order(ptr->getNextEQ(), ptr); + } + return nptr; +} + +// reinitialize the PfxEntry links NextEQ and NextNE to speed searching +// using the idea of leading subsets this time +int AffixMgr::process_pfx_order() { + PfxEntry* ptr; + + // loop through each prefix list starting point + for (int i = 1; i < SETSIZE; i++) { + ptr = pStart[i]; + + // look through the remainder of the list + // and find next entry with affix that + // the current one is not a subset of + // mark that as destination for NextNE + // use next in list that you are a subset + // of as NextEQ + + for (; ptr != NULL; ptr = ptr->getNext()) { + PfxEntry* nptr = ptr->getNext(); + for (; nptr != NULL; nptr = nptr->getNext()) { + if (!isSubset(ptr->getKey(), nptr->getKey())) + break; + } + ptr->setNextNE(nptr); + ptr->setNextEQ(NULL); + if ((ptr->getNext()) && + isSubset(ptr->getKey(), (ptr->getNext())->getKey())) + ptr->setNextEQ(ptr->getNext()); + } + + // now clean up by adding smart search termination strings: + // if you are already a superset of the previous prefix + // but not a subset of the next, search can end here + // so set NextNE properly + + ptr = pStart[i]; + for (; ptr != NULL; ptr = ptr->getNext()) { + PfxEntry* nptr = ptr->getNext(); + PfxEntry* mptr = NULL; + for (; nptr != NULL; nptr = nptr->getNext()) { + if (!isSubset(ptr->getKey(), nptr->getKey())) + break; + mptr = nptr; + } + if (mptr) + mptr->setNextNE(NULL); + } + } + return 0; +} + +// initialize the SfxEntry links NextEQ and NextNE to speed searching +// using the idea of leading subsets this time +int AffixMgr::process_sfx_order() { + SfxEntry* ptr; + + // loop through each prefix list starting point + for (int i = 1; i < SETSIZE; i++) { + ptr = sStart[i]; + + // look through the remainder of the list + // and find next entry with affix that + // the current one is not a subset of + // mark that as destination for NextNE + // use next in list that you are a subset + // of as NextEQ + + for (; ptr != NULL; ptr = ptr->getNext()) { + SfxEntry* nptr = ptr->getNext(); + for (; nptr != NULL; nptr = nptr->getNext()) { + if (!isSubset(ptr->getKey(), nptr->getKey())) + break; + } + ptr->setNextNE(nptr); + ptr->setNextEQ(NULL); + if ((ptr->getNext()) && + isSubset(ptr->getKey(), (ptr->getNext())->getKey())) + ptr->setNextEQ(ptr->getNext()); + } + + // now clean up by adding smart search termination strings: + // if you are already a superset of the previous suffix + // but not a subset of the next, search can 
end here + // so set NextNE properly + + ptr = sStart[i]; + for (; ptr != NULL; ptr = ptr->getNext()) { + SfxEntry* nptr = ptr->getNext(); + SfxEntry* mptr = NULL; + for (; nptr != NULL; nptr = nptr->getNext()) { + if (!isSubset(ptr->getKey(), nptr->getKey())) + break; + mptr = nptr; + } + if (mptr) + mptr->setNextNE(NULL); + } + } + return 0; +} + +// add flags to the result for dictionary debugging +std::string& AffixMgr::debugflag(std::string& result, unsigned short flag) { + char* st = encode_flag(flag); + result.push_back(MSEP_FLD); + result.append(MORPH_FLAG); + if (st) { + result.append(st); + free(st); + } + return result; +} + +// calculate the character length of the condition +int AffixMgr::condlen(const char* st) { + int l = 0; + bool group = false; + for (; *st; st++) { + if (*st == '[') { + group = true; + l++; + } else if (*st == ']') + group = false; + else if (!group && (!utf8 || (!(*st & 0x80) || ((*st & 0xc0) == 0x80)))) + l++; + } + return l; +} + +int AffixMgr::encodeit(AffEntry& entry, const char* cs) { + if (strcmp(cs, ".") != 0) { + entry.numconds = (char)condlen(cs); + const size_t cslen = strlen(cs); + const size_t short_part = std::min<size_t>(MAXCONDLEN, cslen); + memcpy(entry.c.conds, cs, short_part); + if (short_part < MAXCONDLEN) { + //blank out the remaining space + memset(entry.c.conds + short_part, 0, MAXCONDLEN - short_part); + } else if (cs[MAXCONDLEN]) { + //there is more conditions than fit in fixed space, so its + //a long condition + entry.opts |= aeLONGCOND; + entry.c.l.conds2 = mystrdup(cs + MAXCONDLEN_1); + if (!entry.c.l.conds2) + return 1; + } + } else { + entry.numconds = 0; + entry.c.conds[0] = '\0'; + } + return 0; +} + +// return 1 if s1 is a leading subset of s2 (dots are for infixes) +inline int AffixMgr::isSubset(const char* s1, const char* s2) { + while (((*s1 == *s2) || (*s1 == '.')) && (*s1 != '\0')) { + s1++; + s2++; + } + return (*s1 == '\0'); +} + +// check word for prefixes +struct hentry* AffixMgr::prefix_check(const char* word, + int len, + char in_compound, + const FLAG needflag) { + struct hentry* rv = NULL; + + pfx = NULL; + pfxappnd = NULL; + sfxappnd = NULL; + sfxextra = 0; + + // first handle the special case of 0 length prefixes + PfxEntry* pe = pStart[0]; + while (pe) { + if ( + // fogemorpheme + ((in_compound != IN_CPD_NOT) || + !(pe->getCont() && + (TESTAFF(pe->getCont(), onlyincompound, pe->getContLen())))) && + // permit prefixes in compounds + ((in_compound != IN_CPD_END) || + (pe->getCont() && + (TESTAFF(pe->getCont(), compoundpermitflag, pe->getContLen()))))) { + // check prefix + rv = pe->checkword(word, len, in_compound, needflag); + if (rv) { + pfx = pe; // BUG: pfx not stateless + return rv; + } + } + pe = pe->getNext(); + } + + // now handle the general case + unsigned char sp = *((const unsigned char*)word); + PfxEntry* pptr = pStart[sp]; + + while (pptr) { + if (isSubset(pptr->getKey(), word)) { + if ( + // fogemorpheme + ((in_compound != IN_CPD_NOT) || + !(pptr->getCont() && + (TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen())))) && + // permit prefixes in compounds + ((in_compound != IN_CPD_END) || + (pptr->getCont() && (TESTAFF(pptr->getCont(), compoundpermitflag, + pptr->getContLen()))))) { + // check prefix + rv = pptr->checkword(word, len, in_compound, needflag); + if (rv) { + pfx = pptr; // BUG: pfx not stateless + return rv; + } + } + pptr = pptr->getNextEQ(); + } else { + pptr = pptr->getNextNE(); + } + } + + return NULL; +} + +// check word for prefixes and two-level suffixes +struct 
hentry* AffixMgr::prefix_check_twosfx(const char* word, + int len, + char in_compound, + const FLAG needflag) { + struct hentry* rv = NULL; + + pfx = NULL; + sfxappnd = NULL; + sfxextra = 0; + + // first handle the special case of 0 length prefixes + PfxEntry* pe = pStart[0]; + + while (pe) { + rv = pe->check_twosfx(word, len, in_compound, needflag); + if (rv) + return rv; + pe = pe->getNext(); + } + + // now handle the general case + unsigned char sp = *((const unsigned char*)word); + PfxEntry* pptr = pStart[sp]; + + while (pptr) { + if (isSubset(pptr->getKey(), word)) { + rv = pptr->check_twosfx(word, len, in_compound, needflag); + if (rv) { + pfx = pptr; + return rv; + } + pptr = pptr->getNextEQ(); + } else { + pptr = pptr->getNextNE(); + } + } + + return NULL; +} + +// check word for prefixes and morph +std::string AffixMgr::prefix_check_morph(const char* word, + int len, + char in_compound, + const FLAG needflag) { + + std::string result; + + pfx = NULL; + sfxappnd = NULL; + sfxextra = 0; + + // first handle the special case of 0 length prefixes + PfxEntry* pe = pStart[0]; + while (pe) { + std::string st = pe->check_morph(word, len, in_compound, needflag); + if (!st.empty()) { + result.append(st); + } + pe = pe->getNext(); + } + + // now handle the general case + unsigned char sp = *((const unsigned char*)word); + PfxEntry* pptr = pStart[sp]; + + while (pptr) { + if (isSubset(pptr->getKey(), word)) { + std::string st = pptr->check_morph(word, len, in_compound, needflag); + if (!st.empty()) { + // fogemorpheme + if ((in_compound != IN_CPD_NOT) || + !((pptr->getCont() && (TESTAFF(pptr->getCont(), onlyincompound, + pptr->getContLen()))))) { + result.append(st); + pfx = pptr; + } + } + pptr = pptr->getNextEQ(); + } else { + pptr = pptr->getNextNE(); + } + } + + return result; +} + +// check word for prefixes and morph and two-level suffixes +std::string AffixMgr::prefix_check_twosfx_morph(const char* word, + int len, + char in_compound, + const FLAG needflag) { + std::string result; + + pfx = NULL; + sfxappnd = NULL; + sfxextra = 0; + + // first handle the special case of 0 length prefixes + PfxEntry* pe = pStart[0]; + while (pe) { + std::string st = pe->check_twosfx_morph(word, len, in_compound, needflag); + if (!st.empty()) { + result.append(st); + } + pe = pe->getNext(); + } + + // now handle the general case + unsigned char sp = *((const unsigned char*)word); + PfxEntry* pptr = pStart[sp]; + + while (pptr) { + if (isSubset(pptr->getKey(), word)) { + std::string st = pptr->check_twosfx_morph(word, len, in_compound, needflag); + if (!st.empty()) { + result.append(st); + pfx = pptr; + } + pptr = pptr->getNextEQ(); + } else { + pptr = pptr->getNextNE(); + } + } + + return result; +} + +// Is word a non-compound with a REP substitution (see checkcompoundrep)? 
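cpdrep_check below scans the REP table: each occurrence of a REP pattern in the compound candidate is substituted and the result is passed to candidate_check, and if the substitution yields a word the dictionary already accepts, the compound is rejected as a likely misspelling of that word. Here is a simplified standalone version of the same loop, with a std::set standing in for candidate_check; the REP pair ("shun" to "tion") and the words are invented for illustration.

#include <cstring>
#include <iostream>
#include <set>
#include <string>
#include <vector>

struct RepEntry {
  std::string pattern;    // typical misspelling
  std::string outstring;  // its replacement
};

// True if replacing one occurrence of any REP pattern turns `word` into a
// word the dictionary already knows (the simplified cpdrep_check criterion).
bool rep_check(const std::string& word,
               const std::vector<RepEntry>& reptable,
               const std::set<std::string>& dictionary) {
  for (const RepEntry& rep : reptable) {
    const char* r = word.c_str();
    // search every occurrence of the pattern in the word
    while ((r = strstr(r, rep.pattern.c_str())) != nullptr) {
      std::string candidate(word);
      candidate.replace(r - word.c_str(), rep.pattern.size(), rep.outstring);
      if (dictionary.count(candidate))
        return true;
      ++r;  // continue searching after this match
    }
  }
  return false;
}

int main() {
  const std::vector<RepEntry> reptable = {{"shun", "tion"}};
  const std::set<std::string> dictionary = {"station"};
  // "stashun" becomes "station" after the substitution, so it is flagged.
  std::cout << std::boolalpha
            << rep_check("stashun", reptable, dictionary) << '\n';
}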
+int AffixMgr::cpdrep_check(const char* word, int wl) { + + if ((wl < 2) || get_reptable().empty()) + return 0; + + for (size_t i = 0; i < get_reptable().size(); ++i) { + // use only available mid patterns + if (!get_reptable()[i].outstrings[0].empty()) { + const char* r = word; + const size_t lenp = get_reptable()[i].pattern.size(); + // search every occurence of the pattern in the word + while ((r = strstr(r, get_reptable()[i].pattern.c_str())) != NULL) { + std::string candidate(word); + candidate.replace(r - word, lenp, get_reptable()[i].outstrings[0]); + if (candidate_check(candidate.c_str(), candidate.size())) + return 1; + ++r; // search for the next letter + } + } + } + + return 0; +} + +// forbid compound words, if they are in the dictionary as a +// word pair separated by space +int AffixMgr::cpdwordpair_check(const char * word, int wl) { + if (wl > 2) { + std::string candidate(word); + for (size_t i = 1; i < candidate.size(); i++) { + // go to end of the UTF-8 character + if (utf8 && ((word[i] & 0xc0) == 0x80)) + continue; + candidate.insert(i, 1, ' '); + if (candidate_check(candidate.c_str(), candidate.size())) + return 1; + candidate.erase(i, 1); + } + } + + return 0; +} + +// forbid compoundings when there are special patterns at word bound +int AffixMgr::cpdpat_check(const char* word, + int pos, + hentry* r1, + hentry* r2, + const char /*affixed*/) { + for (size_t i = 0; i < checkcpdtable.size(); ++i) { + size_t len; + if (isSubset(checkcpdtable[i].pattern2.c_str(), word + pos) && + (!r1 || !checkcpdtable[i].cond || + (r1->astr && TESTAFF(r1->astr, checkcpdtable[i].cond, r1->alen))) && + (!r2 || !checkcpdtable[i].cond2 || + (r2->astr && TESTAFF(r2->astr, checkcpdtable[i].cond2, r2->alen))) && + // zero length pattern => only TESTAFF + // zero pattern (0/flag) => unmodified stem (zero affixes allowed) + (checkcpdtable[i].pattern.empty() || + ((checkcpdtable[i].pattern[0] == '0' && r1->blen <= pos && + strncmp(word + pos - r1->blen, r1->word, r1->blen) == 0) || + (checkcpdtable[i].pattern[0] != '0' && + ((len = checkcpdtable[i].pattern.size()) != 0) && + strncmp(word + pos - len, checkcpdtable[i].pattern.c_str(), len) == 0)))) { + return 1; + } + } + return 0; +} + +// forbid compounding with neighbouring upper and lower case characters at word +// bounds +int AffixMgr::cpdcase_check(const char* word, int pos) { + if (utf8) { + const char* p; + for (p = word + pos - 1; (*p & 0xc0) == 0x80; p--) + ; + std::string pair(p); + std::vector<w_char> pair_u; + u8_u16(pair_u, pair); + unsigned short a = pair_u.size() > 1 ? ((pair_u[1].h << 8) + pair_u[1].l) : 0; + unsigned short b = !pair_u.empty() ? ((pair_u[0].h << 8) + pair_u[0].l) : 0; + if (((unicodetoupper(a, langnum) == a) || + (unicodetoupper(b, langnum) == b)) && + (a != '-') && (b != '-')) + return 1; + } else { + unsigned char a = *(word + pos - 1); + unsigned char b = *(word + pos); + if ((csconv[a].ccase || csconv[b].ccase) && (a != '-') && (b != '-')) + return 1; + } + return 0; +} + +struct metachar_data { + signed short btpp; // metacharacter (*, ?) 
position for backtracking + signed short btwp; // word position for metacharacters + int btnum; // number of matched characters in metacharacter +}; + +// check compound patterns +int AffixMgr::defcpd_check(hentry*** words, + short wnum, + hentry* rv, + hentry** def, + char all) { + int w = 0; + + if (!*words) { + w = 1; + *words = def; + } + + if (!*words) { + return 0; + } + + std::vector<metachar_data> btinfo(1); + + short bt = 0; + + (*words)[wnum] = rv; + + // has the last word COMPOUNDRULE flag? + if (rv->alen == 0) { + (*words)[wnum] = NULL; + if (w) + *words = NULL; + return 0; + } + int ok = 0; + for (size_t i = 0; i < defcpdtable.size(); ++i) { + for (size_t j = 0; j < defcpdtable[i].size(); ++j) { + if (defcpdtable[i][j] != '*' && defcpdtable[i][j] != '?' && + TESTAFF(rv->astr, defcpdtable[i][j], rv->alen)) { + ok = 1; + break; + } + } + } + if (ok == 0) { + (*words)[wnum] = NULL; + if (w) + *words = NULL; + return 0; + } + + for (size_t i = 0; i < defcpdtable.size(); ++i) { + size_t pp = 0; // pattern position + signed short wp = 0; // "words" position + int ok2; + ok = 1; + ok2 = 1; + do { + while ((pp < defcpdtable[i].size()) && (wp <= wnum)) { + if (((pp + 1) < defcpdtable[i].size()) && + ((defcpdtable[i][pp + 1] == '*') || + (defcpdtable[i][pp + 1] == '?'))) { + int wend = (defcpdtable[i][pp + 1] == '?') ? wp : wnum; + ok2 = 1; + pp += 2; + btinfo[bt].btpp = pp; + btinfo[bt].btwp = wp; + while (wp <= wend) { + if (!(*words)[wp]->alen || + !TESTAFF((*words)[wp]->astr, defcpdtable[i][pp - 2], + (*words)[wp]->alen)) { + ok2 = 0; + break; + } + wp++; + } + if (wp <= wnum) + ok2 = 0; + btinfo[bt].btnum = wp - btinfo[bt].btwp; + if (btinfo[bt].btnum > 0) { + ++bt; + btinfo.resize(bt+1); + } + if (ok2) + break; + } else { + ok2 = 1; + if (!(*words)[wp] || !(*words)[wp]->alen || + !TESTAFF((*words)[wp]->astr, defcpdtable[i][pp], + (*words)[wp]->alen)) { + ok = 0; + break; + } + pp++; + wp++; + if ((defcpdtable[i].size() == pp) && !(wp > wnum)) + ok = 0; + } + } + if (ok && ok2) { + size_t r = pp; + while ((defcpdtable[i].size() > r) && ((r + 1) < defcpdtable[i].size()) && + ((defcpdtable[i][r + 1] == '*') || + (defcpdtable[i][r + 1] == '?'))) + r += 2; + if (defcpdtable[i].size() <= r) + return 1; + } + // backtrack + if (bt) + do { + ok = 1; + btinfo[bt - 1].btnum--; + pp = btinfo[bt - 1].btpp; + wp = btinfo[bt - 1].btwp + (signed short)btinfo[bt - 1].btnum; + } while ((btinfo[bt - 1].btnum < 0) && --bt); + } while (bt); + + if (ok && ok2 && (!all || (defcpdtable[i].size() <= pp))) + return 1; + + // check zero ending + while (ok && ok2 && (defcpdtable[i].size() > pp) && + ((pp + 1) < defcpdtable[i].size()) && + ((defcpdtable[i][pp + 1] == '*') || + (defcpdtable[i][pp + 1] == '?'))) + pp += 2; + if (ok && ok2 && (defcpdtable[i].size() <= pp)) + return 1; + } + (*words)[wnum] = NULL; + if (w) + *words = NULL; + return 0; +} + +inline int AffixMgr::candidate_check(const char* word, int len) { + + struct hentry* rv = lookup(word); + if (rv) + return 1; + + // rv = prefix_check(word,len,1); + // if (rv) return 1; + + rv = affix_check(word, len); + if (rv) + return 1; + return 0; +} + +// calculate number of syllable for compound-checking +short AffixMgr::get_syllable(const std::string& word) { + if (cpdmaxsyllable == 0) + return 0; + + short num = 0; + + if (!utf8) { + for (size_t i = 0; i < word.size(); ++i) { + if (std::binary_search(cpdvowels.begin(), cpdvowels.end(), + word[i])) { + ++num; + } + } + } else if (!cpdvowels_utf16.empty()) { + std::vector<w_char> w; + u8_u16(w, word); 
+ for (size_t i = 0; i < w.size(); ++i) { + if (std::binary_search(cpdvowels_utf16.begin(), + cpdvowels_utf16.end(), + w[i])) { + ++num; + } + } + } + + return num; +} + +void AffixMgr::setcminmax(int* cmin, int* cmax, const char* word, int len) { + if (utf8) { + int i; + for (*cmin = 0, i = 0; (i < cpdmin) && *cmin < len; i++) { + for ((*cmin)++; *cmin < len && (word[*cmin] & 0xc0) == 0x80; (*cmin)++) + ; + } + for (*cmax = len, i = 0; (i < (cpdmin - 1)) && *cmax >= 0; i++) { + for ((*cmax)--; *cmax >= 0 && (word[*cmax] & 0xc0) == 0x80; (*cmax)--) + ; + } + } else { + *cmin = cpdmin; + *cmax = len - cpdmin + 1; + } +} + +// check if compound word is correctly spelled +// hu_mov_rule = spec. Hungarian rule (XXX) +struct hentry* AffixMgr::compound_check(const std::string& word, + short wordnum, + short numsyllable, + short maxwordnum, + short wnum, + hentry** words = NULL, + hentry** rwords = NULL, + char hu_mov_rule = 0, + char is_sug = 0, + int* info = NULL) { + int i; + short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2; + struct hentry* rv = NULL; + struct hentry* rv_first; + std::string st; + char ch = '\0'; + int cmin; + int cmax; + int striple = 0; + size_t scpd = 0; + int soldi = 0; + int oldcmin = 0; + int oldcmax = 0; + int oldlen = 0; + int checkedstriple = 0; + char affixed = 0; + hentry** oldwords = words; + size_t len = word.size(); + + int checked_prefix; + + // add a time limit to handle possible + // combinatorical explosion of the overlapping words + + HUNSPELL_THREAD_LOCAL clock_t timelimit; + + if (wordnum == 0) { + // get the start time, seeing as we're reusing this set to 0 + // to flag timeout, use clock() + 1 to avoid start clock() + // of 0 as being a timeout + timelimit = clock() + 1; + } + else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT)) { + timelimit = 0; + } + + setcminmax(&cmin, &cmax, word.c_str(), len); + + st.assign(word); + + for (i = cmin; i < cmax; i++) { + // go to end of the UTF-8 character + if (utf8) { + for (; (st[i] & 0xc0) == 0x80; i++) + ; + if (i >= cmax) + return NULL; + } + + words = oldwords; + int onlycpdrule = (words) ? 
1 : 0; + + do { // onlycpdrule loop + + oldnumsyllable = numsyllable; + oldwordnum = wordnum; + checked_prefix = 0; + + do { // simplified checkcompoundpattern loop + + if (timelimit == 0) + return 0; + + if (scpd > 0) { + for (; scpd <= checkcpdtable.size() && + (checkcpdtable[scpd - 1].pattern3.empty() || + strncmp(word.c_str() + i, checkcpdtable[scpd - 1].pattern3.c_str(), + checkcpdtable[scpd - 1].pattern3.size()) != 0); + scpd++) + ; + + if (scpd > checkcpdtable.size()) + break; // break simplified checkcompoundpattern loop + st.replace(i, std::string::npos, checkcpdtable[scpd - 1].pattern); + soldi = i; + i += checkcpdtable[scpd - 1].pattern.size(); + st.replace(i, std::string::npos, checkcpdtable[scpd - 1].pattern2); + st.replace(i + checkcpdtable[scpd - 1].pattern2.size(), std::string::npos, + word.substr(soldi + checkcpdtable[scpd - 1].pattern3.size())); + + oldlen = len; + len += checkcpdtable[scpd - 1].pattern.size() + + checkcpdtable[scpd - 1].pattern2.size() - + checkcpdtable[scpd - 1].pattern3.size(); + oldcmin = cmin; + oldcmax = cmax; + setcminmax(&cmin, &cmax, st.c_str(), len); + + cmax = len - cpdmin + 1; + } + + ch = st[i]; + st[i] = '\0'; + + sfx = NULL; + pfx = NULL; + + // FIRST WORD + + affixed = 1; + rv = lookup(st.c_str()); // perhaps without prefix + + // forbid dictionary stems with COMPOUNDFORBIDFLAG in + // compound words, overriding the effect of COMPOUNDPERMITFLAG + if ((rv) && compoundforbidflag && + TESTAFF(rv->astr, compoundforbidflag, rv->alen) && !hu_mov_rule) + continue; + + // search homonym with compound flag + while ((rv) && !hu_mov_rule && + ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) || + !((compoundflag && !words && !onlycpdrule && + TESTAFF(rv->astr, compoundflag, rv->alen)) || + (compoundbegin && !wordnum && !onlycpdrule && + TESTAFF(rv->astr, compoundbegin, rv->alen)) || + (compoundmiddle && wordnum && !words && !onlycpdrule && + TESTAFF(rv->astr, compoundmiddle, rv->alen)) || + (!defcpdtable.empty() && onlycpdrule && + ((!words && !wordnum && + defcpd_check(&words, wnum, rv, rwords, 0)) || + (words && + defcpd_check(&words, wnum, rv, rwords, 0))))) || + (scpd != 0 && checkcpdtable[scpd - 1].cond != FLAG_NULL && + !TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond, rv->alen)))) { + rv = rv->next_homonym; + } + + if (rv) + affixed = 0; + + if (!rv) { + if (onlycpdrule) + break; + if (compoundflag && + !(rv = prefix_check(st.c_str(), i, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, + compoundflag))) { + if (((rv = suffix_check( + st.c_str(), i, 0, NULL, FLAG_NULL, compoundflag, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && + (rv = suffix_check_twosfx(st.c_str(), i, 0, NULL, compoundflag)))) && + !hu_mov_rule && sfx->getCont() && + ((compoundforbidflag && + TESTAFF(sfx->getCont(), compoundforbidflag, + sfx->getContLen())) || + (compoundend && + TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) { + rv = NULL; + } + } + + if (rv || + (((wordnum == 0) && compoundbegin && + ((rv = suffix_check( + st.c_str(), i, 0, NULL, FLAG_NULL, compoundbegin, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && + (rv = suffix_check_twosfx( + st.c_str(), i, 0, NULL, + compoundbegin))) || // twofold suffixes + compound + (rv = prefix_check(st.c_str(), i, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, + compoundbegin)))) || + ((wordnum > 0) && compoundmiddle && + ((rv = suffix_check( + st.c_str(), i, 0, NULL, FLAG_NULL, compoundmiddle, + hu_mov_rule ? 
IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && + (rv = suffix_check_twosfx( + st.c_str(), i, 0, NULL, + compoundmiddle))) || // twofold suffixes + compound + (rv = prefix_check(st.c_str(), i, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, + compoundmiddle)))))) + checked_prefix = 1; + // else check forbiddenwords and needaffix + } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) || + TESTAFF(rv->astr, needaffix, rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || + (is_sug && nosuggest && + TESTAFF(rv->astr, nosuggest, rv->alen)))) { + st[i] = ch; + // continue; + break; + } + + // check non_compound flag in suffix and prefix + if ((rv) && !hu_mov_rule && + ((pfx && pfx->getCont() && + TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())) || + (sfx && sfx->getCont() && + TESTAFF(sfx->getCont(), compoundforbidflag, + sfx->getContLen())))) { + rv = NULL; + } + + // check compoundend flag in suffix and prefix + if ((rv) && !checked_prefix && compoundend && !hu_mov_rule && + ((pfx && pfx->getCont() && + TESTAFF(pfx->getCont(), compoundend, pfx->getContLen())) || + (sfx && sfx->getCont() && + TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) { + rv = NULL; + } + + // check compoundmiddle flag in suffix and prefix + if ((rv) && !checked_prefix && (wordnum == 0) && compoundmiddle && + !hu_mov_rule && + ((pfx && pfx->getCont() && + TESTAFF(pfx->getCont(), compoundmiddle, pfx->getContLen())) || + (sfx && sfx->getCont() && + TESTAFF(sfx->getCont(), compoundmiddle, sfx->getContLen())))) { + rv = NULL; + } + + // check forbiddenwords + if ((rv) && (rv->astr) && + (TESTAFF(rv->astr, forbiddenword, rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || + (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) { + return NULL; + } + + // increment word number, if the second root has a compoundroot flag + if ((rv) && compoundroot && + (TESTAFF(rv->astr, compoundroot, rv->alen))) { + wordnum++; + } + + // first word is acceptable in compound words? + if (((rv) && + (checked_prefix || (words && words[wnum]) || + (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) || + ((oldwordnum == 0) && compoundbegin && + TESTAFF(rv->astr, compoundbegin, rv->alen)) || + ((oldwordnum > 0) && compoundmiddle && + TESTAFF(rv->astr, compoundmiddle, rv->alen)) + + // LANG_hu section: spec. Hungarian rule + || ((langnum == LANG_hu) && hu_mov_rule && + (TESTAFF( + rv->astr, 'F', + rv->alen) || // XXX hardwired Hungarian dictionary codes + TESTAFF(rv->astr, 'G', rv->alen) || + TESTAFF(rv->astr, 'H', rv->alen))) + // END of LANG_hu section + ) && + ( + // test CHECKCOMPOUNDPATTERN conditions + scpd == 0 || checkcpdtable[scpd - 1].cond == FLAG_NULL || + TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond, rv->alen)) && + !((checkcompoundtriple && scpd == 0 && + !words && // test triple letters + (word[i - 1] == word[i]) && + (((i > 1) && (word[i - 1] == word[i - 2])) || + ((word[i - 1] == word[i + 1])) // may be word[i+1] == '\0' + )) || + (checkcompoundcase && scpd == 0 && !words && + cpdcase_check(word.c_str(), i)))) + // LANG_hu section: spec. Hungarian rule + || ((!rv) && (langnum == LANG_hu) && hu_mov_rule && + (rv = affix_check(st.c_str(), i)) && + (sfx && sfx->getCont() && + ( // XXX hardwired Hungarian dic. codes + TESTAFF(sfx->getCont(), (unsigned short)'x', + sfx->getContLen()) || + TESTAFF( + sfx->getCont(), (unsigned short)'%', + sfx->getContLen()))))) { // first word is ok condition + + // LANG_hu section: spec. 
Hungarian rule + if (langnum == LANG_hu) { + // calculate syllable number of the word + numsyllable += get_syllable(st.substr(0, i)); + // + 1 word, if syllable number of the prefix > 1 (hungarian + // convention) + if (pfx && (get_syllable(pfx->getKey()) > 1)) + wordnum++; + } + // END of LANG_hu section + + // NEXT WORD(S) + rv_first = rv; + st[i] = ch; + + do { // striple loop + + // check simplifiedtriple + if (simplifiedtriple) { + if (striple) { + checkedstriple = 1; + i--; // check "fahrt" instead of "ahrt" in "Schiffahrt" + } else if (i > 2 && word[i - 1] == word[i - 2]) + striple = 1; + } + + rv = lookup(st.c_str() + i); // perhaps without prefix + + // search homonym with compound flag + while ((rv) && + ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) || + !((compoundflag && !words && + TESTAFF(rv->astr, compoundflag, rv->alen)) || + (compoundend && !words && + TESTAFF(rv->astr, compoundend, rv->alen)) || + (!defcpdtable.empty() && words && + defcpd_check(&words, wnum + 1, rv, NULL, 1))) || + (scpd != 0 && checkcpdtable[scpd - 1].cond2 != FLAG_NULL && + !TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2, + rv->alen)))) { + rv = rv->next_homonym; + } + + // check FORCEUCASE + if (rv && forceucase && (rv) && + (TESTAFF(rv->astr, forceucase, rv->alen)) && + !(info && *info & SPELL_ORIGCAP)) + rv = NULL; + + if (rv && words && words[wnum + 1]) + return rv_first; + + oldnumsyllable2 = numsyllable; + oldwordnum2 = wordnum; + + // LANG_hu section: spec. Hungarian rule, XXX hardwired dictionary + // code + if ((rv) && (langnum == LANG_hu) && + (TESTAFF(rv->astr, 'I', rv->alen)) && + !(TESTAFF(rv->astr, 'J', rv->alen))) { + numsyllable--; + } + // END of LANG_hu section + + // increment word number, if the second root has a compoundroot flag + if ((rv) && (compoundroot) && + (TESTAFF(rv->astr, compoundroot, rv->alen))) { + wordnum++; + } + + // check forbiddenwords + if ((rv) && (rv->astr) && + (TESTAFF(rv->astr, forbiddenword, rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || + (is_sug && nosuggest && + TESTAFF(rv->astr, nosuggest, rv->alen)))) + return NULL; + + // second word is acceptable, as a root? + // hungarian conventions: compounding is acceptable, + // when compound forms consist of 2 words, or if more, + // then the syllable number of root words must be 6, or lesser. + + if ((rv) && + ((compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) || + (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) && + (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || + ((cpdmaxsyllable != 0) && + (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->blen)) <= + cpdmaxsyllable))) && + ( + // test CHECKCOMPOUNDPATTERN + checkcpdtable.empty() || scpd != 0 || + !cpdpat_check(word.c_str(), i, rv_first, rv, 0)) && + ((!checkcompounddup || (rv != rv_first))) + // test CHECKCOMPOUNDPATTERN conditions + && + (scpd == 0 || checkcpdtable[scpd - 1].cond2 == FLAG_NULL || + TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2, rv->alen))) { + // forbid compound word, if it is a non-compound word with typical + // fault + if ((checkcompoundrep && cpdrep_check(word.c_str(), len)) || + cpdwordpair_check(word.c_str(), len)) + return NULL; + return rv_first; + } + + numsyllable = oldnumsyllable2; + wordnum = oldwordnum2; + + // perhaps second word has prefix or/and suffix + sfx = NULL; + sfxflag = FLAG_NULL; + rv = (compoundflag && !onlycpdrule) + ? 
affix_check((word.c_str() + i), strlen(word.c_str() + i), compoundflag, + IN_CPD_END) + : NULL; + if (!rv && compoundend && !onlycpdrule) { + sfx = NULL; + pfx = NULL; + rv = affix_check((word.c_str() + i), strlen(word.c_str() + i), compoundend, + IN_CPD_END); + } + + if (!rv && !defcpdtable.empty() && words) { + rv = affix_check((word.c_str() + i), strlen(word.c_str() + i), 0, IN_CPD_END); + if (rv && defcpd_check(&words, wnum + 1, rv, NULL, 1)) + return rv_first; + rv = NULL; + } + + // test CHECKCOMPOUNDPATTERN conditions (allowed forms) + if (rv && + !(scpd == 0 || checkcpdtable[scpd - 1].cond2 == FLAG_NULL || + TESTAFF(rv->astr, checkcpdtable[scpd - 1].cond2, rv->alen))) + rv = NULL; + + // test CHECKCOMPOUNDPATTERN conditions (forbidden compounds) + if (rv && !checkcpdtable.empty() && scpd == 0 && + cpdpat_check(word.c_str(), i, rv_first, rv, affixed)) + rv = NULL; + + // check non_compound flag in suffix and prefix + if ((rv) && ((pfx && pfx->getCont() && + TESTAFF(pfx->getCont(), compoundforbidflag, + pfx->getContLen())) || + (sfx && sfx->getCont() && + TESTAFF(sfx->getCont(), compoundforbidflag, + sfx->getContLen())))) { + rv = NULL; + } + + // check FORCEUCASE + if (rv && forceucase && (rv) && + (TESTAFF(rv->astr, forceucase, rv->alen)) && + !(info && *info & SPELL_ORIGCAP)) + rv = NULL; + + // check forbiddenwords + if ((rv) && (rv->astr) && + (TESTAFF(rv->astr, forbiddenword, rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || + (is_sug && nosuggest && + TESTAFF(rv->astr, nosuggest, rv->alen)))) + return NULL; + + // pfxappnd = prefix of word+i, or NULL + // calculate syllable number of prefix. + // hungarian convention: when syllable number of prefix is more, + // than 1, the prefix+word counts as two words. + + if (langnum == LANG_hu) { + // calculate syllable number of the word + numsyllable += get_syllable(word.c_str() + i); + + // - affix syllable num. + // XXX only second suffix (inflections, not derivations) + if (sfxappnd) { + std::string tmp(sfxappnd); + reverseword(tmp); + numsyllable -= short(get_syllable(tmp) + sfxextra); + } else { + numsyllable -= short(sfxextra); + } + + // + 1 word, if syllable number of the prefix > 1 (hungarian + // convention) + if (pfx && (get_syllable(pfx->getKey()) > 1)) + wordnum++; + + // increment syllable num, if last word has a SYLLABLENUM flag + // and the suffix is beginning `s' + + if (!cpdsyllablenum.empty()) { + switch (sfxflag) { + case 'c': { + numsyllable += 2; + break; + } + case 'J': { + numsyllable += 1; + break; + } + case 'I': { + if (rv && TESTAFF(rv->astr, 'J', rv->alen)) + numsyllable += 1; + break; + } + } + } + } + + // increment word number, if the second word has a compoundroot flag + if ((rv) && (compoundroot) && + (TESTAFF(rv->astr, compoundroot, rv->alen))) { + wordnum++; + } + // second word is acceptable, as a word with prefix or/and suffix? + // hungarian conventions: compounding is acceptable, + // when compound forms consist 2 word, otherwise + // the syllable number of root words is 6, or lesser. 
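The condition opening the branch below combines two limits: COMPOUNDWORDMAX caps the number of compound parts (-1 meaning unlimited), while COMPOUNDSYLLABLE, the Hungarian convention handled throughout this function, instead accepts more parts as long as the running syllable count stays within cpdmaxsyllable. A small paraphrase of that test follows; the limits used in main (3 words, 6 syllables) are illustrative, not values from any shipped dictionary.

#include <iostream>

// Paraphrase of: ((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
//                ((cpdmaxsyllable != 0) && (numsyllable <= cpdmaxsyllable))
bool within_compound_limits(int wordnum, int cpdwordmax,
                            int numsyllable, int cpdmaxsyllable) {
  return ((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
         ((cpdmaxsyllable != 0) && (numsyllable <= cpdmaxsyllable));
}

int main() {
  std::cout << std::boolalpha
            << within_compound_limits(1, 3, 0, 0) << ' '    // under the word limit
            << within_compound_limits(3, 3, 5, 6) << ' '    // word limit hit, syllables still fit
            << within_compound_limits(3, 3, 7, 6) << '\n';  // fails both limits
}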
+ if ((rv) && + (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || + ((cpdmaxsyllable != 0) && (numsyllable <= cpdmaxsyllable))) && + ((!checkcompounddup || (rv != rv_first)))) { + // forbid compound word, if it is a non-compound word with typical + // fault + if ((checkcompoundrep && cpdrep_check(word.c_str(), len)) || + cpdwordpair_check(word.c_str(), len)) + return NULL; + return rv_first; + } + + numsyllable = oldnumsyllable2; + wordnum = oldwordnum2; + + // perhaps second word is a compound word (recursive call) + if (wordnum + 2 < maxwordnum) { + rv = compound_check(st.substr(i), wordnum + 1, + numsyllable, maxwordnum, wnum + 1, words, rwords, 0, + is_sug, info); + + if (rv && !checkcpdtable.empty() && + ((scpd == 0 && + cpdpat_check(word.c_str(), i, rv_first, rv, affixed)) || + (scpd != 0 && + !cpdpat_check(word.c_str(), i, rv_first, rv, affixed)))) + rv = NULL; + } else { + rv = NULL; + } + if (rv) { + // forbid compound word, if it is a non-compound word with typical + // fault, or a dictionary word pair + + if (cpdwordpair_check(word.c_str(), len)) + return NULL; + + if (checkcompoundrep || forbiddenword) { + + if (checkcompoundrep && cpdrep_check(word.c_str(), len)) + return NULL; + + // check first part + if (strncmp(rv->word, word.c_str() + i, rv->blen) == 0) { + char r = st[i + rv->blen]; + st[i + rv->blen] = '\0'; + + if ((checkcompoundrep && cpdrep_check(st.c_str(), i + rv->blen)) || + cpdwordpair_check(st.c_str(), i + rv->blen)) { + st[ + i + rv->blen] = r; + continue; + } + + if (forbiddenword) { + struct hentry* rv2 = lookup(word.c_str()); + if (!rv2) + rv2 = affix_check(word.c_str(), len); + if (rv2 && rv2->astr && + TESTAFF(rv2->astr, forbiddenword, rv2->alen) && + (strncmp(rv2->word, st.c_str(), i + rv->blen) == 0)) { + return NULL; + } + } + st[i + rv->blen] = r; + } + } + return rv_first; + } + } while (striple && !checkedstriple); // end of striple loop + + if (checkedstriple) { + i++; + checkedstriple = 0; + striple = 0; + } + + } // first word is ok condition + + if (soldi != 0) { + i = soldi; + soldi = 0; + len = oldlen; + cmin = oldcmin; + cmax = oldcmax; + } + scpd++; + + } while (!onlycpdrule && simplifiedcpd && + scpd <= checkcpdtable.size()); // end of simplifiedcpd loop + + scpd = 0; + wordnum = oldwordnum; + numsyllable = oldnumsyllable; + + if (soldi != 0) { + i = soldi; + st.assign(word); // XXX add more optim. + soldi = 0; + } else + st[i] = ch; + + } while (!defcpdtable.empty() && oldwordnum == 0 && + onlycpdrule++ < 1); // end of onlycpd loop + } + + return NULL; +} + +// check if compound word is correctly spelled +// hu_mov_rule = spec. 
Hungarian rule (XXX) +int AffixMgr::compound_check_morph(const char* word, + int len, + short wordnum, + short numsyllable, + short maxwordnum, + short wnum, + hentry** words, + hentry** rwords, + char hu_mov_rule, + std::string& result, + const std::string* partresult) { + int i; + short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2; + int ok = 0; + + struct hentry* rv = NULL; + struct hentry* rv_first; + std::string st; + char ch; + + int checked_prefix; + std::string presult; + + int cmin; + int cmax; + + char affixed = 0; + hentry** oldwords = words; + + // add a time limit to handle possible + // combinatorical explosion of the overlapping words + + HUNSPELL_THREAD_LOCAL clock_t timelimit; + + if (wordnum == 0) { + // get the start time, seeing as we're reusing this set to 0 + // to flag timeout, use clock() + 1 to avoid start clock() + // of 0 as being a timeout + timelimit = clock() + 1; + } + else if (timelimit != 0 && (clock() > timelimit + TIMELIMIT)) { + timelimit = 0; + } + + setcminmax(&cmin, &cmax, word, len); + + st.assign(word); + + for (i = cmin; i < cmax; i++) { + // go to end of the UTF-8 character + if (utf8) { + for (; (st[i] & 0xc0) == 0x80; i++) + ; + if (i >= cmax) + return 0; + } + + words = oldwords; + int onlycpdrule = (words) ? 1 : 0; + + do { // onlycpdrule loop + + if (timelimit == 0) + return 0; + + oldnumsyllable = numsyllable; + oldwordnum = wordnum; + checked_prefix = 0; + + ch = st[i]; + st[i] = '\0'; + sfx = NULL; + + // FIRST WORD + + affixed = 1; + + presult.clear(); + if (partresult) + presult.append(*partresult); + + rv = lookup(st.c_str()); // perhaps without prefix + + // forbid dictionary stems with COMPOUNDFORBIDFLAG in + // compound words, overriding the effect of COMPOUNDPERMITFLAG + if ((rv) && compoundforbidflag && + TESTAFF(rv->astr, compoundforbidflag, rv->alen) && !hu_mov_rule) + continue; + + // search homonym with compound flag + while ((rv) && !hu_mov_rule && + ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) || + !((compoundflag && !words && !onlycpdrule && + TESTAFF(rv->astr, compoundflag, rv->alen)) || + (compoundbegin && !wordnum && !onlycpdrule && + TESTAFF(rv->astr, compoundbegin, rv->alen)) || + (compoundmiddle && wordnum && !words && !onlycpdrule && + TESTAFF(rv->astr, compoundmiddle, rv->alen)) || + (!defcpdtable.empty() && onlycpdrule && + ((!words && !wordnum && + defcpd_check(&words, wnum, rv, rwords, 0)) || + (words && + defcpd_check(&words, wnum, rv, rwords, 0))))))) { + rv = rv->next_homonym; + } + + if (timelimit == 0) + return 0; + + if (rv) + affixed = 0; + + if (rv) { + presult.push_back(MSEP_FLD); + presult.append(MORPH_PART); + presult.append(st.c_str()); + if (!HENTRY_FIND(rv, MORPH_STEM)) { + presult.push_back(MSEP_FLD); + presult.append(MORPH_STEM); + presult.append(st.c_str()); + } + if (HENTRY_DATA(rv)) { + presult.push_back(MSEP_FLD); + presult.append(HENTRY_DATA2(rv)); + } + } + + if (!rv) { + if (compoundflag && + !(rv = + prefix_check(st.c_str(), i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, + compoundflag))) { + if (((rv = suffix_check(st.c_str(), i, 0, NULL, FLAG_NULL, + compoundflag, + hu_mov_rule ? 
IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && + (rv = suffix_check_twosfx(st.c_str(), i, 0, NULL, compoundflag)))) && + !hu_mov_rule && sfx->getCont() && + ((compoundforbidflag && + TESTAFF(sfx->getCont(), compoundforbidflag, + sfx->getContLen())) || + (compoundend && + TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) { + rv = NULL; + } + } + + if (rv || + (((wordnum == 0) && compoundbegin && + ((rv = suffix_check(st.c_str(), i, 0, NULL, FLAG_NULL, + compoundbegin, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && + (rv = suffix_check_twosfx( + st.c_str(), i, 0, NULL, + compoundbegin))) || // twofold suffix+compound + (rv = prefix_check(st.c_str(), i, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, + compoundbegin)))) || + ((wordnum > 0) && compoundmiddle && + ((rv = suffix_check(st.c_str(), i, 0, NULL, FLAG_NULL, + compoundmiddle, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || + (compoundmoresuffixes && + (rv = suffix_check_twosfx( + st.c_str(), i, 0, NULL, + compoundmiddle))) || // twofold suffix+compound + (rv = prefix_check(st.c_str(), i, + hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, + compoundmiddle)))))) { + std::string p; + if (compoundflag) + p = affix_check_morph(st.c_str(), i, compoundflag); + if (p.empty()) { + if ((wordnum == 0) && compoundbegin) { + p = affix_check_morph(st.c_str(), i, compoundbegin); + } else if ((wordnum > 0) && compoundmiddle) { + p = affix_check_morph(st.c_str(), i, compoundmiddle); + } + } + if (!p.empty()) { + presult.push_back(MSEP_FLD); + presult.append(MORPH_PART); + presult.append(st.c_str()); + line_uniq_app(p, MSEP_REC); + presult.append(p); + } + checked_prefix = 1; + } + // else check forbiddenwords + } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || + TESTAFF(rv->astr, needaffix, rv->alen))) { + st[i] = ch; + continue; + } + + // check non_compound flag in suffix and prefix + if ((rv) && !hu_mov_rule && + ((pfx && pfx->getCont() && + TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())) || + (sfx && sfx->getCont() && + TESTAFF(sfx->getCont(), compoundforbidflag, sfx->getContLen())))) { + continue; + } + + // check compoundend flag in suffix and prefix + if ((rv) && !checked_prefix && compoundend && !hu_mov_rule && + ((pfx && pfx->getCont() && + TESTAFF(pfx->getCont(), compoundend, pfx->getContLen())) || + (sfx && sfx->getCont() && + TESTAFF(sfx->getCont(), compoundend, sfx->getContLen())))) { + continue; + } + + // check compoundmiddle flag in suffix and prefix + if ((rv) && !checked_prefix && (wordnum == 0) && compoundmiddle && + !hu_mov_rule && + ((pfx && pfx->getCont() && + TESTAFF(pfx->getCont(), compoundmiddle, pfx->getContLen())) || + (sfx && sfx->getCont() && + TESTAFF(sfx->getCont(), compoundmiddle, sfx->getContLen())))) { + rv = NULL; + } + + // check forbiddenwords + if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) + continue; + + // increment word number, if the second root has a compoundroot flag + if ((rv) && (compoundroot) && + (TESTAFF(rv->astr, compoundroot, rv->alen))) { + wordnum++; + } + + // first word is acceptable in compound words? 
+ if (((rv) && + (checked_prefix || (words && words[wnum]) || + (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) || + ((oldwordnum == 0) && compoundbegin && + TESTAFF(rv->astr, compoundbegin, rv->alen)) || + ((oldwordnum > 0) && compoundmiddle && + TESTAFF(rv->astr, compoundmiddle, rv->alen)) + // LANG_hu section: spec. Hungarian rule + || ((langnum == LANG_hu) && // hu_mov_rule + hu_mov_rule && (TESTAFF(rv->astr, 'F', rv->alen) || + TESTAFF(rv->astr, 'G', rv->alen) || + TESTAFF(rv->astr, 'H', rv->alen))) + // END of LANG_hu section + ) && + !((checkcompoundtriple && !words && // test triple letters + (word[i - 1] == word[i]) && + (((i > 1) && (word[i - 1] == word[i - 2])) || + ((word[i - 1] == word[i + 1])) // may be word[i+1] == '\0' + )) || + ( + // test CHECKCOMPOUNDPATTERN + !checkcpdtable.empty() && !words && + cpdpat_check(word, i, rv, NULL, affixed)) || + (checkcompoundcase && !words && cpdcase_check(word, i)))) + // LANG_hu section: spec. Hungarian rule + || + ((!rv) && (langnum == LANG_hu) && hu_mov_rule && + (rv = affix_check(st.c_str(), i)) && + (sfx && sfx->getCont() && + (TESTAFF(sfx->getCont(), (unsigned short)'x', sfx->getContLen()) || + TESTAFF(sfx->getCont(), (unsigned short)'%', sfx->getContLen())))) + // END of LANG_hu section + ) { + // LANG_hu section: spec. Hungarian rule + if (langnum == LANG_hu) { + // calculate syllable number of the word + numsyllable += get_syllable(st.substr(0, i)); + + // + 1 word, if syllable number of the prefix > 1 (hungarian + // convention) + if (pfx && (get_syllable(pfx->getKey()) > 1)) + wordnum++; + } + // END of LANG_hu section + + // NEXT WORD(S) + rv_first = rv; + rv = lookup((word + i)); // perhaps without prefix + + // search homonym with compound flag + while ((rv) && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) || + !((compoundflag && !words && + TESTAFF(rv->astr, compoundflag, rv->alen)) || + (compoundend && !words && + TESTAFF(rv->astr, compoundend, rv->alen)) || + (!defcpdtable.empty() && words && + defcpd_check(&words, wnum + 1, rv, NULL, 1))))) { + rv = rv->next_homonym; + } + + if (rv && words && words[wnum + 1]) { + result.append(presult); + result.push_back(MSEP_FLD); + result.append(MORPH_PART); + result.append(word + i); + if (complexprefixes && HENTRY_DATA(rv)) + result.append(HENTRY_DATA2(rv)); + if (!HENTRY_FIND(rv, MORPH_STEM)) { + result.push_back(MSEP_FLD); + result.append(MORPH_STEM); + result.append(HENTRY_WORD(rv)); + } + // store the pointer of the hash entry + if (!complexprefixes && HENTRY_DATA(rv)) { + result.push_back(MSEP_FLD); + result.append(HENTRY_DATA2(rv)); + } + result.push_back(MSEP_REC); + return 0; + } + + oldnumsyllable2 = numsyllable; + oldwordnum2 = wordnum; + + // LANG_hu section: spec. Hungarian rule + if ((rv) && (langnum == LANG_hu) && + (TESTAFF(rv->astr, 'I', rv->alen)) && + !(TESTAFF(rv->astr, 'J', rv->alen))) { + numsyllable--; + } + // END of LANG_hu section + // increment word number, if the second root has a compoundroot flag + if ((rv) && (compoundroot) && + (TESTAFF(rv->astr, compoundroot, rv->alen))) { + wordnum++; + } + + // check forbiddenwords + if ((rv) && (rv->astr) && + (TESTAFF(rv->astr, forbiddenword, rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) { + st[i] = ch; + continue; + } + + // second word is acceptable, as a root? + // hungarian conventions: compounding is acceptable, + // when compound forms consist of 2 words, or if more, + // then the syllable number of root words must be 6, or lesser. 
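+ // (i.e. the rest of the word is a dictionary root carrying the general compound
+ // flag or the compound-end flag, the compound stays within the COMPOUNDWORDMAX
+ // word count or the COMPOUNDSYLLABLE syllable limit, and CHECKCOMPOUNDDUP does
+ // not forbid repeating the same root)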
+ if ((rv) && + ((compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) || + (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) && + (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || + ((cpdmaxsyllable != 0) && + (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->blen)) <= + cpdmaxsyllable))) && + ((!checkcompounddup || (rv != rv_first)))) { + // bad compound word + result.append(presult); + result.push_back(MSEP_FLD); + result.append(MORPH_PART); + result.append(word + i); + + if (HENTRY_DATA(rv)) { + if (complexprefixes) + result.append(HENTRY_DATA2(rv)); + if (!HENTRY_FIND(rv, MORPH_STEM)) { + result.push_back(MSEP_FLD); + result.append(MORPH_STEM); + result.append(HENTRY_WORD(rv)); + } + // store the pointer of the hash entry + if (!complexprefixes) { + result.push_back(MSEP_FLD); + result.append(HENTRY_DATA2(rv)); + } + } + result.push_back(MSEP_REC); + ok = 1; + } + + numsyllable = oldnumsyllable2; + wordnum = oldwordnum2; + + // perhaps second word has prefix or/and suffix + sfx = NULL; + sfxflag = FLAG_NULL; + + if (compoundflag && !onlycpdrule) + rv = affix_check((word + i), strlen(word + i), compoundflag); + else + rv = NULL; + + if (!rv && compoundend && !onlycpdrule) { + sfx = NULL; + pfx = NULL; + rv = affix_check((word + i), strlen(word + i), compoundend); + } + + if (!rv && !defcpdtable.empty() && words) { + rv = affix_check((word + i), strlen(word + i), 0, IN_CPD_END); + if (rv && words && defcpd_check(&words, wnum + 1, rv, NULL, 1)) { + std::string m; + if (compoundflag) + m = affix_check_morph((word + i), strlen(word + i), compoundflag); + if (m.empty() && compoundend) { + m = affix_check_morph((word + i), strlen(word + i), compoundend); + } + result.append(presult); + if (!m.empty()) { + result.push_back(MSEP_FLD); + result.append(MORPH_PART); + result.append(word + i); + line_uniq_app(m, MSEP_REC); + result.append(m); + } + result.push_back(MSEP_REC); + ok = 1; + } + } + + // check non_compound flag in suffix and prefix + if ((rv) && + ((pfx && pfx->getCont() && + TESTAFF(pfx->getCont(), compoundforbidflag, pfx->getContLen())) || + (sfx && sfx->getCont() && + TESTAFF(sfx->getCont(), compoundforbidflag, + sfx->getContLen())))) { + rv = NULL; + } + + // check forbiddenwords + if ((rv) && (rv->astr) && + (TESTAFF(rv->astr, forbiddenword, rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen)) && + (!TESTAFF(rv->astr, needaffix, rv->alen))) { + st[i] = ch; + continue; + } + + if (langnum == LANG_hu) { + // calculate syllable number of the word + numsyllable += get_syllable(word + i); + + // - affix syllable num. 
+ // XXX only second suffix (inflections, not derivations) + if (sfxappnd) { + std::string tmp(sfxappnd); + reverseword(tmp); + numsyllable -= short(get_syllable(tmp) + sfxextra); + } else { + numsyllable -= short(sfxextra); + } + + // + 1 word, if syllable number of the prefix > 1 (hungarian + // convention) + if (pfx && (get_syllable(pfx->getKey()) > 1)) + wordnum++; + + // increment syllable num, if last word has a SYLLABLENUM flag + // and the suffix is beginning `s' + + if (!cpdsyllablenum.empty()) { + switch (sfxflag) { + case 'c': { + numsyllable += 2; + break; + } + case 'J': { + numsyllable += 1; + break; + } + case 'I': { + if (rv && TESTAFF(rv->astr, 'J', rv->alen)) + numsyllable += 1; + break; + } + } + } + } + + // increment word number, if the second word has a compoundroot flag + if ((rv) && (compoundroot) && + (TESTAFF(rv->astr, compoundroot, rv->alen))) { + wordnum++; + } + // second word is acceptable, as a word with prefix or/and suffix? + // hungarian conventions: compounding is acceptable, + // when compound forms consist 2 word, otherwise + // the syllable number of root words is 6, or lesser. + if ((rv) && + (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || + ((cpdmaxsyllable != 0) && (numsyllable <= cpdmaxsyllable))) && + ((!checkcompounddup || (rv != rv_first)))) { + std::string m; + if (compoundflag) + m = affix_check_morph((word + i), strlen(word + i), compoundflag); + if (m.empty() && compoundend) { + m = affix_check_morph((word + i), strlen(word + i), compoundend); + } + result.append(presult); + if (!m.empty()) { + result.push_back(MSEP_FLD); + result.append(MORPH_PART); + result.append(word + i); + line_uniq_app(m, MSEP_REC); + result.push_back(MSEP_FLD); + result.append(m); + } + result.push_back(MSEP_REC); + ok = 1; + } + + numsyllable = oldnumsyllable2; + wordnum = oldwordnum2; + + // perhaps second word is a compound word (recursive call) + if ((wordnum + 2 < maxwordnum) && (ok == 0)) { + compound_check_morph((word + i), strlen(word + i), wordnum + 1, + numsyllable, maxwordnum, wnum + 1, words, rwords, 0, + result, &presult); + } else { + rv = NULL; + } + } + st[i] = ch; + wordnum = oldwordnum; + numsyllable = oldnumsyllable; + + } while (!defcpdtable.empty() && oldwordnum == 0 && + onlycpdrule++ < 1); // end of onlycpd loop + } + return 0; +} + + +inline int AffixMgr::isRevSubset(const char* s1, + const char* end_of_s2, + int len) { + while ((len > 0) && (*s1 != '\0') && ((*s1 == *end_of_s2) || (*s1 == '.'))) { + s1++; + end_of_s2--; + len--; + } + return (*s1 == '\0'); +} + +// check word for suffixes +struct hentry* AffixMgr::suffix_check(const char* word, + int len, + int sfxopts, + PfxEntry* ppfx, + const FLAG cclass, + const FLAG needflag, + char in_compound) { + struct hentry* rv = NULL; + PfxEntry* ep = ppfx; + + // first handle the special case of 0 length suffixes + SfxEntry* se = sStart[0]; + + while (se) { + if (!cclass || se->getCont()) { + // suffixes are not allowed in beginning of compounds + if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass + // except when signed with compoundpermitflag flag + (se->getCont() && compoundpermitflag && + TESTAFF(se->getCont(), compoundpermitflag, se->getContLen()))) && + (!circumfix || + // no circumfix flag in prefix and suffix + ((!ppfx || !(ep->getCont()) || + !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && + (!se->getCont() || + !(TESTAFF(se->getCont(), circumfix, se->getContLen())))) || + // circumfix flag in prefix AND suffix + ((ppfx && (ep->getCont()) && + TESTAFF(ep->getCont(), 
circumfix, ep->getContLen())) && + (se->getCont() && + (TESTAFF(se->getCont(), circumfix, se->getContLen()))))) && + // fogemorpheme + (in_compound || + !(se->getCont() && + (TESTAFF(se->getCont(), onlyincompound, se->getContLen())))) && + // needaffix on prefix or first suffix + (cclass || + !(se->getCont() && + TESTAFF(se->getCont(), needaffix, se->getContLen())) || + (ppfx && + !((ep->getCont()) && + TESTAFF(ep->getCont(), needaffix, ep->getContLen()))))) { + rv = se->checkword(word, len, sfxopts, ppfx, + (FLAG)cclass, needflag, + (in_compound ? 0 : onlyincompound)); + if (rv) { + sfx = se; // BUG: sfx not stateless + return rv; + } + } + } + se = se->getNext(); + } + + // now handle the general case + if (len == 0) + return NULL; // FULLSTRIP + unsigned char sp = *((const unsigned char*)(word + len - 1)); + SfxEntry* sptr = sStart[sp]; + + while (sptr) { + if (isRevSubset(sptr->getKey(), word + len - 1, len)) { + // suffixes are not allowed in beginning of compounds + if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass + // except when signed with compoundpermitflag flag + (sptr->getCont() && compoundpermitflag && + TESTAFF(sptr->getCont(), compoundpermitflag, + sptr->getContLen()))) && + (!circumfix || + // no circumfix flag in prefix and suffix + ((!ppfx || !(ep->getCont()) || + !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && + (!sptr->getCont() || + !(TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())))) || + // circumfix flag in prefix AND suffix + ((ppfx && (ep->getCont()) && + TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && + (sptr->getCont() && + (TESTAFF(sptr->getCont(), circumfix, sptr->getContLen()))))) && + // fogemorpheme + (in_compound || + !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, + sptr->getContLen()))))) && + // needaffix on prefix or first suffix + (cclass || + !(sptr->getCont() && + TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) || + (ppfx && + !((ep->getCont()) && + TESTAFF(ep->getCont(), needaffix, ep->getContLen()))))) + if (in_compound != IN_CPD_END || ppfx || + !(sptr->getCont() && + TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))) { + rv = sptr->checkword(word, len, sfxopts, ppfx, + cclass, needflag, + (in_compound ? 0 : onlyincompound)); + if (rv) { + sfx = sptr; // BUG: sfx not stateless + sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless + if (!sptr->getCont()) + sfxappnd = sptr->getKey(); // BUG: sfxappnd not stateless + // LANG_hu section: spec. 
Hungarian rule + else if (langnum == LANG_hu && sptr->getKeyLen() && + sptr->getKey()[0] == 'i' && sptr->getKey()[1] != 'y' && + sptr->getKey()[1] != 't') { + sfxextra = 1; + } + // END of LANG_hu section + return rv; + } + } + sptr = sptr->getNextEQ(); + } else { + sptr = sptr->getNextNE(); + } + } + + return NULL; +} + +// check word for two-level suffixes +struct hentry* AffixMgr::suffix_check_twosfx(const char* word, + int len, + int sfxopts, + PfxEntry* ppfx, + const FLAG needflag) { + struct hentry* rv = NULL; + + // first handle the special case of 0 length suffixes + SfxEntry* se = sStart[0]; + while (se) { + if (contclasses[se->getFlag()]) { + rv = se->check_twosfx(word, len, sfxopts, ppfx, needflag); + if (rv) + return rv; + } + se = se->getNext(); + } + + // now handle the general case + if (len == 0) + return NULL; // FULLSTRIP + unsigned char sp = *((const unsigned char*)(word + len - 1)); + SfxEntry* sptr = sStart[sp]; + + while (sptr) { + if (isRevSubset(sptr->getKey(), word + len - 1, len)) { + if (contclasses[sptr->getFlag()]) { + rv = sptr->check_twosfx(word, len, sfxopts, ppfx, needflag); + if (rv) { + sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless + if (!sptr->getCont()) + sfxappnd = sptr->getKey(); // BUG: sfxappnd not stateless + return rv; + } + } + sptr = sptr->getNextEQ(); + } else { + sptr = sptr->getNextNE(); + } + } + + return NULL; +} + +// check word for two-level suffixes and morph +std::string AffixMgr::suffix_check_twosfx_morph(const char* word, + int len, + int sfxopts, + PfxEntry* ppfx, + const FLAG needflag) { + std::string result; + std::string result2; + std::string result3; + + // first handle the special case of 0 length suffixes + SfxEntry* se = sStart[0]; + while (se) { + if (contclasses[se->getFlag()]) { + std::string st = se->check_twosfx_morph(word, len, sfxopts, ppfx, needflag); + if (!st.empty()) { + if (ppfx) { + if (ppfx->getMorph()) { + result.append(ppfx->getMorph()); + result.push_back(MSEP_FLD); + } else + debugflag(result, ppfx->getFlag()); + } + result.append(st); + if (se->getMorph()) { + result.push_back(MSEP_FLD); + result.append(se->getMorph()); + } else + debugflag(result, se->getFlag()); + result.push_back(MSEP_REC); + } + } + se = se->getNext(); + } + + // now handle the general case + if (len == 0) + return std::string(); // FULLSTRIP + unsigned char sp = *((const unsigned char*)(word + len - 1)); + SfxEntry* sptr = sStart[sp]; + + while (sptr) { + if (isRevSubset(sptr->getKey(), word + len - 1, len)) { + if (contclasses[sptr->getFlag()]) { + std::string st = sptr->check_twosfx_morph(word, len, sfxopts, ppfx, needflag); + if (!st.empty()) { + sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless + if (!sptr->getCont()) + sfxappnd = sptr->getKey(); // BUG: sfxappnd not stateless + result2.assign(st); + + result3.clear(); + + if (sptr->getMorph()) { + result3.push_back(MSEP_FLD); + result3.append(sptr->getMorph()); + } else + debugflag(result3, sptr->getFlag()); + strlinecat(result2, result3); + result2.push_back(MSEP_REC); + result.append(result2); + } + } + sptr = sptr->getNextEQ(); + } else { + sptr = sptr->getNextNE(); + } + } + + return result; +} + +std::string AffixMgr::suffix_check_morph(const char* word, + int len, + int sfxopts, + PfxEntry* ppfx, + const FLAG cclass, + const FLAG needflag, + char in_compound) { + std::string result; + + struct hentry* rv = NULL; + + PfxEntry* ep = ppfx; + + // first handle the special case of 0 length suffixes + SfxEntry* se = sStart[0]; + while (se) { + if (!cclass || 
se->getCont()) { + // suffixes are not allowed in beginning of compounds + if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass + // except when signed with compoundpermitflag flag + (se->getCont() && compoundpermitflag && + TESTAFF(se->getCont(), compoundpermitflag, se->getContLen()))) && + (!circumfix || + // no circumfix flag in prefix and suffix + ((!ppfx || !(ep->getCont()) || + !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && + (!se->getCont() || + !(TESTAFF(se->getCont(), circumfix, se->getContLen())))) || + // circumfix flag in prefix AND suffix + ((ppfx && (ep->getCont()) && + TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && + (se->getCont() && + (TESTAFF(se->getCont(), circumfix, se->getContLen()))))) && + // fogemorpheme + (in_compound || + !((se->getCont() && + (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) && + // needaffix on prefix or first suffix + (cclass || + !(se->getCont() && + TESTAFF(se->getCont(), needaffix, se->getContLen())) || + (ppfx && + !((ep->getCont()) && + TESTAFF(ep->getCont(), needaffix, ep->getContLen())))))) + rv = se->checkword(word, len, sfxopts, ppfx, cclass, + needflag, FLAG_NULL); + while (rv) { + if (ppfx) { + if (ppfx->getMorph()) { + result.append(ppfx->getMorph()); + result.push_back(MSEP_FLD); + } else + debugflag(result, ppfx->getFlag()); + } + if (complexprefixes && HENTRY_DATA(rv)) + result.append(HENTRY_DATA2(rv)); + if (!HENTRY_FIND(rv, MORPH_STEM)) { + result.push_back(MSEP_FLD); + result.append(MORPH_STEM); + result.append(HENTRY_WORD(rv)); + } + + if (!complexprefixes && HENTRY_DATA(rv)) { + result.push_back(MSEP_FLD); + result.append(HENTRY_DATA2(rv)); + } + if (se->getMorph()) { + result.push_back(MSEP_FLD); + result.append(se->getMorph()); + } else + debugflag(result, se->getFlag()); + result.push_back(MSEP_REC); + rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag); + } + } + se = se->getNext(); + } + + // now handle the general case + if (len == 0) + return std::string(); // FULLSTRIP + unsigned char sp = *((const unsigned char*)(word + len - 1)); + SfxEntry* sptr = sStart[sp]; + + while (sptr) { + if (isRevSubset(sptr->getKey(), word + len - 1, len)) { + // suffixes are not allowed in beginning of compounds + if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass + // except when signed with compoundpermitflag flag + (sptr->getCont() && compoundpermitflag && + TESTAFF(sptr->getCont(), compoundpermitflag, + sptr->getContLen()))) && + (!circumfix || + // no circumfix flag in prefix and suffix + ((!ppfx || !(ep->getCont()) || + !TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && + (!sptr->getCont() || + !(TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())))) || + // circumfix flag in prefix AND suffix + ((ppfx && (ep->getCont()) && + TESTAFF(ep->getCont(), circumfix, ep->getContLen())) && + (sptr->getCont() && + (TESTAFF(sptr->getCont(), circumfix, sptr->getContLen()))))) && + // fogemorpheme + (in_compound || + !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, + sptr->getContLen()))))) && + // needaffix on first suffix + (cclass || + !(sptr->getCont() && + TESTAFF(sptr->getCont(), needaffix, sptr->getContLen()))))) + rv = sptr->checkword(word, len, sfxopts, ppfx, cclass, + needflag, FLAG_NULL); + while (rv) { + if (ppfx) { + if (ppfx->getMorph()) { + result.append(ppfx->getMorph()); + result.push_back(MSEP_FLD); + } else + debugflag(result, ppfx->getFlag()); + } + if (complexprefixes && HENTRY_DATA(rv)) + result.append(HENTRY_DATA2(rv)); + if (!HENTRY_FIND(rv, 
MORPH_STEM)) { + result.push_back(MSEP_FLD); + result.append(MORPH_STEM); + result.append(HENTRY_WORD(rv)); + } + + if (!complexprefixes && HENTRY_DATA(rv)) { + result.push_back(MSEP_FLD); + result.append(HENTRY_DATA2(rv)); + } + + if (sptr->getMorph()) { + result.push_back(MSEP_FLD); + result.append(sptr->getMorph()); + } else + debugflag(result, sptr->getFlag()); + result.push_back(MSEP_REC); + rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag); + } + sptr = sptr->getNextEQ(); + } else { + sptr = sptr->getNextNE(); + } + } + + return result; +} + +// check if word with affixes is correctly spelled +struct hentry* AffixMgr::affix_check(const char* word, + int len, + const FLAG needflag, + char in_compound) { + + // check all prefixes (also crossed with suffixes if allowed) + struct hentry* rv = prefix_check(word, len, in_compound, needflag); + if (rv) + return rv; + + // if still not found check all suffixes + rv = suffix_check(word, len, 0, NULL, FLAG_NULL, needflag, in_compound); + + if (havecontclass) { + sfx = NULL; + pfx = NULL; + + if (rv) + return rv; + // if still not found check all two-level suffixes + rv = suffix_check_twosfx(word, len, 0, NULL, needflag); + + if (rv) + return rv; + // if still not found check all two-level suffixes + rv = prefix_check_twosfx(word, len, IN_CPD_NOT, needflag); + } + + return rv; +} + +// check if word with affixes is correctly spelled +std::string AffixMgr::affix_check_morph(const char* word, + int len, + const FLAG needflag, + char in_compound) { + std::string result; + + // check all prefixes (also crossed with suffixes if allowed) + std::string st = prefix_check_morph(word, len, in_compound); + if (!st.empty()) { + result.append(st); + } + + // if still not found check all suffixes + st = suffix_check_morph(word, len, 0, NULL, '\0', needflag, in_compound); + if (!st.empty()) { + result.append(st); + } + + if (havecontclass) { + sfx = NULL; + pfx = NULL; + // if still not found check all two-level suffixes + st = suffix_check_twosfx_morph(word, len, 0, NULL, needflag); + if (!st.empty()) { + result.append(st); + } + + // if still not found check all two-level suffixes + st = prefix_check_twosfx_morph(word, len, IN_CPD_NOT, needflag); + if (!st.empty()) { + result.append(st); + } + } + + return result; +} + +// morphcmp(): compare MORPH_DERI_SFX, MORPH_INFL_SFX and MORPH_TERM_SFX fields +// in the first line of the inputs +// return 0, if inputs equal +// return 1, if inputs may equal with a secondary suffix +// otherwise return -1 +static int morphcmp(const char* s, const char* t) { + int se = 0; + int te = 0; + const char* sl; + const char* tl; + const char* olds; + const char* oldt; + if (!s || !t) + return 1; + olds = s; + sl = strchr(s, '\n'); + s = strstr(s, MORPH_DERI_SFX); + if (!s || (sl && sl < s)) + s = strstr(olds, MORPH_INFL_SFX); + if (!s || (sl && sl < s)) { + s = strstr(olds, MORPH_TERM_SFX); + olds = NULL; + } + oldt = t; + tl = strchr(t, '\n'); + t = strstr(t, MORPH_DERI_SFX); + if (!t || (tl && tl < t)) + t = strstr(oldt, MORPH_INFL_SFX); + if (!t || (tl && tl < t)) { + t = strstr(oldt, MORPH_TERM_SFX); + oldt = NULL; + } + while (s && t && (!sl || sl > s) && (!tl || tl > t)) { + s += MORPH_TAG_LEN; + t += MORPH_TAG_LEN; + se = 0; + te = 0; + while ((*s == *t) && !se && !te) { + s++; + t++; + switch (*s) { + case ' ': + case '\n': + case '\t': + case '\0': + se = 1; + } + switch (*t) { + case ' ': + case '\n': + case '\t': + case '\0': + te = 1; + } + } + if (!se || !te) { + // not terminal suffix difference + 
if (olds) + return -1; + return 1; + } + olds = s; + s = strstr(s, MORPH_DERI_SFX); + if (!s || (sl && sl < s)) + s = strstr(olds, MORPH_INFL_SFX); + if (!s || (sl && sl < s)) { + s = strstr(olds, MORPH_TERM_SFX); + olds = NULL; + } + oldt = t; + t = strstr(t, MORPH_DERI_SFX); + if (!t || (tl && tl < t)) + t = strstr(oldt, MORPH_INFL_SFX); + if (!t || (tl && tl < t)) { + t = strstr(oldt, MORPH_TERM_SFX); + oldt = NULL; + } + } + if (!s && !t && se && te) + return 0; + return 1; +} + +std::string AffixMgr::morphgen(const char* ts, + int wl, + const unsigned short* ap, + unsigned short al, + const char* morph, + const char* targetmorph, + int level) { + // handle suffixes + if (!morph) + return std::string(); + + // check substandard flag + if (TESTAFF(ap, substandard, al)) + return std::string(); + + if (morphcmp(morph, targetmorph) == 0) + return ts; + + size_t stemmorphcatpos; + std::string mymorph; + + // use input suffix fields, if exist + if (strstr(morph, MORPH_INFL_SFX) || strstr(morph, MORPH_DERI_SFX)) { + mymorph.assign(morph); + mymorph.push_back(MSEP_FLD); + stemmorphcatpos = mymorph.size(); + } else { + stemmorphcatpos = std::string::npos; + } + + for (int i = 0; i < al; i++) { + const unsigned char c = (unsigned char)(ap[i] & 0x00FF); + SfxEntry* sptr = sFlag[c]; + while (sptr) { + if (sptr->getFlag() == ap[i] && sptr->getMorph() && + ((sptr->getContLen() == 0) || + // don't generate forms with substandard affixes + !TESTAFF(sptr->getCont(), substandard, sptr->getContLen()))) { + const char* stemmorph; + if (stemmorphcatpos != std::string::npos) { + mymorph.replace(stemmorphcatpos, std::string::npos, sptr->getMorph()); + stemmorph = mymorph.c_str(); + } else { + stemmorph = sptr->getMorph(); + } + + int cmp = morphcmp(stemmorph, targetmorph); + + if (cmp == 0) { + std::string newword = sptr->add(ts, wl); + if (!newword.empty()) { + hentry* check = pHMgr->lookup(newword.c_str()); // XXX extra dic + if (!check || !check->astr || + !(TESTAFF(check->astr, forbiddenword, check->alen) || + TESTAFF(check->astr, ONLYUPCASEFLAG, check->alen))) { + return newword; + } + } + } + + // recursive call for secondary suffixes + if ((level == 0) && (cmp == 1) && (sptr->getContLen() > 0) && + !TESTAFF(sptr->getCont(), substandard, sptr->getContLen())) { + std::string newword = sptr->add(ts, wl); + if (!newword.empty()) { + std::string newword2 = + morphgen(newword.c_str(), newword.size(), sptr->getCont(), + sptr->getContLen(), stemmorph, targetmorph, 1); + + if (!newword2.empty()) { + return newword2; + } + } + } + } + sptr = sptr->getFlgNxt(); + } + } + return std::string(); +} + +int AffixMgr::expand_rootword(struct guessword* wlst, + int maxn, + const char* ts, + int wl, + const unsigned short* ap, + unsigned short al, + const char* bad, + int badl, + const char* phon) { + int nh = 0; + // first add root word to list + if ((nh < maxn) && + !(al && ((needaffix && TESTAFF(ap, needaffix, al)) || + (onlyincompound && TESTAFF(ap, onlyincompound, al))))) { + wlst[nh].word = mystrdup(ts); + if (!wlst[nh].word) + return 0; + wlst[nh].allow = false; + wlst[nh].orig = NULL; + nh++; + // add special phonetic version + if (phon && (nh < maxn)) { + wlst[nh].word = mystrdup(phon); + if (!wlst[nh].word) + return nh - 1; + wlst[nh].allow = false; + wlst[nh].orig = mystrdup(ts); + if (!wlst[nh].orig) + return nh - 1; + nh++; + } + } + + // handle suffixes + for (int i = 0; i < al; i++) { + const unsigned char c = (unsigned char)(ap[i] & 0x00FF); + SfxEntry* sptr = sFlag[c]; + while (sptr) { + if 
((sptr->getFlag() == ap[i]) && + (!sptr->getKeyLen() || + ((badl > sptr->getKeyLen()) && + (strcmp(sptr->getAffix(), bad + badl - sptr->getKeyLen()) == 0))) && + // check needaffix flag + !(sptr->getCont() && + ((needaffix && + TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) || + (circumfix && + TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())) || + (onlyincompound && + TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) { + std::string newword = sptr->add(ts, wl); + if (!newword.empty()) { + if (nh < maxn) { + wlst[nh].word = mystrdup(newword.c_str()); + wlst[nh].allow = sptr->allowCross(); + wlst[nh].orig = NULL; + nh++; + // add special phonetic version + if (phon && (nh < maxn)) { + std::string prefix(phon); + std::string key(sptr->getKey()); + reverseword(key); + prefix.append(key); + wlst[nh].word = mystrdup(prefix.c_str()); + if (!wlst[nh].word) + return nh - 1; + wlst[nh].allow = false; + wlst[nh].orig = mystrdup(newword.c_str()); + if (!wlst[nh].orig) + return nh - 1; + nh++; + } + } + } + } + sptr = sptr->getFlgNxt(); + } + } + + int n = nh; + + // handle cross products of prefixes and suffixes + for (int j = 1; j < n; j++) + if (wlst[j].allow) { + for (int k = 0; k < al; k++) { + const unsigned char c = (unsigned char)(ap[k] & 0x00FF); + PfxEntry* cptr = pFlag[c]; + while (cptr) { + if ((cptr->getFlag() == ap[k]) && cptr->allowCross() && + (!cptr->getKeyLen() || + ((badl > cptr->getKeyLen()) && + (strncmp(cptr->getKey(), bad, cptr->getKeyLen()) == 0)))) { + int l1 = strlen(wlst[j].word); + std::string newword = cptr->add(wlst[j].word, l1); + if (!newword.empty()) { + if (nh < maxn) { + wlst[nh].word = mystrdup(newword.c_str()); + wlst[nh].allow = cptr->allowCross(); + wlst[nh].orig = NULL; + nh++; + } + } + } + cptr = cptr->getFlgNxt(); + } + } + } + + // now handle pure prefixes + for (int m = 0; m < al; m++) { + const unsigned char c = (unsigned char)(ap[m] & 0x00FF); + PfxEntry* ptr = pFlag[c]; + while (ptr) { + if ((ptr->getFlag() == ap[m]) && + (!ptr->getKeyLen() || + ((badl > ptr->getKeyLen()) && + (strncmp(ptr->getKey(), bad, ptr->getKeyLen()) == 0))) && + // check needaffix flag + !(ptr->getCont() && + ((needaffix && + TESTAFF(ptr->getCont(), needaffix, ptr->getContLen())) || + (circumfix && + TESTAFF(ptr->getCont(), circumfix, ptr->getContLen())) || + (onlyincompound && + TESTAFF(ptr->getCont(), onlyincompound, ptr->getContLen()))))) { + std::string newword = ptr->add(ts, wl); + if (!newword.empty()) { + if (nh < maxn) { + wlst[nh].word = mystrdup(newword.c_str()); + wlst[nh].allow = ptr->allowCross(); + wlst[nh].orig = NULL; + nh++; + } + } + } + ptr = ptr->getFlgNxt(); + } + } + + return nh; +} + +// return replacing table +const std::vector<replentry>& AffixMgr::get_reptable() const { + return pHMgr->get_reptable(); +} + +// return iconv table +RepList* AffixMgr::get_iconvtable() const { + if (!iconvtable) + return NULL; + return iconvtable; +} + +// return oconv table +RepList* AffixMgr::get_oconvtable() const { + if (!oconvtable) + return NULL; + return oconvtable; +} + +// return replacing table +struct phonetable* AffixMgr::get_phonetable() const { + if (!phone) + return NULL; + return phone; +} + +// return character map table +const std::vector<mapentry>& AffixMgr::get_maptable() const { + return maptable; +} + +// return character map table +const std::vector<std::string>& AffixMgr::get_breaktable() const { + return breaktable; +} + +// return text encoding of dictionary +const std::string& AffixMgr::get_encoding() { + if 
(encoding.empty()) + encoding = SPELL_ENCODING; + return encoding; +} + +// return text encoding of dictionary +int AffixMgr::get_langnum() const { + return langnum; +} + +// return double prefix option +int AffixMgr::get_complexprefixes() const { + return complexprefixes; +} + +// return FULLSTRIP option +int AffixMgr::get_fullstrip() const { + return fullstrip; +} + +FLAG AffixMgr::get_keepcase() const { + return keepcase; +} + +FLAG AffixMgr::get_forceucase() const { + return forceucase; +} + +FLAG AffixMgr::get_warn() const { + return warn; +} + +int AffixMgr::get_forbidwarn() const { + return forbidwarn; +} + +int AffixMgr::get_checksharps() const { + return checksharps; +} + +char* AffixMgr::encode_flag(unsigned short aflag) const { + return pHMgr->encode_flag(aflag); +} + +// return the preferred ignore string for suggestions +const char* AffixMgr::get_ignore() const { + if (ignorechars.empty()) + return NULL; + return ignorechars.c_str(); +} + +// return the preferred ignore string for suggestions +const std::vector<w_char>& AffixMgr::get_ignore_utf16() const { + return ignorechars_utf16; +} + +// return the keyboard string for suggestions +char* AffixMgr::get_key_string() { + if (keystring.empty()) + keystring = SPELL_KEYSTRING; + return mystrdup(keystring.c_str()); +} + +// return the preferred try string for suggestions +char* AffixMgr::get_try_string() const { + if (trystring.empty()) + return NULL; + return mystrdup(trystring.c_str()); +} + +// return the preferred try string for suggestions +const std::string& AffixMgr::get_wordchars() const { + return wordchars; +} + +const std::vector<w_char>& AffixMgr::get_wordchars_utf16() const { + return wordchars_utf16; +} + +// is there compounding? +int AffixMgr::get_compound() const { + return compoundflag || compoundbegin || !defcpdtable.empty(); +} + +// return the compound words control flag +FLAG AffixMgr::get_compoundflag() const { + return compoundflag; +} + +// return the forbidden words control flag +FLAG AffixMgr::get_forbiddenword() const { + return forbiddenword; +} + +// return the forbidden words control flag +FLAG AffixMgr::get_nosuggest() const { + return nosuggest; +} + +// return the forbidden words control flag +FLAG AffixMgr::get_nongramsuggest() const { + return nongramsuggest; +} + +// return the substandard root/affix control flag +FLAG AffixMgr::get_substandard() const { + return substandard; +} + +// return the forbidden words flag modify flag +FLAG AffixMgr::get_needaffix() const { + return needaffix; +} + +// return the onlyincompound flag +FLAG AffixMgr::get_onlyincompound() const { + return onlyincompound; +} + +// return the value of suffix +const std::string& AffixMgr::get_version() const { + return version; +} + +// utility method to look up root words in hash table +struct hentry* AffixMgr::lookup(const char* word) { + struct hentry* he = NULL; + for (size_t i = 0; i < alldic.size() && !he; ++i) { + he = alldic[i]->lookup(word); + } + return he; +} + +// return the value of suffix +int AffixMgr::have_contclass() const { + return havecontclass; +} + +// return utf8 +int AffixMgr::get_utf8() const { + return utf8; +} + +int AffixMgr::get_maxngramsugs(void) const { + return maxngramsugs; +} + +int AffixMgr::get_maxcpdsugs(void) const { + return maxcpdsugs; +} + +int AffixMgr::get_maxdiff(void) const { + return maxdiff; +} + +int AffixMgr::get_onlymaxdiff(void) const { + return onlymaxdiff; +} + +// return nosplitsugs +int AffixMgr::get_nosplitsugs(void) const { + return nosplitsugs; +} + +// return 
sugswithdots +int AffixMgr::get_sugswithdots(void) const { + return sugswithdots; +} + +/* parse flag */ +bool AffixMgr::parse_flag(const std::string& line, unsigned short* out, FileMgr* af) { + if (*out != FLAG_NULL && !(*out >= DEFAULTFLAGS)) { + HUNSPELL_WARNING( + stderr, + "error: line %d: multiple definitions of an affix file parameter\n", + af->getlinenum()); + return false; + } + std::string s; + if (!parse_string(line, s, af->getlinenum())) + return false; + *out = pHMgr->decode_flag(s.c_str()); + return true; +} + +/* parse num */ +bool AffixMgr::parse_num(const std::string& line, int* out, FileMgr* af) { + if (*out != -1) { + HUNSPELL_WARNING( + stderr, + "error: line %d: multiple definitions of an affix file parameter\n", + af->getlinenum()); + return false; + } + std::string s; + if (!parse_string(line, s, af->getlinenum())) + return false; + *out = atoi(s.c_str()); + return true; +} + +/* parse in the max syllablecount of compound words and */ +bool AffixMgr::parse_cpdsyllable(const std::string& line, FileMgr* af) { + int i = 0; + int np = 0; + std::string::const_iterator iter = line.begin(); + std::string::const_iterator start_piece = mystrsep(line, iter); + while (start_piece != line.end()) { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + cpdmaxsyllable = atoi(std::string(start_piece, iter).c_str()); + np++; + break; + } + case 2: { + if (!utf8) { + cpdvowels.assign(start_piece, iter); + std::sort(cpdvowels.begin(), cpdvowels.end()); + } else { + std::string piece(start_piece, iter); + u8_u16(cpdvowels_utf16, piece); + std::sort(cpdvowels_utf16.begin(), cpdvowels_utf16.end()); + } + np++; + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np < 2) { + HUNSPELL_WARNING(stderr, + "error: line %d: missing compoundsyllable information\n", + af->getlinenum()); + return false; + } + if (np == 2) + cpdvowels = "AEIOUaeiou"; + return true; +} + +bool AffixMgr::parse_convtable(const std::string& line, + FileMgr* af, + RepList** rl, + const std::string& keyword) { + if (*rl) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return false; + } + int i = 0; + int np = 0; + int numrl = 0; + std::string::const_iterator iter = line.begin(); + std::string::const_iterator start_piece = mystrsep(line, iter); + while (start_piece != line.end()) { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + numrl = atoi(std::string(start_piece, iter).c_str()); + if (numrl < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", + af->getlinenum()); + return false; + } + *rl = new RepList(numrl); + if (!*rl) + return false; + np++; + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np != 2) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return false; + } + + /* now parse the num lines to read in the remainder of the table */ + for (int j = 0; j < numrl; j++) { + std::string nl; + if (!af->getline(nl)) + return false; + mychomp(nl); + i = 0; + std::string pattern; + std::string pattern2; + iter = nl.begin(); + start_piece = mystrsep(nl, iter); + while (start_piece != nl.end()) { + { + switch (i) { + case 0: { + if (nl.compare(start_piece - nl.begin(), keyword.size(), keyword, 0, keyword.size()) != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + delete *rl; + *rl = NULL; + return false; + } + break; + } + case 1: { + pattern.assign(start_piece, 
iter); + break; + } + case 2: { + pattern2.assign(start_piece, iter); + break; + } + default: + break; + } + ++i; + } + start_piece = mystrsep(nl, iter); + } + if (pattern.empty() || pattern2.empty()) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return false; + } + (*rl)->add(pattern, pattern2); + } + return true; +} + +/* parse in the typical fault correcting table */ +bool AffixMgr::parse_phonetable(const std::string& line, FileMgr* af) { + if (phone) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return false; + } + int num = -1; + int i = 0; + int np = 0; + std::string::const_iterator iter = line.begin(); + std::string::const_iterator start_piece = mystrsep(line, iter); + while (start_piece != line.end()) { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + num = atoi(std::string(start_piece, iter).c_str()); + if (num < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return false; + } + phone = new phonetable; + phone->utf8 = (char)utf8; + np++; + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np != 2) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return false; + } + + /* now parse the phone->num lines to read in the remainder of the table */ + for (int j = 0; j < num; ++j) { + std::string nl; + if (!af->getline(nl)) + return false; + mychomp(nl); + i = 0; + const size_t old_size = phone->rules.size(); + iter = nl.begin(); + start_piece = mystrsep(nl, iter); + while (start_piece != nl.end()) { + { + switch (i) { + case 0: { + if (nl.compare(start_piece - nl.begin(), 5, "PHONE", 5) != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return false; + } + break; + } + case 1: { + phone->rules.push_back(std::string(start_piece, iter)); + break; + } + case 2: { + phone->rules.push_back(std::string(start_piece, iter)); + mystrrep(phone->rules.back(), "_", ""); + break; + } + default: + break; + } + ++i; + } + start_piece = mystrsep(nl, iter); + } + if (phone->rules.size() != old_size + 2) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + phone->rules.clear(); + return false; + } + } + phone->rules.push_back(""); + phone->rules.push_back(""); + init_phonet_hash(*phone); + return true; +} + +/* parse in the checkcompoundpattern table */ +bool AffixMgr::parse_checkcpdtable(const std::string& line, FileMgr* af) { + if (parsedcheckcpd) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return false; + } + parsedcheckcpd = true; + int numcheckcpd = -1; + int i = 0; + int np = 0; + std::string::const_iterator iter = line.begin(); + std::string::const_iterator start_piece = mystrsep(line, iter); + while (start_piece != line.end()) { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + numcheckcpd = atoi(std::string(start_piece, iter).c_str()); + if (numcheckcpd < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return false; + } + checkcpdtable.reserve(numcheckcpd); + np++; + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np != 2) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return false; + } + + /* now parse the numcheckcpd lines to read in the remainder of the table */ + for (int j = 0; j < 
numcheckcpd; ++j) { + std::string nl; + if (!af->getline(nl)) + return false; + mychomp(nl); + i = 0; + checkcpdtable.push_back(patentry()); + iter = nl.begin(); + start_piece = mystrsep(nl, iter); + while (start_piece != nl.end()) { + switch (i) { + case 0: { + if (nl.compare(start_piece - nl.begin(), 20, "CHECKCOMPOUNDPATTERN", 20) != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return false; + } + break; + } + case 1: { + checkcpdtable.back().pattern.assign(start_piece, iter); + size_t slash_pos = checkcpdtable.back().pattern.find('/'); + if (slash_pos != std::string::npos) { + std::string chunk(checkcpdtable.back().pattern, slash_pos + 1); + checkcpdtable.back().pattern.resize(slash_pos); + checkcpdtable.back().cond = pHMgr->decode_flag(chunk.c_str()); + } + break; + } + case 2: { + checkcpdtable.back().pattern2.assign(start_piece, iter); + size_t slash_pos = checkcpdtable.back().pattern2.find('/'); + if (slash_pos != std::string::npos) { + std::string chunk(checkcpdtable.back().pattern2, slash_pos + 1); + checkcpdtable.back().pattern2.resize(slash_pos); + checkcpdtable.back().cond2 = pHMgr->decode_flag(chunk.c_str()); + } + break; + } + case 3: { + checkcpdtable.back().pattern3.assign(start_piece, iter); + simplifiedcpd = 1; + break; + } + default: + break; + } + i++; + start_piece = mystrsep(nl, iter); + } + } + return true; +} + +/* parse in the compound rule table */ +bool AffixMgr::parse_defcpdtable(const std::string& line, FileMgr* af) { + if (parseddefcpd) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return false; + } + parseddefcpd = true; + int numdefcpd = -1; + int i = 0; + int np = 0; + std::string::const_iterator iter = line.begin(); + std::string::const_iterator start_piece = mystrsep(line, iter); + while (start_piece != line.end()) { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + numdefcpd = atoi(std::string(start_piece, iter).c_str()); + if (numdefcpd < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return false; + } + defcpdtable.reserve(numdefcpd); + np++; + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np != 2) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return false; + } + + /* now parse the numdefcpd lines to read in the remainder of the table */ + for (int j = 0; j < numdefcpd; ++j) { + std::string nl; + if (!af->getline(nl)) + return false; + mychomp(nl); + i = 0; + defcpdtable.push_back(flagentry()); + iter = nl.begin(); + start_piece = mystrsep(nl, iter); + while (start_piece != nl.end()) { + switch (i) { + case 0: { + if (nl.compare(start_piece - nl.begin(), 12, "COMPOUNDRULE", 12) != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + numdefcpd = 0; + return false; + } + break; + } + case 1: { // handle parenthesized flags + if (std::find(start_piece, iter, '(') != iter) { + for (std::string::const_iterator k = start_piece; k != iter; ++k) { + std::string::const_iterator chb = k; + std::string::const_iterator che = k + 1; + if (*k == '(') { + std::string::const_iterator parpos = std::find(k, iter, ')'); + if (parpos != iter) { + chb = k + 1; + che = parpos; + k = parpos; + } + } + + if (*chb == '*' || *chb == '?') { + defcpdtable.back().push_back((FLAG)*chb); + } else { + pHMgr->decode_flags(defcpdtable.back(), std::string(chb, che), af); + } + } + } else { + 
pHMgr->decode_flags(defcpdtable.back(), std::string(start_piece, iter), af); + } + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(nl, iter); + } + if (defcpdtable.back().empty()) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return false; + } + } + return true; +} + +/* parse in the character map table */ +bool AffixMgr::parse_maptable(const std::string& line, FileMgr* af) { + if (parsedmaptable) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return false; + } + parsedmaptable = true; + int nummap = -1; + int i = 0; + int np = 0; + std::string::const_iterator iter = line.begin(); + std::string::const_iterator start_piece = mystrsep(line, iter); + while (start_piece != line.end()) { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + nummap = atoi(std::string(start_piece, iter).c_str()); + if (nummap < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return false; + } + maptable.reserve(nummap); + np++; + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np != 2) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return false; + } + + /* now parse the nummap lines to read in the remainder of the table */ + for (int j = 0; j < nummap; ++j) { + std::string nl; + if (!af->getline(nl)) + return false; + mychomp(nl); + i = 0; + maptable.push_back(mapentry()); + iter = nl.begin(); + start_piece = mystrsep(nl, iter); + while (start_piece != nl.end()) { + switch (i) { + case 0: { + if (nl.compare(start_piece - nl.begin(), 3, "MAP", 3) != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + nummap = 0; + return false; + } + break; + } + case 1: { + for (std::string::const_iterator k = start_piece; k != iter; ++k) { + std::string::const_iterator chb = k; + std::string::const_iterator che = k + 1; + if (*k == '(') { + std::string::const_iterator parpos = std::find(k, iter, ')'); + if (parpos != iter) { + chb = k + 1; + che = parpos; + k = parpos; + } + } else { + if (utf8 && (*k & 0xc0) == 0xc0) { + ++k; + while (k != iter && (*k & 0xc0) == 0x80) + ++k; + che = k; + --k; + } + } + maptable.back().push_back(std::string(chb, che)); + } + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(nl, iter); + } + if (maptable.back().empty()) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return false; + } + } + return true; +} + +/* parse in the word breakpoint table */ +bool AffixMgr::parse_breaktable(const std::string& line, FileMgr* af) { + if (parsedbreaktable) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return false; + } + parsedbreaktable = true; + int numbreak = -1; + int i = 0; + int np = 0; + std::string::const_iterator iter = line.begin(); + std::string::const_iterator start_piece = mystrsep(line, iter); + while (start_piece != line.end()) { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + numbreak = atoi(std::string(start_piece, iter).c_str()); + if (numbreak < 0) { + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return false; + } + if (numbreak == 0) + return true; + breaktable.reserve(numbreak); + np++; + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np != 2) { + HUNSPELL_WARNING(stderr, "error: line %d: 
missing data\n", + af->getlinenum()); + return false; + } + + /* now parse the numbreak lines to read in the remainder of the table */ + for (int j = 0; j < numbreak; ++j) { + std::string nl; + if (!af->getline(nl)) + return false; + mychomp(nl); + i = 0; + iter = nl.begin(); + start_piece = mystrsep(nl, iter); + while (start_piece != nl.end()) { + switch (i) { + case 0: { + if (nl.compare(start_piece - nl.begin(), 5, "BREAK", 5) != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + numbreak = 0; + return false; + } + break; + } + case 1: { + breaktable.push_back(std::string(start_piece, iter)); + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(nl, iter); + } + } + + if (breaktable.size() != static_cast<size_t>(numbreak)) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return false; + } + + return true; +} + +void AffixMgr::reverse_condition(std::string& piece) { + if (piece.empty()) + return; + + int neg = 0; + for (std::string::reverse_iterator k = piece.rbegin(); k != piece.rend(); ++k) { + switch (*k) { + case '[': { + if (neg) + *(k - 1) = '['; + else + *k = ']'; + break; + } + case ']': { + *k = '['; + if (neg) + *(k - 1) = '^'; + neg = 0; + break; + } + case '^': { + if (*(k - 1) == ']') + neg = 1; + else if (neg) + *(k - 1) = *k; + break; + } + default: { + if (neg) + *(k - 1) = *k; + } + } + } +} + +class entries_container { + std::vector<AffEntry*> entries; + AffixMgr* m_mgr; + char m_at; +public: + entries_container(char at, AffixMgr* mgr) + : m_mgr(mgr) + , m_at(at) { + } + void release() { + entries.clear(); + } + void initialize(int numents, + char opts, unsigned short aflag) { + entries.reserve(numents); + + if (m_at == 'P') { + entries.push_back(new PfxEntry(m_mgr)); + } else { + entries.push_back(new SfxEntry(m_mgr)); + } + + entries.back()->opts = opts; + entries.back()->aflag = aflag; + } + + AffEntry* add_entry(char opts) { + if (m_at == 'P') { + entries.push_back(new PfxEntry(m_mgr)); + } else { + entries.push_back(new SfxEntry(m_mgr)); + } + AffEntry* ret = entries.back(); + ret->opts = entries[0]->opts & opts; + return ret; + } + + AffEntry* first_entry() { + return entries.empty() ? 
NULL : entries[0]; + } + + ~entries_container() { + for (size_t i = 0; i < entries.size(); ++i) { + delete entries[i]; + } + } + + std::vector<AffEntry*>::iterator begin() { return entries.begin(); } + std::vector<AffEntry*>::iterator end() { return entries.end(); } +}; + +bool AffixMgr::parse_affix(const std::string& line, + const char at, + FileMgr* af, + char* dupflags) { + int numents = 0; // number of AffEntry structures to parse + + unsigned short aflag = 0; // affix char identifier + + char ff = 0; + entries_container affentries(at, this); + + int i = 0; + +// checking lines with bad syntax +#ifdef DEBUG + int basefieldnum = 0; +#endif + + // split affix header line into pieces + + int np = 0; + std::string::const_iterator iter = line.begin(); + std::string::const_iterator start_piece = mystrsep(line, iter); + while (start_piece != line.end()) { + switch (i) { + // piece 1 - is type of affix + case 0: { + np++; + break; + } + + // piece 2 - is affix char + case 1: { + np++; + aflag = pHMgr->decode_flag(std::string(start_piece, iter).c_str()); + if (((at == 'S') && (dupflags[aflag] & dupSFX)) || + ((at == 'P') && (dupflags[aflag] & dupPFX))) { + HUNSPELL_WARNING( + stderr, + "error: line %d: multiple definitions of an affix flag\n", + af->getlinenum()); + } + dupflags[aflag] += (char)((at == 'S') ? dupSFX : dupPFX); + break; + } + // piece 3 - is cross product indicator + case 2: { + np++; + if (*start_piece == 'Y') + ff = aeXPRODUCT; + break; + } + + // piece 4 - is number of affentries + case 3: { + np++; + numents = atoi(std::string(start_piece, iter).c_str()); + if ((numents <= 0) || ((std::numeric_limits<size_t>::max() / + sizeof(AffEntry)) < static_cast<size_t>(numents))) { + char* err = pHMgr->encode_flag(aflag); + if (err) { + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + free(err); + } + return false; + } + + char opts = ff; + if (utf8) + opts |= aeUTF8; + if (pHMgr->is_aliasf()) + opts |= aeALIASF; + if (pHMgr->is_aliasm()) + opts |= aeALIASM; + affentries.initialize(numents, opts, aflag); + } + + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + // check to make sure we parsed enough pieces + if (np != 4) { + char* err = pHMgr->encode_flag(aflag); + if (err) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + free(err); + } + return false; + } + + // now parse numents affentries for this affix + AffEntry* entry = affentries.first_entry(); + for (int ent = 0; ent < numents; ++ent) { + std::string nl; + if (!af->getline(nl)) + return false; + mychomp(nl); + + iter = nl.begin(); + i = 0; + np = 0; + + // split line into pieces + start_piece = mystrsep(nl, iter); + while (start_piece != nl.end()) { + switch (i) { + // piece 1 - is type + case 0: { + np++; + if (ent != 0) + entry = affentries.add_entry((char)(aeXPRODUCT + aeUTF8 + aeALIASF + aeALIASM)); + break; + } + + // piece 2 - is affix char + case 1: { + np++; + std::string chunk(start_piece, iter); + if (pHMgr->decode_flag(chunk.c_str()) != aflag) { + char* err = pHMgr->encode_flag(aflag); + if (err) { + HUNSPELL_WARNING(stderr, + "error: line %d: affix %s is corrupt\n", + af->getlinenum(), err); + free(err); + } + return false; + } + + if (ent != 0) { + AffEntry* start_entry = affentries.first_entry(); + entry->aflag = start_entry->aflag; + } + break; + } + + // piece 3 - is string to strip or 0 for null + case 2: { + np++; + entry->strip = std::string(start_piece, iter); + if (complexprefixes) { + if (utf8) + 
reverseword_utf(entry->strip); + else + reverseword(entry->strip); + } + if (entry->strip.compare("0") == 0) { + entry->strip.clear(); + } + break; + } + + // piece 4 - is affix string or 0 for null + case 3: { + entry->morphcode = NULL; + entry->contclass = NULL; + entry->contclasslen = 0; + np++; + std::string::const_iterator dash = std::find(start_piece, iter, '/'); + if (dash != iter) { + entry->appnd = std::string(start_piece, dash); + std::string dash_str(dash + 1, iter); + + if (!ignorechars.empty() && !has_no_ignored_chars(entry->appnd, ignorechars)) { + if (utf8) { + remove_ignored_chars_utf(entry->appnd, ignorechars_utf16); + } else { + remove_ignored_chars(entry->appnd, ignorechars); + } + } + + if (complexprefixes) { + if (utf8) + reverseword_utf(entry->appnd); + else + reverseword(entry->appnd); + } + + if (pHMgr->is_aliasf()) { + int index = atoi(dash_str.c_str()); + entry->contclasslen = (unsigned short)pHMgr->get_aliasf( + index, &(entry->contclass), af); + if (!entry->contclasslen) + HUNSPELL_WARNING(stderr, + "error: bad affix flag alias: \"%s\"\n", + dash_str.c_str()); + } else { + entry->contclasslen = (unsigned short)pHMgr->decode_flags( + &(entry->contclass), dash_str.c_str(), af); + std::sort(entry->contclass, entry->contclass + entry->contclasslen); + } + + havecontclass = 1; + for (unsigned short _i = 0; _i < entry->contclasslen; _i++) { + contclasses[(entry->contclass)[_i]] = 1; + } + } else { + entry->appnd = std::string(start_piece, iter); + + if (!ignorechars.empty() && !has_no_ignored_chars(entry->appnd, ignorechars)) { + if (utf8) { + remove_ignored_chars_utf(entry->appnd, ignorechars_utf16); + } else { + remove_ignored_chars(entry->appnd, ignorechars); + } + } + + if (complexprefixes) { + if (utf8) + reverseword_utf(entry->appnd); + else + reverseword(entry->appnd); + } + } + + if (entry->appnd.compare("0") == 0) { + entry->appnd.clear(); + } + break; + } + + // piece 5 - is the conditions descriptions + case 4: { + std::string chunk(start_piece, iter); + np++; + if (complexprefixes) { + if (utf8) + reverseword_utf(chunk); + else + reverseword(chunk); + reverse_condition(chunk); + } + if (!entry->strip.empty() && chunk != "." && + redundant_condition(at, entry->strip.c_str(), entry->strip.size(), chunk.c_str(), + af->getlinenum())) + chunk = "."; + if (at == 'S') { + reverseword(chunk); + reverse_condition(chunk); + } + if (encodeit(*entry, chunk.c_str())) + return false; + break; + } + + case 5: { + std::string chunk(start_piece, iter); + np++; + if (pHMgr->is_aliasm()) { + int index = atoi(chunk.c_str()); + entry->morphcode = pHMgr->get_aliasm(index); + } else { + if (complexprefixes) { // XXX - fix me for morph. gen. + if (utf8) + reverseword_utf(chunk); + else + reverseword(chunk); + } + // add the remaining of the line + std::string::const_iterator end = nl.end(); + if (iter != end) { + chunk.append(iter, end); + } + entry->morphcode = mystrdup(chunk.c_str()); + if (!entry->morphcode) + return false; + } + break; + } + default: + break; + } + i++; + start_piece = mystrsep(nl, iter); + } + // check to make sure we parsed enough pieces + if (np < 4) { + char* err = pHMgr->encode_flag(aflag); + if (err) { + HUNSPELL_WARNING(stderr, "error: line %d: affix %s is corrupt\n", + af->getlinenum(), err); + free(err); + } + return false; + } + +#ifdef DEBUG + // detect unnecessary fields, excepting comments + if (basefieldnum) { + int fieldnum = + !(entry->morphcode) ? 5 : ((*(entry->morphcode) == '#') ? 
5 : 6); + if (fieldnum != basefieldnum) + HUNSPELL_WARNING(stderr, "warning: line %d: bad field number\n", + af->getlinenum()); + } else { + basefieldnum = + !(entry->morphcode) ? 5 : ((*(entry->morphcode) == '#') ? 5 : 6); + } +#endif + } + + // now create SfxEntry or PfxEntry objects and use links to + // build an ordered (sorted by affix string) list + std::vector<AffEntry*>::iterator start = affentries.begin(); + std::vector<AffEntry*>::iterator end = affentries.end(); + for (std::vector<AffEntry*>::iterator affentry = start; affentry != end; ++affentry) { + if (at == 'P') { + build_pfxtree(static_cast<PfxEntry*>(*affentry)); + } else { + build_sfxtree(static_cast<SfxEntry*>(*affentry)); + } + } + + //contents belong to AffixMgr now + affentries.release(); + + return true; +} + +int AffixMgr::redundant_condition(char ft, + const char* strip, + int stripl, + const char* cond, + int linenum) { + int condl = strlen(cond); + int i; + int j; + int neg; + int in; + if (ft == 'P') { // prefix + if (strncmp(strip, cond, condl) == 0) + return 1; + if (utf8) { + } else { + for (i = 0, j = 0; (i < stripl) && (j < condl); i++, j++) { + if (cond[j] != '[') { + if (cond[j] != strip[i]) { + HUNSPELL_WARNING(stderr, + "warning: line %d: incompatible stripping " + "characters and condition\n", + linenum); + return 0; + } + } else { + neg = (cond[j + 1] == '^') ? 1 : 0; + in = 0; + do { + j++; + if (strip[i] == cond[j]) + in = 1; + } while ((j < (condl - 1)) && (cond[j] != ']')); + if (j == (condl - 1) && (cond[j] != ']')) { + HUNSPELL_WARNING(stderr, + "error: line %d: missing ] in condition:\n%s\n", + linenum, cond); + return 0; + } + if ((!neg && !in) || (neg && in)) { + HUNSPELL_WARNING(stderr, + "warning: line %d: incompatible stripping " + "characters and condition\n", + linenum); + return 0; + } + } + } + if (j >= condl) + return 1; + } + } else { // suffix + if ((stripl >= condl) && strcmp(strip + stripl - condl, cond) == 0) + return 1; + if (utf8) { + } else { + for (i = stripl - 1, j = condl - 1; (i >= 0) && (j >= 0); i--, j--) { + if (cond[j] != ']') { + if (cond[j] != strip[i]) { + HUNSPELL_WARNING(stderr, + "warning: line %d: incompatible stripping " + "characters and condition\n", + linenum); + return 0; + } + } else { + in = 0; + do { + j--; + if (strip[i] == cond[j]) + in = 1; + } while ((j > 0) && (cond[j] != '[')); + if ((j == 0) && (cond[j] != '[')) { + HUNSPELL_WARNING(stderr, + "error: line: %d: missing ] in condition:\n%s\n", + linenum, cond); + return 0; + } + neg = (cond[j + 1] == '^') ? 
1 : 0; + if ((!neg && !in) || (neg && in)) { + HUNSPELL_WARNING(stderr, + "warning: line %d: incompatible stripping " + "characters and condition\n", + linenum); + return 0; + } + } + } + if (j < 0) + return 1; + } + } + return 0; +} + +std::vector<std::string> AffixMgr::get_suffix_words(short unsigned* suff, + int len, + const char* root_word) { + std::vector<std::string> slst; + short unsigned* start_ptr = suff; + for (int j = 0; j < SETSIZE; j++) { + SfxEntry* ptr = sStart[j]; + while (ptr) { + suff = start_ptr; + for (int i = 0; i < len; i++) { + if ((*suff) == ptr->getFlag()) { + std::string nw(root_word); + nw.append(ptr->getAffix()); + hentry* ht = ptr->checkword(nw.c_str(), nw.size(), 0, NULL, 0, 0, 0); + if (ht) { + slst.push_back(nw); + } + } + suff++; + } + ptr = ptr->getNext(); + } + } + return slst; +} diff --git a/extensions/spellcheck/hunspell/src/affixmgr.hxx b/extensions/spellcheck/hunspell/src/affixmgr.hxx new file mode 100644 index 0000000000..450f50a65c --- /dev/null +++ b/extensions/spellcheck/hunspell/src/affixmgr.hxx @@ -0,0 +1,368 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef AFFIXMGR_HXX_ +#define AFFIXMGR_HXX_ + +#include <stdio.h> + +#include <string> +#include <vector> + +#include "atypes.hxx" +#include "baseaffix.hxx" +#include "hashmgr.hxx" +#include "phonet.hxx" +#include "replist.hxx" + +// check flag duplication +#define dupSFX (1 << 0) +#define dupPFX (1 << 1) + +class PfxEntry; +class SfxEntry; + +class AffixMgr { + PfxEntry* pStart[SETSIZE]; + SfxEntry* sStart[SETSIZE]; + PfxEntry* pFlag[SETSIZE]; + SfxEntry* sFlag[SETSIZE]; + const std::vector<HashMgr*>& alldic; + const HashMgr* pHMgr; + std::string keystring; + std::string trystring; + std::string encoding; + struct cs_info* csconv; + int utf8; + int complexprefixes; + FLAG compoundflag; + FLAG compoundbegin; + FLAG compoundmiddle; + FLAG compoundend; + FLAG compoundroot; + FLAG compoundforbidflag; + FLAG compoundpermitflag; + int compoundmoresuffixes; + int checkcompounddup; + int checkcompoundrep; + int checkcompoundcase; + int checkcompoundtriple; + int simplifiedtriple; + FLAG forbiddenword; + FLAG nosuggest; + FLAG nongramsuggest; + FLAG needaffix; + int cpdmin; + RepList* iconvtable; + RepList* oconvtable; + bool parsedmaptable; + std::vector<mapentry> maptable; + bool parsedbreaktable; + std::vector<std::string> breaktable; + bool parsedcheckcpd; + std::vector<patentry> checkcpdtable; + int simplifiedcpd; + bool parseddefcpd; + std::vector<flagentry> defcpdtable; + phonetable* phone; + int maxngramsugs; + int maxcpdsugs; + int maxdiff; + int onlymaxdiff; + int nosplitsugs; + int sugswithdots; + int cpdwordmax; + int cpdmaxsyllable; + std::string cpdvowels; // vowels (for calculating of Hungarian compounding limit, + std::vector<w_char> cpdvowels_utf16; //vowels for UTF-8 encoding + std::string cpdsyllablenum; // syllable count incrementing flag + const char* pfxappnd; // BUG: not stateless + const char* sfxappnd; // BUG: not stateless + int sfxextra; // BUG: not stateless + FLAG sfxflag; // BUG: not stateless + char* derived; // BUG: not stateless + SfxEntry* sfx; // BUG: not stateless + PfxEntry* pfx; // BUG: not stateless + int checknum; + std::string wordchars; // letters + spec. word characters + std::vector<w_char> wordchars_utf16; + std::string ignorechars; // letters + spec. 
word characters + std::vector<w_char> ignorechars_utf16; + std::string version; // affix and dictionary file version string + std::string lang; // language + int langnum; + FLAG lemma_present; + FLAG circumfix; + FLAG onlyincompound; + FLAG keepcase; + FLAG forceucase; + FLAG warn; + int forbidwarn; + FLAG substandard; + int checksharps; + int fullstrip; + + int havecontclass; // boolean variable + char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold + // affix) + + public: + AffixMgr(const char* affpath, const std::vector<HashMgr*>& ptr, const char* key = NULL); + ~AffixMgr(); + struct hentry* affix_check(const char* word, + int len, + const unsigned short needflag = (unsigned short)0, + char in_compound = IN_CPD_NOT); + struct hentry* prefix_check(const char* word, + int len, + char in_compound, + const FLAG needflag = FLAG_NULL); + inline int isSubset(const char* s1, const char* s2); + struct hentry* prefix_check_twosfx(const char* word, + int len, + char in_compound, + const FLAG needflag = FLAG_NULL); + inline int isRevSubset(const char* s1, const char* end_of_s2, int len); + struct hentry* suffix_check(const char* word, + int len, + int sfxopts, + PfxEntry* ppfx, + const FLAG cclass = FLAG_NULL, + const FLAG needflag = FLAG_NULL, + char in_compound = IN_CPD_NOT); + struct hentry* suffix_check_twosfx(const char* word, + int len, + int sfxopts, + PfxEntry* ppfx, + const FLAG needflag = FLAG_NULL); + + std::string affix_check_morph(const char* word, + int len, + const FLAG needflag = FLAG_NULL, + char in_compound = IN_CPD_NOT); + std::string prefix_check_morph(const char* word, + int len, + char in_compound, + const FLAG needflag = FLAG_NULL); + std::string suffix_check_morph(const char* word, + int len, + int sfxopts, + PfxEntry* ppfx, + const FLAG cclass = FLAG_NULL, + const FLAG needflag = FLAG_NULL, + char in_compound = IN_CPD_NOT); + + std::string prefix_check_twosfx_morph(const char* word, + int len, + char in_compound, + const FLAG needflag = FLAG_NULL); + std::string suffix_check_twosfx_morph(const char* word, + int len, + int sfxopts, + PfxEntry* ppfx, + const FLAG needflag = FLAG_NULL); + + std::string morphgen(const char* ts, + int wl, + const unsigned short* ap, + unsigned short al, + const char* morph, + const char* targetmorph, + int level); + + int expand_rootword(struct guessword* wlst, + int maxn, + const char* ts, + int wl, + const unsigned short* ap, + unsigned short al, + const char* bad, + int, + const char*); + + short get_syllable(const std::string& word); + int cpdrep_check(const char* word, int len); + int cpdwordpair_check(const char * word, int len); + int cpdpat_check(const char* word, + int len, + hentry* r1, + hentry* r2, + const char affixed); + int defcpd_check(hentry*** words, + short wnum, + hentry* rv, + hentry** rwords, + char all); + int cpdcase_check(const char* word, int len); + inline int candidate_check(const char* word, int len); + void setcminmax(int* cmin, int* cmax, const char* word, int len); + struct hentry* compound_check(const std::string& word, + short wordnum, + short numsyllable, + short maxwordnum, + short wnum, + hentry** words, + hentry** rwords, + char hu_mov_rule, + char is_sug, + int* info); + + int compound_check_morph(const char* word, + int len, + short wordnum, + short numsyllable, + short maxwordnum, + short wnum, + hentry** words, + hentry** rwords, + char hu_mov_rule, + std::string& result, + const std::string* partresult); + + std::vector<std::string> get_suffix_words(short unsigned* suff, + int 
len, + const char* root_word); + + struct hentry* lookup(const char* word); + const std::vector<replentry>& get_reptable() const; + RepList* get_iconvtable() const; + RepList* get_oconvtable() const; + struct phonetable* get_phonetable() const; + const std::vector<mapentry>& get_maptable() const; + const std::vector<std::string>& get_breaktable() const; + const std::string& get_encoding(); + int get_langnum() const; + char* get_key_string(); + char* get_try_string() const; + const std::string& get_wordchars() const; + const std::vector<w_char>& get_wordchars_utf16() const; + const char* get_ignore() const; + const std::vector<w_char>& get_ignore_utf16() const; + int get_compound() const; + FLAG get_compoundflag() const; + FLAG get_forbiddenword() const; + FLAG get_nosuggest() const; + FLAG get_nongramsuggest() const; + FLAG get_substandard() const; + FLAG get_needaffix() const; + FLAG get_onlyincompound() const; + const char* get_derived() const; + const std::string& get_version() const; + int have_contclass() const; + int get_utf8() const; + int get_complexprefixes() const; + char* get_suffixed(char) const; + int get_maxngramsugs() const; + int get_maxcpdsugs() const; + int get_maxdiff() const; + int get_onlymaxdiff() const; + int get_nosplitsugs() const; + int get_sugswithdots(void) const; + FLAG get_keepcase(void) const; + FLAG get_forceucase(void) const; + FLAG get_warn(void) const; + int get_forbidwarn(void) const; + int get_checksharps(void) const; + char* encode_flag(unsigned short aflag) const; + int get_fullstrip() const; + + private: + int parse_file(const char* affpath, const char* key); + bool parse_flag(const std::string& line, unsigned short* out, FileMgr* af); + bool parse_num(const std::string& line, int* out, FileMgr* af); + bool parse_cpdsyllable(const std::string& line, FileMgr* af); + bool parse_convtable(const std::string& line, + FileMgr* af, + RepList** rl, + const std::string& keyword); + bool parse_phonetable(const std::string& line, FileMgr* af); + bool parse_maptable(const std::string& line, FileMgr* af); + bool parse_breaktable(const std::string& line, FileMgr* af); + bool parse_checkcpdtable(const std::string& line, FileMgr* af); + bool parse_defcpdtable(const std::string& line, FileMgr* af); + bool parse_affix(const std::string& line, const char at, FileMgr* af, char* dupflags); + + void reverse_condition(std::string&); + std::string& debugflag(std::string& result, unsigned short flag); + int condlen(const char*); + int encodeit(AffEntry& entry, const char* cs); + int build_pfxtree(PfxEntry* pfxptr); + int build_sfxtree(SfxEntry* sfxptr); + int process_pfx_order(); + int process_sfx_order(); + PfxEntry* process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr); + SfxEntry* process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr); + int process_pfx_tree_to_list(); + int process_sfx_tree_to_list(); + int redundant_condition(char, const char* strip, int stripl, const char* cond, int); + void finishFileMgr(FileMgr* afflst); +}; + +#endif diff --git a/extensions/spellcheck/hunspell/src/atypes.hxx b/extensions/spellcheck/hunspell/src/atypes.hxx new file mode 100644 index 0000000000..1b78d4724b --- /dev/null +++ b/extensions/spellcheck/hunspell/src/atypes.hxx @@ -0,0 +1,129 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef ATYPES_HXX_ +#define ATYPES_HXX_ + +#ifndef HUNSPELL_WARNING +#include <stdio.h> +#ifdef HUNSPELL_WARNING_ON +#define HUNSPELL_WARNING fprintf +#else +// empty inline function to switch off warnings (instead of the C99 standard +// variadic macros) +static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {} +#endif +#endif + +// HUNSTEM def. +#define HUNSTEM + +#include "w_char.hxx" +#include <algorithm> +#include <string> +#include <vector> + +#define SETSIZE 256 +#define CONTSIZE 65536 + +// AffEntry options +#define aeXPRODUCT (1 << 0) +#define aeUTF8 (1 << 1) +#define aeALIASF (1 << 2) +#define aeALIASM (1 << 3) +#define aeLONGCOND (1 << 4) + +// compound options +#define IN_CPD_NOT 0 +#define IN_CPD_BEGIN 1 +#define IN_CPD_END 2 +#define IN_CPD_OTHER 3 + +// info options +#define SPELL_COMPOUND (1 << 0) +#define SPELL_FORBIDDEN (1 << 1) +#define SPELL_ALLCAP (1 << 2) +#define SPELL_NOCAP (1 << 3) +#define SPELL_INITCAP (1 << 4) +#define SPELL_ORIGCAP (1 << 5) +#define SPELL_WARN (1 << 6) + +#define MINCPDLEN 3 +#define MAXCOMPOUND 10 +#define MAXCONDLEN 20 +#define MAXCONDLEN_1 (MAXCONDLEN - sizeof(char*)) + +#define MAXACC 1000 + +#define FLAG unsigned short +#define FLAG_NULL 0x00 +#define FREE_FLAG(a) a = 0 + +#define TESTAFF(a, b, c) (std::binary_search(a, a + c, b)) + +// timelimit: max. ~1/4 sec (process time on Linux) for +// for a suggestion, including max. 
~/10 sec for a case +// sensitive plain or compound word suggestion, within +// ~1/20 sec long time consuming suggestion functions +#define TIMELIMIT_GLOBAL (CLOCKS_PER_SEC / 4) +#define TIMELIMIT_SUGGESTION (CLOCKS_PER_SEC / 10) +#define TIMELIMIT (CLOCKS_PER_SEC / 20) +#define MINTIMER 100 +#define MAXPLUSTIMER 100 + +struct guessword { + char* word; + bool allow; + char* orig; +}; + +typedef std::vector<std::string> mapentry; +typedef std::vector<FLAG> flagentry; + +struct patentry { + std::string pattern; + std::string pattern2; + std::string pattern3; + FLAG cond; + FLAG cond2; + patentry() + : cond(FLAG_NULL) + , cond2(FLAG_NULL) { + } +}; + +#endif diff --git a/extensions/spellcheck/hunspell/src/baseaffix.hxx b/extensions/spellcheck/hunspell/src/baseaffix.hxx new file mode 100644 index 0000000000..52cd60e028 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/baseaffix.hxx @@ -0,0 +1,74 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ * + * ***** END LICENSE BLOCK ***** */ + +#ifndef BASEAFF_HXX_ +#define BASEAFF_HXX_ + +#include <string> + +class AffEntry { + private: + AffEntry(const AffEntry&); + AffEntry& operator=(const AffEntry&); + + public: + AffEntry() + : numconds(0), + opts(0), + aflag(0), + morphcode(0), + contclass(NULL), + contclasslen(0) {} + virtual ~AffEntry(); + std::string appnd; + std::string strip; + unsigned char numconds; + char opts; + unsigned short aflag; + union { + char conds[MAXCONDLEN]; + struct { + char conds1[MAXCONDLEN_1]; + char* conds2; + } l; + } c; + char* morphcode; + unsigned short* contclass; + short contclasslen; +}; + +#endif diff --git a/extensions/spellcheck/hunspell/src/csutil.cxx b/extensions/spellcheck/hunspell/src/csutil.cxx new file mode 100644 index 0000000000..39a54d3802 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/csutil.cxx @@ -0,0 +1,2551 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. 
Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <algorithm> +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <ctype.h> +#include <sstream> + +#include "csutil.hxx" +#include "atypes.hxx" +#include "langnum.hxx" + +#ifdef _WIN32 +#include <windows.h> +#include <wchar.h> +#endif + +#ifdef OPENOFFICEORG +#include <unicode/uchar.h> +#else +#ifndef MOZILLA_CLIENT +#include "utf_info.hxx" +#define UTF_LST_LEN (sizeof(utf_lst) / (sizeof(unicode_info))) +#endif +#endif + +#ifdef MOZILLA_CLIENT +#include "mozHunspellRLBoxGlue.h" +#endif + +struct unicode_info2 { + char cletter; + unsigned short cupper; + unsigned short clower; +}; + +static struct unicode_info2* utf_tbl = NULL; +static int utf_tbl_count = + 0; // utf_tbl can be used by multiple Hunspell instances + +#ifndef MOZILLA_CLIENT +void myopen(std::ifstream& stream, const char* path, std::ios_base::openmode mode) +{ +#if defined(_WIN32) && defined(_MSC_VER) +#define WIN32_LONG_PATH_PREFIX "\\\\?\\" + if (strncmp(path, WIN32_LONG_PATH_PREFIX, 4) == 0) { + int len = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0); + wchar_t* buff = new wchar_t[len]; + wchar_t* buff2 = new wchar_t[len]; + MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len); + if (_wfullpath(buff2, buff, len) != NULL) { + stream.open(buff2, mode); + } + delete [] buff; + delete [] buff2; + } + else +#endif + stream.open(path, mode); +} +#endif + +std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) { + dest.clear(); + std::vector<w_char>::const_iterator u2 = src.begin(); + std::vector<w_char>::const_iterator u2_max = src.end(); + while (u2 < u2_max) { + signed char u8; + if (u2->h) { // > 0xFF + // XXX 4-byte haven't implemented yet. 
+ if (u2->h >= 0x08) { // >= 0x800 (3-byte UTF-8 character) + u8 = 0xe0 + (u2->h >> 4); + dest.push_back(u8); + u8 = 0x80 + ((u2->h & 0xf) << 2) + (u2->l >> 6); + dest.push_back(u8); + u8 = 0x80 + (u2->l & 0x3f); + dest.push_back(u8); + } else { // < 0x800 (2-byte UTF-8 character) + u8 = 0xc0 + (u2->h << 2) + (u2->l >> 6); + dest.push_back(u8); + u8 = 0x80 + (u2->l & 0x3f); + dest.push_back(u8); + } + } else { // <= 0xFF + if (u2->l & 0x80) { // >0x80 (2-byte UTF-8 character) + u8 = 0xc0 + (u2->l >> 6); + dest.push_back(u8); + u8 = 0x80 + (u2->l & 0x3f); + dest.push_back(u8); + } else { // < 0x80 (1-byte UTF-8 character) + u8 = u2->l; + dest.push_back(u8); + } + } + ++u2; + } + return dest; +} + +int u8_u16(std::vector<w_char>& dest, const std::string& src) { + dest.clear(); + std::string::const_iterator u8 = src.begin(); + std::string::const_iterator u8_max = src.end(); + + while (u8 < u8_max) { + w_char u2; + switch ((*u8) & 0xf0) { + case 0x00: + case 0x10: + case 0x20: + case 0x30: + case 0x40: + case 0x50: + case 0x60: + case 0x70: { + u2.h = 0; + u2.l = *u8; + break; + } + case 0x80: + case 0x90: + case 0xa0: + case 0xb0: { + HUNSPELL_WARNING(stderr, + "UTF-8 encoding error. Unexpected continuation bytes " + "in %ld. character position\n%s\n", + static_cast<long>(std::distance(src.begin(), u8)), + src.c_str()); + u2.h = 0xff; + u2.l = 0xfd; + break; + } + case 0xc0: + case 0xd0: { // 2-byte UTF-8 codes + if ((*(u8 + 1) & 0xc0) == 0x80) { + u2.h = (*u8 & 0x1f) >> 2; + u2.l = (static_cast<unsigned char>(*u8) << 6) + (*(u8 + 1) & 0x3f); + ++u8; + } else { + HUNSPELL_WARNING(stderr, + "UTF-8 encoding error. Missing continuation byte in " + "%ld. character position:\n%s\n", + static_cast<long>(std::distance(src.begin(), u8)), + src.c_str()); + u2.h = 0xff; + u2.l = 0xfd; + } + break; + } + case 0xe0: { // 3-byte UTF-8 codes + if ((*(u8 + 1) & 0xc0) == 0x80) { + u2.h = ((*u8 & 0x0f) << 4) + ((*(u8 + 1) & 0x3f) >> 2); + ++u8; + if ((*(u8 + 1) & 0xc0) == 0x80) { + u2.l = (static_cast<unsigned char>(*u8) << 6) + (*(u8 + 1) & 0x3f); + ++u8; + } else { + HUNSPELL_WARNING(stderr, + "UTF-8 encoding error. Missing continuation byte " + "in %ld. character position:\n%s\n", + static_cast<long>(std::distance(src.begin(), u8)), + src.c_str()); + u2.h = 0xff; + u2.l = 0xfd; + } + } else { + HUNSPELL_WARNING(stderr, + "UTF-8 encoding error. Missing continuation byte in " + "%ld. 
character position:\n%s\n", + static_cast<long>(std::distance(src.begin(), u8)), + src.c_str()); + u2.h = 0xff; + u2.l = 0xfd; + } + break; + } + case 0xf0: { // 4 or more byte UTF-8 codes + HUNSPELL_WARNING(stderr, + "This UTF-8 encoding can't convert to UTF-16:\n%s\n", + src.c_str()); + u2.h = 0xff; + u2.l = 0xfd; + dest.push_back(u2); + return -1; + } + } + dest.push_back(u2); + ++u8; + } + + return dest.size(); +} + +namespace { +class is_any_of { + public: + explicit is_any_of(const std::string& in) : chars(in) {} + + bool operator()(char c) { return chars.find(c) != std::string::npos; } + + private: + std::string chars; +}; +} + +std::string::const_iterator mystrsep(const std::string &str, + std::string::const_iterator& start) { + std::string::const_iterator end = str.end(); + + is_any_of op(" \t"); + // don't use isspace() here, the string can be in some random charset + // that's way different than the locale's + std::string::const_iterator sp = start; + while (sp != end && op(*sp)) + ++sp; + + std::string::const_iterator dp = sp; + while (dp != end && !op(*dp)) + ++dp; + + start = dp; + return sp; +} + +// replaces strdup with ansi version +char* mystrdup(const char* s) { + char* d = NULL; + if (s) { + size_t sl = strlen(s) + 1; + d = (char*)malloc(sl); + if (d) { + memcpy(d, s, sl); + } else { + HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); + } + } + return d; +} + +// remove cross-platform text line end characters +void mychomp(std::string& s) { + size_t k = s.size(); + size_t newsize = k; + if ((k > 0) && ((s[k - 1] == '\r') || (s[k - 1] == '\n'))) + --newsize; + if ((k > 1) && (s[k - 2] == '\r')) + --newsize; + s.resize(newsize); +} + +// break text to lines +std::vector<std::string> line_tok(const std::string& text, char breakchar) { + std::vector<std::string> ret; + if (text.empty()) { + return ret; + } + + std::stringstream ss(text); + std::string tok; + while(std::getline(ss, tok, breakchar)) { + if (!tok.empty()) { + ret.push_back(tok); + } + } + + return ret; +} + +// uniq line in place +void line_uniq(std::string& text, char breakchar) +{ + std::vector<std::string> lines = line_tok(text, breakchar); + text.clear(); + if (lines.empty()) { + return; + } + text = lines[0]; + for (size_t i = 1; i < lines.size(); ++i) { + bool dup = false; + for (size_t j = 0; j < i; ++j) { + if (lines[i] == lines[j]) { + dup = true; + break; + } + } + if (!dup) { + if (!text.empty()) + text.push_back(breakchar); + text.append(lines[i]); + } + } +} + +// uniq and boundary for compound analysis: "1\n\2\n\1" -> " ( \1 | \2 ) " +void line_uniq_app(std::string& text, char breakchar) { + if (text.find(breakchar) == std::string::npos) { + return; + } + + std::vector<std::string> lines = line_tok(text, breakchar); + text.clear(); + if (lines.empty()) { + return; + } + text = lines[0]; + for (size_t i = 1; i < lines.size(); ++i) { + bool dup = false; + for (size_t j = 0; j < i; ++j) { + if (lines[i] == lines[j]) { + dup = true; + break; + } + } + if (!dup) { + if (!text.empty()) + text.push_back(breakchar); + text.append(lines[i]); + } + } + + if (lines.size() == 1) { + text = lines[0]; + return; + } + + text.assign(" ( "); + for (size_t i = 0; i < lines.size(); ++i) { + text.append(lines[i]); + text.append(" | "); + } + text[text.size() - 2] = ')'; // " ) " +} + +// append s to ends of every lines in text +std::string& strlinecat(std::string& str, const std::string& apd) { + size_t pos = 0; + while ((pos = str.find('\n', pos)) != std::string::npos) { + str.insert(pos, apd); + pos += 
apd.length() + 1; + } + str.append(apd); + return str; +} + +int fieldlen(const char* r) { + int n = 0; + while (r && *r != ' ' && *r != '\t' && *r != '\0' && *r != '\n') { + r++; + n++; + } + return n; +} + +bool copy_field(std::string& dest, + const std::string& morph, + const std::string& var) { + if (morph.empty()) + return false; + size_t pos = morph.find(var); + if (pos == std::string::npos) + return false; + dest.clear(); + std::string beg(morph.substr(pos + MORPH_TAG_LEN, std::string::npos)); + + for (size_t i = 0; i < beg.size(); ++i) { + const char c(beg[i]); + if (c == ' ' || c == '\t' || c == '\n') + break; + dest.push_back(c); + } + + return true; +} + +std::string& mystrrep(std::string& str, + const std::string& search, + const std::string& replace) { + size_t pos = 0; + while ((pos = str.find(search, pos)) != std::string::npos) { + str.replace(pos, search.length(), replace); + pos += replace.length(); + } + return str; +} + +// reverse word +size_t reverseword(std::string& word) { + std::reverse(word.begin(), word.end()); + return word.size(); +} + +// reverse word +size_t reverseword_utf(std::string& word) { + std::vector<w_char> w; + u8_u16(w, word); + std::reverse(w.begin(), w.end()); + u16_u8(word, w); + return w.size(); +} + +void uniqlist(std::vector<std::string>& list) { + if (list.size() < 2) + return; + + std::vector<std::string> ret; + ret.push_back(list[0]); + + for (size_t i = 1; i < list.size(); ++i) { + if (std::find(ret.begin(), ret.end(), list[i]) == ret.end()) + ret.push_back(list[i]); + } + + list.swap(ret); +} + +namespace { +unsigned char cupper(const struct cs_info* csconv, int nIndex) { + assert(nIndex >= 0 && nIndex <= 255); + return csconv[nIndex].cupper; +} + +unsigned char clower(const struct cs_info* csconv, int nIndex) { + assert(nIndex >= 0 && nIndex <= 255); + return csconv[nIndex].clower; +} + +unsigned char ccase(const struct cs_info* csconv, int nIndex) { + assert(nIndex >= 0 && nIndex <= 255); + return csconv[nIndex].ccase; +} +} + +w_char upper_utf(w_char u, int langnum) { + unsigned short idx = (u.h << 8) + u.l; + unsigned short upridx = unicodetoupper(idx, langnum); + if (idx != upridx) { + u.h = (unsigned char)(upridx >> 8); + u.l = (unsigned char)(upridx & 0x00FF); + } + return u; +} + +w_char lower_utf(w_char u, int langnum) { + unsigned short idx = (u.h << 8) + u.l; + unsigned short lwridx = unicodetolower(idx, langnum); + if (idx != lwridx) { + u.h = (unsigned char)(lwridx >> 8); + u.l = (unsigned char)(lwridx & 0x00FF); + } + return u; +} + +// convert std::string to all caps +std::string& mkallcap(std::string& s, const struct cs_info* csconv) { + for (std::string::iterator aI = s.begin(), aEnd = s.end(); aI != aEnd; ++aI) { + *aI = cupper(csconv, static_cast<unsigned char>(*aI)); + } + return s; +} + +// convert std::string to all little +std::string& mkallsmall(std::string& s, const struct cs_info* csconv) { + for (std::string::iterator aI = s.begin(), aEnd = s.end(); aI != aEnd; ++aI) { + *aI = clower(csconv, static_cast<unsigned char>(*aI)); + } + return s; +} + +std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u, + int langnum) { + for (size_t i = 0; i < u.size(); ++i) { + unsigned short idx = (u[i].h << 8) + u[i].l; + unsigned short lwridx = unicodetolower(idx, langnum); + if (idx != lwridx) { + u[i].h = (unsigned char)(lwridx >> 8); + u[i].l = (unsigned char)(lwridx & 0x00FF); + } + } + return u; +} + +std::vector<w_char>& mkallcap_utf(std::vector<w_char>& u, int langnum) { + for (size_t i = 0; i < u.size(); i++) { + 
unsigned short idx = (u[i].h << 8) + u[i].l; + unsigned short upridx = unicodetoupper(idx, langnum); + if (idx != upridx) { + u[i].h = (unsigned char)(upridx >> 8); + u[i].l = (unsigned char)(upridx & 0x00FF); + } + } + return u; +} + +std::string& mkinitcap(std::string& s, const struct cs_info* csconv) { + if (!s.empty()) { + s[0] = cupper(csconv, static_cast<unsigned char>(s[0])); + } + return s; +} + +std::vector<w_char>& mkinitcap_utf(std::vector<w_char>& u, int langnum) { + if (!u.empty()) { + unsigned short idx = (u[0].h << 8) + u[0].l; + unsigned short upridx = unicodetoupper(idx, langnum); + if (idx != upridx) { + u[0].h = (unsigned char)(upridx >> 8); + u[0].l = (unsigned char)(upridx & 0x00FF); + } + } + return u; +} + +std::string& mkinitsmall(std::string& s, const struct cs_info* csconv) { + if (!s.empty()) { + s[0] = clower(csconv, static_cast<unsigned char>(s[0])); + } + return s; +} + +std::vector<w_char>& mkinitsmall_utf(std::vector<w_char>& u, int langnum) { + if (!u.empty()) { + unsigned short idx = (u[0].h << 8) + u[0].l; + unsigned short lwridx = unicodetolower(idx, langnum); + if (idx != lwridx) { + u[0].h = (unsigned char)(lwridx >> 8); + u[0].l = (unsigned char)(lwridx & 0x00FF); + } + } + return u; +} + +// conversion function for protected memory +void store_pointer(char* dest, char* source) { + memcpy(dest, &source, sizeof(char*)); +} + +// conversion function for protected memory +char* get_stored_pointer(const char* s) { + char* p; + memcpy(&p, s, sizeof(char*)); + return p; +} + +#ifndef MOZILLA_CLIENT + +// these are simple character mappings for the +// encodings supported +// supplying isupper, tolower, and toupper + +static struct cs_info iso1_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, 
{0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, + {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso2_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, 
+ {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xb1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x01, 0xb3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x01, 0xb5, 0xa5}, {0x01, 0xb6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x01, 0xb9, 0xa9}, {0x01, 0xba, 0xaa}, + {0x01, 0xbb, 0xab}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad}, + {0x01, 0xbe, 
0xae}, {0x01, 0xbf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xa1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xa3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xa5}, {0x00, 0xb6, 0xa6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xa9}, + {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xab}, {0x00, 0xbc, 0xac}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xae}, {0x00, 0xbf, 0xaf}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, + {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso3_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 
0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xb1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x01, 0xb6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x01, 0x69, 0xa9}, {0x01, 0xba, 0xaa}, + {0x01, 0xbb, 0xab}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x01, 0xbf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xa1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xa6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0x49}, + {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xab}, {0x00, 0xbc, 0xac}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xaf}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x00, 0xc3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x00, 0xd0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, + {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xe3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso4_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 
0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xb1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x01, 0xb3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x01, 0xb5, 0xa5}, {0x01, 0xb6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x01, 0xb9, 0xa9}, {0x01, 0xba, 0xaa}, + {0x01, 0xbb, 0xab}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad}, + {0x01, 
0xbe, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xa1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xa3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xa5}, {0x00, 0xb6, 0xa6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xa9}, + {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xab}, {0x00, 0xbc, 0xac}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xae}, {0x00, 0xbf, 0xbf}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, + {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso5_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 
0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xf1, 0xa1}, + {0x01, 0xf2, 0xa2}, {0x01, 0xf3, 0xa3}, {0x01, 0xf4, 0xa4}, + {0x01, 0xf5, 0xa5}, {0x01, 0xf6, 0xa6}, {0x01, 0xf7, 0xa7}, + {0x01, 0xf8, 0xa8}, {0x01, 0xf9, 0xa9}, {0x01, 0xfa, 0xaa}, + {0x01, 0xfb, 0xab}, {0x01, 0xfc, 0xac}, {0x00, 0xad, 0xad}, + {0x01, 0xfe, 0xae}, {0x01, 0xff, 0xaf}, {0x01, 0xd0, 0xb0}, + {0x01, 0xd1, 0xb1}, {0x01, 0xd2, 0xb2}, {0x01, 0xd3, 0xb3}, + {0x01, 0xd4, 0xb4}, {0x01, 0xd5, 0xb5}, {0x01, 0xd6, 0xb6}, + {0x01, 0xd7, 0xb7}, {0x01, 0xd8, 0xb8}, {0x01, 0xd9, 0xb9}, + {0x01, 0xda, 0xba}, {0x01, 0xdb, 0xbb}, {0x01, 0xdc, 0xbc}, + {0x01, 0xdd, 0xbd}, {0x01, 0xde, 0xbe}, {0x01, 0xdf, 0xbf}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x00, 0xd0, 0xb0}, {0x00, 0xd1, 0xb1}, + {0x00, 0xd2, 0xb2}, {0x00, 0xd3, 0xb3}, {0x00, 0xd4, 0xb4}, + {0x00, 0xd5, 0xb5}, {0x00, 0xd6, 0xb6}, {0x00, 0xd7, 0xb7}, + {0x00, 0xd8, 0xb8}, {0x00, 0xd9, 0xb9}, {0x00, 0xda, 0xba}, + {0x00, 0xdb, 0xbb}, {0x00, 0xdc, 0xbc}, {0x00, 0xdd, 0xbd}, + {0x00, 0xde, 0xbe}, {0x00, 0xdf, 0xbf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xa1}, {0x00, 0xf2, 0xa2}, + {0x00, 0xf3, 0xa3}, {0x00, 0xf4, 0xa4}, {0x00, 0xf5, 0xa5}, + {0x00, 0xf6, 0xa6}, {0x00, 0xf7, 0xa7}, {0x00, 0xf8, 0xa8}, + {0x00, 0xf9, 0xa9}, {0x00, 0xfa, 0xaa}, {0x00, 0xfb, 0xab}, + {0x00, 0xfc, 0xac}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xae}, + {0x00, 0xff, 0xaf}}; + +static struct cs_info iso6_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 
0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + 
{0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2}, + {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5}, + {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8}, + {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb}, + {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce}, + {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1}, + {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4}, + {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda}, + {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd}, + {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0}, + {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3}, + {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6}, + {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9}, + {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec}, + {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef}, + {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2}, + {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5}, + {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8}, + {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb}, + {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso7_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, 
{0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x01, 0xdc, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x01, 0xdd, 0xb8}, {0x01, 0xde, 0xb9}, + {0x01, 0xdf, 0xba}, {0x00, 0xbb, 0xbb}, {0x01, 0xfc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x01, 0xfd, 0xbe}, {0x01, 0xfe, 0xbf}, + {0x00, 0xc0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x00, 0xd2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x01, 0xf7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x00, 0xdc, 0xb6}, {0x00, 0xdd, 0xb8}, + {0x00, 0xde, 0xb9}, {0x00, 0xdf, 0xba}, {0x00, 0xe0, 0xe0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd3}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xd7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xbc}, {0x00, 0xfd, 0xbe}, {0x00, 0xfe, 0xbf}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso8_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, 
{0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, 
+ {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2}, + {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5}, + {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8}, + {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb}, + {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce}, + {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1}, + {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4}, + {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda}, + {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd}, + {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0}, + {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3}, + {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6}, + {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9}, + {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec}, + {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef}, + {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2}, + {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5}, + {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8}, + {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb}, + {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso9_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0xfd, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 
0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0xdd}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0x69, 0xdd}, + {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0x49}, {0x00, 0xfe, 0xde}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso10_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 
0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 
0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2}, + {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5}, + {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8}, + {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb}, + {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce}, + {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1}, + {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4}, + {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda}, + {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd}, + {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0}, + {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3}, + {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6}, + {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9}, + {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec}, + {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef}, + {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2}, + {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5}, + {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8}, + {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb}, + {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe}, + {0x00, 0xff, 0xff}}; + +static struct cs_info koi8r_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + 
{0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xb3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x01, 0xa3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x00, 0xc0, 0xe0}, {0x00, 0xc1, 0xe1}, {0x00, 0xc2, 0xe2}, + {0x00, 0xc3, 0xe3}, {0x00, 0xc4, 0xe4}, {0x00, 0xc5, 0xe5}, + {0x00, 0xc6, 0xe6}, {0x00, 0xc7, 0xe7}, {0x00, 0xc8, 0xe8}, + {0x00, 0xc9, 0xe9}, {0x00, 0xca, 0xea}, {0x00, 0xcb, 0xeb}, + {0x00, 0xcc, 0xec}, {0x00, 0xcd, 0xed}, {0x00, 0xce, 0xee}, + {0x00, 0xcf, 0xef}, {0x00, 0xd0, 0xf0}, {0x00, 0xd1, 0xf1}, + {0x00, 0xd2, 0xf2}, {0x00, 0xd3, 0xf3}, {0x00, 0xd4, 0xf4}, + {0x00, 0xd5, 0xf5}, {0x00, 0xd6, 0xf6}, {0x00, 0xd7, 0xf7}, + {0x00, 0xd8, 0xf8}, {0x00, 0xd9, 0xf9}, {0x00, 0xda, 0xfa}, + {0x00, 0xdb, 0xfb}, {0x00, 0xdc, 0xfc}, {0x00, 0xdd, 0xfd}, + {0x00, 0xde, 0xfe}, {0x00, 0xdf, 0xff}, {0x01, 0xc0, 0xe0}, + {0x01, 0xc1, 0xe1}, {0x01, 0xc2, 0xe2}, {0x01, 0xc3, 0xe3}, + {0x01, 0xc4, 0xe4}, {0x01, 0xc5, 0xe5}, {0x01, 0xc6, 0xe6}, + {0x01, 0xc7, 0xe7}, {0x01, 0xc8, 0xe8}, {0x01, 0xc9, 0xe9}, + {0x01, 0xca, 0xea}, {0x01, 0xcb, 0xeb}, {0x01, 0xcc, 0xec}, + {0x01, 0xcd, 0xed}, {0x01, 0xce, 0xee}, {0x01, 0xcf, 0xef}, + {0x01, 0xd0, 0xf0}, {0x01, 0xd1, 0xf1}, {0x01, 0xd2, 0xf2}, + {0x01, 0xd3, 0xf3}, {0x01, 0xd4, 0xf4}, {0x01, 0xd5, 0xf5}, + {0x01, 0xd6, 0xf6}, {0x01, 0xd7, 0xf7}, {0x01, 0xd8, 0xf8}, + {0x01, 0xd9, 0xf9}, {0x01, 0xda, 0xfa}, {0x01, 0xdb, 0xfb}, + {0x01, 0xdc, 0xfc}, {0x01, 0xdd, 0xfd}, {0x01, 0xde, 0xfe}, + {0x01, 0xdf, 0xff}}; + +static struct cs_info koi8u_tbl[] = { + {0x00, 0x00, 
0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xb3}, {0x00, 0xa4, 0xb4}, /* ie */ + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xb6}, /* i */ + {0x00, 0xa7, 0xb7}, /* ii */ + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + 
{0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xbd}, /* g'' */ + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x01, 0xa3, 0xb3}, + {0x00, 0xb4, 0xb4}, /* IE */ + {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, /* I */ + {0x00, 0xb7, 0xb7}, /* II */ + {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, {0x00, 0xba, 0xba}, + {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, {0x00, 0xbd, 0xbd}, + {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, {0x00, 0xc0, 0xe0}, + {0x00, 0xc1, 0xe1}, {0x00, 0xc2, 0xe2}, {0x00, 0xc3, 0xe3}, + {0x00, 0xc4, 0xe4}, {0x00, 0xc5, 0xe5}, {0x00, 0xc6, 0xe6}, + {0x00, 0xc7, 0xe7}, {0x00, 0xc8, 0xe8}, {0x00, 0xc9, 0xe9}, + {0x00, 0xca, 0xea}, {0x00, 0xcb, 0xeb}, {0x00, 0xcc, 0xec}, + {0x00, 0xcd, 0xed}, {0x00, 0xce, 0xee}, {0x00, 0xcf, 0xef}, + {0x00, 0xd0, 0xf0}, {0x00, 0xd1, 0xf1}, {0x00, 0xd2, 0xf2}, + {0x00, 0xd3, 0xf3}, {0x00, 0xd4, 0xf4}, {0x00, 0xd5, 0xf5}, + {0x00, 0xd6, 0xf6}, {0x00, 0xd7, 0xf7}, {0x00, 0xd8, 0xf8}, + {0x00, 0xd9, 0xf9}, {0x00, 0xda, 0xfa}, {0x00, 0xdb, 0xfb}, + {0x00, 0xdc, 0xfc}, {0x00, 0xdd, 0xfd}, {0x00, 0xde, 0xfe}, + {0x00, 0xdf, 0xff}, {0x01, 0xc0, 0xe0}, {0x01, 0xc1, 0xe1}, + {0x01, 0xc2, 0xe2}, {0x01, 0xc3, 0xe3}, {0x01, 0xc4, 0xe4}, + {0x01, 0xc5, 0xe5}, {0x01, 0xc6, 0xe6}, {0x01, 0xc7, 0xe7}, + {0x01, 0xc8, 0xe8}, {0x01, 0xc9, 0xe9}, {0x01, 0xca, 0xea}, + {0x01, 0xcb, 0xeb}, {0x01, 0xcc, 0xec}, {0x01, 0xcd, 0xed}, + {0x01, 0xce, 0xee}, {0x01, 0xcf, 0xef}, {0x01, 0xd0, 0xf0}, + {0x01, 0xd1, 0xf1}, {0x01, 0xd2, 0xf2}, {0x01, 0xd3, 0xf3}, + {0x01, 0xd4, 0xf4}, {0x01, 0xd5, 0xf5}, {0x01, 0xd6, 0xf6}, + {0x01, 0xd7, 0xf7}, {0x01, 0xd8, 0xf8}, {0x01, 0xd9, 0xf9}, + {0x01, 0xda, 0xfa}, {0x01, 0xdb, 0xfb}, {0x01, 0xdc, 0xfc}, + {0x01, 0xdd, 0xfd}, {0x01, 0xde, 0xfe}, {0x01, 0xdf, 0xff}}; + +static struct cs_info cp1251_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, 
{0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x01, 0x90, 0x80}, + {0x01, 0x83, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x81}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x01, 0x9a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x01, 0x9c, 0x8c}, + {0x01, 0x9d, 0x8d}, {0x01, 0x9e, 0x8e}, {0x01, 0x9f, 0x8f}, + {0x00, 0x90, 0x80}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x8a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x8c}, {0x00, 0x9d, 0x8d}, {0x00, 0x9e, 0x8e}, + {0x00, 0x9f, 0x8f}, {0x00, 0xa0, 0xa0}, {0x01, 0xa2, 0xa1}, + {0x00, 0xa2, 0xa1}, {0x01, 0xbc, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x01, 0xb4, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x01, 0xb8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x01, 0xba, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x01, 0xbf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x01, 0xb3, 0xb2}, {0x00, 0xb3, 0xb2}, + {0x00, 0xb4, 0xa5}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xa8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xaa}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xa3}, + {0x01, 0xbe, 0xbd}, {0x00, 0xbe, 0xbd}, {0x00, 0xbf, 0xaf}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x01, 0xf7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, + {0x01, 0xfe, 0xde}, {0x01, 0xff, 0xdf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xd7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, 
+ {0x00, 0xff, 0xdf}}; + +static struct cs_info iso13_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0A, 0x0A}, {0x00, 0x0B, 0x0B}, + {0x00, 0x0C, 0x0C}, {0x00, 0x0D, 0x0D}, {0x00, 0x0E, 0x0E}, + {0x00, 0x0F, 0x0F}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1A, 0x1A}, + {0x00, 0x1B, 0x1B}, {0x00, 0x1C, 0x1C}, {0x00, 0x1D, 0x1D}, + {0x00, 0x1E, 0x1E}, {0x00, 0x1F, 0x1F}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2A, 0x2A}, {0x00, 0x2B, 0x2B}, {0x00, 0x2C, 0x2C}, + {0x00, 0x2D, 0x2D}, {0x00, 0x2E, 0x2E}, {0x00, 0x2F, 0x2F}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3A, 0x3A}, {0x00, 0x3B, 0x3B}, + {0x00, 0x3C, 0x3C}, {0x00, 0x3D, 0x3D}, {0x00, 0x3E, 0x3E}, + {0x00, 0x3F, 0x3F}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6A, 0x4A}, + {0x01, 0x6B, 0x4B}, {0x01, 0x6C, 0x4C}, {0x01, 0x6D, 0x4D}, + {0x01, 0x6E, 0x4E}, {0x01, 0x6F, 0x4F}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7A, 0x5A}, {0x00, 0x5B, 0x5B}, {0x00, 0x5C, 0x5C}, + {0x00, 0x5D, 0x5D}, {0x00, 0x5E, 0x5E}, {0x00, 0x5F, 0x5F}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6A, 0x4A}, {0x00, 0x6B, 0x4B}, + {0x00, 0x6C, 0x4C}, {0x00, 0x6D, 0x4D}, {0x00, 0x6E, 0x4E}, + {0x00, 0x6F, 0x4F}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7A, 0x5A}, + {0x00, 0x7B, 0x7B}, {0x00, 0x7C, 0x7C}, {0x00, 0x7D, 0x7D}, + {0x00, 0x7E, 0x7E}, {0x00, 0x7F, 0x7F}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8A, 0x8A}, {0x00, 0x8B, 0x8B}, {0x00, 0x8C, 0x8C}, + {0x00, 0x8D, 0x8D}, {0x00, 0x8E, 0x8E}, {0x00, 0x8F, 0x8F}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9A, 0x9A}, {0x00, 0x9B, 0x9B}, + {0x00, 0x9C, 0x9C}, {0x00, 0x9D, 0x9D}, {0x00, 0x9E, 0x9E}, + {0x00, 0x9F, 0x9F}, {0x00, 0xA0, 0xA0}, {0x00, 0xA1, 0xA1}, + {0x00, 0xA2, 0xA2}, {0x00, 0xA3, 0xA3}, {0x00, 0xA4, 0xA4}, + {0x00, 0xA5, 0xA5}, {0x00, 0xA6, 0xA6}, {0x00, 0xA7, 0xA7}, + {0x01, 0xB8, 
0xA8}, {0x00, 0xA9, 0xA9}, {0x01, 0xBA, 0xAA}, + {0x00, 0xAB, 0xAB}, {0x00, 0xAC, 0xAC}, {0x00, 0xAD, 0xAD}, + {0x00, 0xAE, 0xAE}, {0x01, 0xBF, 0xAF}, {0x00, 0xB0, 0xB0}, + {0x00, 0xB1, 0xB1}, {0x00, 0xB2, 0xB2}, {0x00, 0xB3, 0xB3}, + {0x00, 0xB4, 0xB4}, {0x00, 0xB5, 0xB5}, {0x00, 0xB6, 0xB6}, + {0x00, 0xB7, 0xB7}, {0x00, 0xB8, 0xA8}, {0x00, 0xB9, 0xB9}, + {0x00, 0xBA, 0xAA}, {0x00, 0xBB, 0xBB}, {0x00, 0xBC, 0xBC}, + {0x00, 0xBD, 0xBD}, {0x00, 0xBE, 0xBE}, {0x00, 0xBF, 0xAF}, + {0x01, 0xE0, 0xC0}, {0x01, 0xE1, 0xC1}, {0x01, 0xE2, 0xC2}, + {0x01, 0xE3, 0xC3}, {0x01, 0xE4, 0xC4}, {0x01, 0xE5, 0xC5}, + {0x01, 0xE6, 0xC6}, {0x01, 0xE7, 0xC7}, {0x01, 0xE8, 0xC8}, + {0x01, 0xE9, 0xC9}, {0x01, 0xEA, 0xCA}, {0x01, 0xEB, 0xCB}, + {0x01, 0xEC, 0xCC}, {0x01, 0xED, 0xCD}, {0x01, 0xEE, 0xCE}, + {0x01, 0xEF, 0xCF}, {0x01, 0xF0, 0xD0}, {0x01, 0xF1, 0xD1}, + {0x01, 0xF2, 0xD2}, {0x01, 0xF3, 0xD3}, {0x01, 0xF4, 0xD4}, + {0x01, 0xF5, 0xD5}, {0x01, 0xF6, 0xD6}, {0x00, 0xD7, 0xD7}, + {0x01, 0xF8, 0xD8}, {0x01, 0xF9, 0xD9}, {0x01, 0xFA, 0xDA}, + {0x01, 0xFB, 0xDB}, {0x01, 0xFC, 0xDC}, {0x01, 0xFD, 0xDD}, + {0x01, 0xFE, 0xDE}, {0x00, 0xDF, 0xDF}, {0x00, 0xE0, 0xC0}, + {0x00, 0xE1, 0xC1}, {0x00, 0xE2, 0xC2}, {0x00, 0xE3, 0xC3}, + {0x00, 0xE4, 0xC4}, {0x00, 0xE5, 0xC5}, {0x00, 0xE6, 0xC6}, + {0x00, 0xE7, 0xC7}, {0x00, 0xE8, 0xC8}, {0x00, 0xE9, 0xC9}, + {0x00, 0xEA, 0xCA}, {0x00, 0xEB, 0xCB}, {0x00, 0xEC, 0xCC}, + {0x00, 0xED, 0xCD}, {0x00, 0xEE, 0xCE}, {0x00, 0xEF, 0xCF}, + {0x00, 0xF0, 0xD0}, {0x00, 0xF1, 0xD1}, {0x00, 0xF2, 0xD2}, + {0x00, 0xF3, 0xD3}, {0x00, 0xF4, 0xD4}, {0x00, 0xF5, 0xD5}, + {0x00, 0xF6, 0xD6}, {0x00, 0xF7, 0xF7}, {0x00, 0xF8, 0xD8}, + {0x00, 0xF9, 0xD9}, {0x00, 0xFA, 0xDA}, {0x00, 0xFB, 0xDB}, + {0x00, 0xFC, 0xDC}, {0x00, 0xFD, 0xDD}, {0x00, 0xFE, 0xDE}, + {0x00, 0xFF, 0xFF}}; + +static struct cs_info iso14_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 
0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x01, 0xa2, 0xa1}, + {0x00, 0xa2, 0xa1}, {0x00, 0xa3, 0xa3}, {0x01, 0xa5, 0xa4}, + {0x00, 0xa5, 0xa4}, {0x01, 0xa6, 0xab}, {0x00, 0xa7, 0xa7}, + {0x01, 0xb8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x01, 0xba, 0xaa}, + {0x00, 0xab, 0xa6}, {0x01, 0xbc, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x01, 0xff, 0xaf}, {0x01, 0xb1, 0xb0}, + {0x00, 0xb1, 0xb0}, {0x01, 0xb3, 0xb2}, {0x00, 0xb3, 0xb2}, + {0x01, 0xb5, 0xb4}, {0x00, 0xb5, 0xb4}, {0x00, 0xb6, 0xb6}, + {0x01, 0xb9, 0xb7}, {0x00, 0xb8, 0xa8}, {0x00, 0xb9, 0xb6}, + {0x00, 0xba, 0xaa}, {0x01, 0xbf, 0xbb}, {0x00, 0xbc, 0xac}, + {0x01, 0xbe, 0xbd}, {0x00, 0xbe, 0xbd}, {0x00, 0xbf, 0xbb}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x01, 0xf7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, + {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xd7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 
0xfe, 0xde}, + {0x00, 0xff, 0xff}}; + +static struct cs_info iso15_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x01, 0xa8, 0xa6}, {0x00, 0xa7, 0xa7}, + 
{0x00, 0xa8, 0xa6}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x01, 0xb8, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb4}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x01, 0xbd, 0xbc}, + {0x00, 0xbd, 0xbc}, {0x01, 0xff, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x01, 0xe0, 0xc0}, {0x01, 0xe1, 0xc1}, {0x01, 0xe2, 0xc2}, + {0x01, 0xe3, 0xc3}, {0x01, 0xe4, 0xc4}, {0x01, 0xe5, 0xc5}, + {0x01, 0xe6, 0xc6}, {0x01, 0xe7, 0xc7}, {0x01, 0xe8, 0xc8}, + {0x01, 0xe9, 0xc9}, {0x01, 0xea, 0xca}, {0x01, 0xeb, 0xcb}, + {0x01, 0xec, 0xcc}, {0x01, 0xed, 0xcd}, {0x01, 0xee, 0xce}, + {0x01, 0xef, 0xcf}, {0x01, 0xf0, 0xd0}, {0x01, 0xf1, 0xd1}, + {0x01, 0xf2, 0xd2}, {0x01, 0xf3, 0xd3}, {0x01, 0xf4, 0xd4}, + {0x01, 0xf5, 0xd5}, {0x01, 0xf6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x01, 0xf8, 0xd8}, {0x01, 0xf9, 0xd9}, {0x01, 0xfa, 0xda}, + {0x01, 0xfb, 0xdb}, {0x01, 0xfc, 0xdc}, {0x01, 0xfd, 0xdd}, + {0x01, 0xfe, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xc0}, + {0x00, 0xe1, 0xc1}, {0x00, 0xe2, 0xc2}, {0x00, 0xe3, 0xc3}, + {0x00, 0xe4, 0xc4}, {0x00, 0xe5, 0xc5}, {0x00, 0xe6, 0xc6}, + {0x00, 0xe7, 0xc7}, {0x00, 0xe8, 0xc8}, {0x00, 0xe9, 0xc9}, + {0x00, 0xea, 0xca}, {0x00, 0xeb, 0xcb}, {0x00, 0xec, 0xcc}, + {0x00, 0xed, 0xcd}, {0x00, 0xee, 0xce}, {0x00, 0xef, 0xcf}, + {0x00, 0xf0, 0xd0}, {0x00, 0xf1, 0xd1}, {0x00, 0xf2, 0xd2}, + {0x00, 0xf3, 0xd3}, {0x00, 0xf4, 0xd4}, {0x00, 0xf5, 0xd5}, + {0x00, 0xf6, 0xd6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xd8}, + {0x00, 0xf9, 0xd9}, {0x00, 0xfa, 0xda}, {0x00, 0xfb, 0xdb}, + {0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde}, + {0x00, 0xff, 0xbe}}; + +static struct cs_info iscii_devanagari_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 
0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, {0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2}, + {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5}, + {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8}, + {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb}, + {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce}, + {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1}, + {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4}, + {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda}, + {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd}, + {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0}, + {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3}, + {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6}, + {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9}, + {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec}, + {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef}, + {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2}, + {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5}, + {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8}, + {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb}, + {0x00, 0xfc, 0xfc}, {0x00, 
0xfd, 0xfd}, {0x00, 0xfe, 0xfe}, + {0x00, 0xff, 0xff}}; + +static struct cs_info tis620_tbl[] = { + {0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02}, + {0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05}, + {0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08}, + {0x00, 0x09, 0x09}, {0x00, 0x0a, 0x0a}, {0x00, 0x0b, 0x0b}, + {0x00, 0x0c, 0x0c}, {0x00, 0x0d, 0x0d}, {0x00, 0x0e, 0x0e}, + {0x00, 0x0f, 0x0f}, {0x00, 0x10, 0x10}, {0x00, 0x11, 0x11}, + {0x00, 0x12, 0x12}, {0x00, 0x13, 0x13}, {0x00, 0x14, 0x14}, + {0x00, 0x15, 0x15}, {0x00, 0x16, 0x16}, {0x00, 0x17, 0x17}, + {0x00, 0x18, 0x18}, {0x00, 0x19, 0x19}, {0x00, 0x1a, 0x1a}, + {0x00, 0x1b, 0x1b}, {0x00, 0x1c, 0x1c}, {0x00, 0x1d, 0x1d}, + {0x00, 0x1e, 0x1e}, {0x00, 0x1f, 0x1f}, {0x00, 0x20, 0x20}, + {0x00, 0x21, 0x21}, {0x00, 0x22, 0x22}, {0x00, 0x23, 0x23}, + {0x00, 0x24, 0x24}, {0x00, 0x25, 0x25}, {0x00, 0x26, 0x26}, + {0x00, 0x27, 0x27}, {0x00, 0x28, 0x28}, {0x00, 0x29, 0x29}, + {0x00, 0x2a, 0x2a}, {0x00, 0x2b, 0x2b}, {0x00, 0x2c, 0x2c}, + {0x00, 0x2d, 0x2d}, {0x00, 0x2e, 0x2e}, {0x00, 0x2f, 0x2f}, + {0x00, 0x30, 0x30}, {0x00, 0x31, 0x31}, {0x00, 0x32, 0x32}, + {0x00, 0x33, 0x33}, {0x00, 0x34, 0x34}, {0x00, 0x35, 0x35}, + {0x00, 0x36, 0x36}, {0x00, 0x37, 0x37}, {0x00, 0x38, 0x38}, + {0x00, 0x39, 0x39}, {0x00, 0x3a, 0x3a}, {0x00, 0x3b, 0x3b}, + {0x00, 0x3c, 0x3c}, {0x00, 0x3d, 0x3d}, {0x00, 0x3e, 0x3e}, + {0x00, 0x3f, 0x3f}, {0x00, 0x40, 0x40}, {0x01, 0x61, 0x41}, + {0x01, 0x62, 0x42}, {0x01, 0x63, 0x43}, {0x01, 0x64, 0x44}, + {0x01, 0x65, 0x45}, {0x01, 0x66, 0x46}, {0x01, 0x67, 0x47}, + {0x01, 0x68, 0x48}, {0x01, 0x69, 0x49}, {0x01, 0x6a, 0x4a}, + {0x01, 0x6b, 0x4b}, {0x01, 0x6c, 0x4c}, {0x01, 0x6d, 0x4d}, + {0x01, 0x6e, 0x4e}, {0x01, 0x6f, 0x4f}, {0x01, 0x70, 0x50}, + {0x01, 0x71, 0x51}, {0x01, 0x72, 0x52}, {0x01, 0x73, 0x53}, + {0x01, 0x74, 0x54}, {0x01, 0x75, 0x55}, {0x01, 0x76, 0x56}, + {0x01, 0x77, 0x57}, {0x01, 0x78, 0x58}, {0x01, 0x79, 0x59}, + {0x01, 0x7a, 0x5a}, {0x00, 0x5b, 0x5b}, {0x00, 0x5c, 0x5c}, + {0x00, 0x5d, 0x5d}, {0x00, 0x5e, 0x5e}, {0x00, 0x5f, 0x5f}, + {0x00, 0x60, 0x60}, {0x00, 0x61, 0x41}, {0x00, 0x62, 0x42}, + {0x00, 0x63, 0x43}, {0x00, 0x64, 0x44}, {0x00, 0x65, 0x45}, + {0x00, 0x66, 0x46}, {0x00, 0x67, 0x47}, {0x00, 0x68, 0x48}, + {0x00, 0x69, 0x49}, {0x00, 0x6a, 0x4a}, {0x00, 0x6b, 0x4b}, + {0x00, 0x6c, 0x4c}, {0x00, 0x6d, 0x4d}, {0x00, 0x6e, 0x4e}, + {0x00, 0x6f, 0x4f}, {0x00, 0x70, 0x50}, {0x00, 0x71, 0x51}, + {0x00, 0x72, 0x52}, {0x00, 0x73, 0x53}, {0x00, 0x74, 0x54}, + {0x00, 0x75, 0x55}, {0x00, 0x76, 0x56}, {0x00, 0x77, 0x57}, + {0x00, 0x78, 0x58}, {0x00, 0x79, 0x59}, {0x00, 0x7a, 0x5a}, + {0x00, 0x7b, 0x7b}, {0x00, 0x7c, 0x7c}, {0x00, 0x7d, 0x7d}, + {0x00, 0x7e, 0x7e}, {0x00, 0x7f, 0x7f}, {0x00, 0x80, 0x80}, + {0x00, 0x81, 0x81}, {0x00, 0x82, 0x82}, {0x00, 0x83, 0x83}, + {0x00, 0x84, 0x84}, {0x00, 0x85, 0x85}, {0x00, 0x86, 0x86}, + {0x00, 0x87, 0x87}, {0x00, 0x88, 0x88}, {0x00, 0x89, 0x89}, + {0x00, 0x8a, 0x8a}, {0x00, 0x8b, 0x8b}, {0x00, 0x8c, 0x8c}, + {0x00, 0x8d, 0x8d}, {0x00, 0x8e, 0x8e}, {0x00, 0x8f, 0x8f}, + {0x00, 0x90, 0x90}, {0x00, 0x91, 0x91}, {0x00, 0x92, 0x92}, + {0x00, 0x93, 0x93}, {0x00, 0x94, 0x94}, {0x00, 0x95, 0x95}, + {0x00, 0x96, 0x96}, {0x00, 0x97, 0x97}, {0x00, 0x98, 0x98}, + {0x00, 0x99, 0x99}, {0x00, 0x9a, 0x9a}, {0x00, 0x9b, 0x9b}, + {0x00, 0x9c, 0x9c}, {0x00, 0x9d, 0x9d}, {0x00, 0x9e, 0x9e}, + {0x00, 0x9f, 0x9f}, {0x00, 0xa0, 0xa0}, {0x00, 0xa1, 0xa1}, + {0x00, 0xa2, 0xa2}, {0x00, 0xa3, 0xa3}, {0x00, 0xa4, 0xa4}, + {0x00, 0xa5, 0xa5}, {0x00, 0xa6, 0xa6}, 
{0x00, 0xa7, 0xa7}, + {0x00, 0xa8, 0xa8}, {0x00, 0xa9, 0xa9}, {0x00, 0xaa, 0xaa}, + {0x00, 0xab, 0xab}, {0x00, 0xac, 0xac}, {0x00, 0xad, 0xad}, + {0x00, 0xae, 0xae}, {0x00, 0xaf, 0xaf}, {0x00, 0xb0, 0xb0}, + {0x00, 0xb1, 0xb1}, {0x00, 0xb2, 0xb2}, {0x00, 0xb3, 0xb3}, + {0x00, 0xb4, 0xb4}, {0x00, 0xb5, 0xb5}, {0x00, 0xb6, 0xb6}, + {0x00, 0xb7, 0xb7}, {0x00, 0xb8, 0xb8}, {0x00, 0xb9, 0xb9}, + {0x00, 0xba, 0xba}, {0x00, 0xbb, 0xbb}, {0x00, 0xbc, 0xbc}, + {0x00, 0xbd, 0xbd}, {0x00, 0xbe, 0xbe}, {0x00, 0xbf, 0xbf}, + {0x00, 0xc0, 0xc0}, {0x00, 0xc1, 0xc1}, {0x00, 0xc2, 0xc2}, + {0x00, 0xc3, 0xc3}, {0x00, 0xc4, 0xc4}, {0x00, 0xc5, 0xc5}, + {0x00, 0xc6, 0xc6}, {0x00, 0xc7, 0xc7}, {0x00, 0xc8, 0xc8}, + {0x00, 0xc9, 0xc9}, {0x00, 0xca, 0xca}, {0x00, 0xcb, 0xcb}, + {0x00, 0xcc, 0xcc}, {0x00, 0xcd, 0xcd}, {0x00, 0xce, 0xce}, + {0x00, 0xcf, 0xcf}, {0x00, 0xd0, 0xd0}, {0x00, 0xd1, 0xd1}, + {0x00, 0xd2, 0xd2}, {0x00, 0xd3, 0xd3}, {0x00, 0xd4, 0xd4}, + {0x00, 0xd5, 0xd5}, {0x00, 0xd6, 0xd6}, {0x00, 0xd7, 0xd7}, + {0x00, 0xd8, 0xd8}, {0x00, 0xd9, 0xd9}, {0x00, 0xda, 0xda}, + {0x00, 0xdb, 0xdb}, {0x00, 0xdc, 0xdc}, {0x00, 0xdd, 0xdd}, + {0x00, 0xde, 0xde}, {0x00, 0xdf, 0xdf}, {0x00, 0xe0, 0xe0}, + {0x00, 0xe1, 0xe1}, {0x00, 0xe2, 0xe2}, {0x00, 0xe3, 0xe3}, + {0x00, 0xe4, 0xe4}, {0x00, 0xe5, 0xe5}, {0x00, 0xe6, 0xe6}, + {0x00, 0xe7, 0xe7}, {0x00, 0xe8, 0xe8}, {0x00, 0xe9, 0xe9}, + {0x00, 0xea, 0xea}, {0x00, 0xeb, 0xeb}, {0x00, 0xec, 0xec}, + {0x00, 0xed, 0xed}, {0x00, 0xee, 0xee}, {0x00, 0xef, 0xef}, + {0x00, 0xf0, 0xf0}, {0x00, 0xf1, 0xf1}, {0x00, 0xf2, 0xf2}, + {0x00, 0xf3, 0xf3}, {0x00, 0xf4, 0xf4}, {0x00, 0xf5, 0xf5}, + {0x00, 0xf6, 0xf6}, {0x00, 0xf7, 0xf7}, {0x00, 0xf8, 0xf8}, + {0x00, 0xf9, 0xf9}, {0x00, 0xfa, 0xfa}, {0x00, 0xfb, 0xfb}, + {0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe}, + {0x00, 0xff, 0xff}}; + +struct enc_entry { + const char* enc_name; + struct cs_info* cs_table; +}; + +static struct enc_entry encds[] = { + {"iso88591", iso1_tbl}, // ISO-8859-1 + {"iso88592", iso2_tbl}, // ISO-8859-2 + {"iso88593", iso3_tbl}, // ISO-8859-3 + {"iso88594", iso4_tbl}, // ISO-8859-4 + {"iso88595", iso5_tbl}, // ISO-8859-5 + {"iso88596", iso6_tbl}, // ISO-8859-6 + {"iso88597", iso7_tbl}, // ISO-8859-7 + {"iso88598", iso8_tbl}, // ISO-8859-8 + {"iso88599", iso9_tbl}, // ISO-8859-9 + {"iso885910", iso10_tbl}, // ISO-8859-10 + {"tis620", tis620_tbl}, // TIS-620/ISO-8859-11 + {"tis6202533", tis620_tbl}, // TIS-620/ISO-8859-11 + {"iso885911", tis620_tbl}, // TIS-620/ISO-8859-11 + {"iso885913", iso13_tbl}, // ISO-8859-13 + {"iso885914", iso14_tbl}, // ISO-8859-14 + {"iso885915", iso15_tbl}, // ISO-8859-15 + {"koi8r", koi8r_tbl}, // KOI8-R + {"koi8u", koi8u_tbl}, // KOI8-U + {"cp1251", cp1251_tbl}, // CP-1251 + {"microsoftcp1251", cp1251_tbl}, // microsoft-cp1251 + {"xisciias", iscii_devanagari_tbl}, // x-iscii-as + {"isciidevanagari", iscii_devanagari_tbl} // ISCII-DEVANAGARI +}; + +/* map to lower case and remove non alphanumeric chars */ +static void toAsciiLowerAndRemoveNonAlphanumeric(const char* pName, + char* pBuf) { + while (*pName) { + /* A-Z */ + if ((*pName >= 0x41) && (*pName <= 0x5A)) { + *pBuf = (*pName) + 0x20; /* toAsciiLower */ + pBuf++; + } + /* a-z, 0-9 */ + else if (((*pName >= 0x61) && (*pName <= 0x7A)) || + ((*pName >= 0x30) && (*pName <= 0x39))) { + *pBuf = *pName; + pBuf++; + } + + pName++; + } + + *pBuf = '\0'; +} + +struct cs_info* get_current_cs(const std::string& es) { + char* normalized_encoding = new char[es.size() + 1]; + 
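+ // For example, the encoding names "ISO8859-1", "iso-8859-1" and "ISO_8859-1"
+ // all normalize to "iso88591" below, so each of them resolves to iso1_tbl in
+ // the encds[] lookup that follows.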
toAsciiLowerAndRemoveNonAlphanumeric(es.c_str(), normalized_encoding); + + struct cs_info* ccs = NULL; + int n = sizeof(encds) / sizeof(encds[0]); + for (int i = 0; i < n; i++) { + if (strcmp(normalized_encoding, encds[i].enc_name) == 0) { + ccs = encds[i].cs_table; + break; + } + } + + delete[] normalized_encoding; + + if (!ccs) { + HUNSPELL_WARNING(stderr, + "error: unknown encoding %s: using %s as fallback\n", es.c_str(), + encds[0].enc_name); + ccs = encds[0].cs_table; + } + + return ccs; +} +#else +struct cs_info* get_current_cs(const std::string& es) { + return moz_hunspell_GetCurrentCS(es.c_str()); +} +#endif + +// primitive isalpha() replacement for tokenization +std::string get_casechars(const char* enc) { + struct cs_info* csconv = get_current_cs(enc); + std::string expw; + for (int i = 0; i <= 255; ++i) { + if (cupper(csconv, i) != clower(csconv, i)) { + expw.push_back(static_cast<char>(i)); + } + } +#ifdef MOZILLA_CLIENT + delete[] csconv; +#endif + return expw; +} + +// language to encoding default map + +struct lang_map { + const char* lang; + int num; +}; + +static struct lang_map lang2enc[] = + {{"ar", LANG_ar}, {"az", LANG_az}, + {"az_AZ", LANG_az}, // for back-compatibility + {"bg", LANG_bg}, {"ca", LANG_ca}, + {"crh", LANG_crh}, + {"cs", LANG_cs}, {"da", LANG_da}, + {"de", LANG_de}, {"el", LANG_el}, + {"en", LANG_en}, {"es", LANG_es}, + {"eu", LANG_eu}, {"gl", LANG_gl}, + {"fr", LANG_fr}, {"hr", LANG_hr}, + {"hu", LANG_hu}, {"hu_HU", LANG_hu}, // for back-compatibility + {"it", LANG_it}, {"la", LANG_la}, + {"lv", LANG_lv}, {"nl", LANG_nl}, + {"pl", LANG_pl}, {"pt", LANG_pt}, + {"sv", LANG_sv}, {"tr", LANG_tr}, + {"tr_TR", LANG_tr}, // for back-compatibility + {"ru", LANG_ru}, {"uk", LANG_uk}}; + +int get_lang_num(const std::string& lang) { + int n = sizeof(lang2enc) / sizeof(lang2enc[0]); + for (int i = 0; i < n; i++) { + if (strcmp(lang.c_str(), lang2enc[i].lang) == 0) { + return lang2enc[i].num; + } + } + return LANG_xx; +} + +#ifndef OPENOFFICEORG +#ifndef MOZILLA_CLIENT +void initialize_utf_tbl() { + utf_tbl_count++; + if (utf_tbl) + return; + utf_tbl = new unicode_info2[CONTSIZE]; + for (size_t j = 0; j < CONTSIZE; ++j) { + utf_tbl[j].cletter = 0; + utf_tbl[j].clower = (unsigned short)j; + utf_tbl[j].cupper = (unsigned short)j; + } + for (size_t j = 0; j < UTF_LST_LEN; ++j) { + utf_tbl[utf_lst[j].c].cletter = 1; + utf_tbl[utf_lst[j].c].clower = utf_lst[j].clower; + utf_tbl[utf_lst[j].c].cupper = utf_lst[j].cupper; + } +} +#endif +#endif + +void free_utf_tbl() { + if (utf_tbl_count > 0) + utf_tbl_count--; + if (utf_tbl && (utf_tbl_count == 0)) { + delete[] utf_tbl; + utf_tbl = NULL; + } +} + +unsigned short unicodetoupper(unsigned short c, int langnum) { + // In Azeri and Turkish, I and i dictinct letters: + // There are a dotless lower case i pair of upper `I', + // and an upper I with dot pair of lower `i'. + if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh))) + return 0x0130; +#ifdef OPENOFFICEORG + return static_cast<unsigned short>(u_toupper(c)); +#else +#ifdef MOZILLA_CLIENT + return moz_hunspell_ToUpperCase((char16_t)c); +#else + return (utf_tbl) ? utf_tbl[c].cupper : c; +#endif +#endif +} + +unsigned short unicodetolower(unsigned short c, int langnum) { + // In Azeri and Turkish, I and i dictinct letters: + // There are a dotless lower case i pair of upper `I', + // and an upper I with dot pair of lower `i'. 
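+ // Illustrative values for langnum == LANG_tr, LANG_az or LANG_crh:
+ //   unicodetolower(0x0049 /* 'I' */) -> 0x0131 (LATIN SMALL LETTER DOTLESS I)
+ //   unicodetoupper(0x0069 /* 'i' */) -> 0x0130 (LATIN CAPITAL LETTER I WITH DOT ABOVE)
+ // Other languages fall through to the generic lookups below.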
+ if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr) || (langnum == LANG_crh))) + return 0x0131; +#ifdef OPENOFFICEORG + return static_cast<unsigned short>(u_tolower(c)); +#else +#ifdef MOZILLA_CLIENT + return moz_hunspell_ToLowerCase((char16_t)c); +#else + return (utf_tbl) ? utf_tbl[c].clower : c; +#endif +#endif +} + +int unicodeisalpha(unsigned short c) { +#ifdef OPENOFFICEORG + return u_isalpha(c); +#else + return (utf_tbl) ? utf_tbl[c].cletter : 0; +#endif +} + +/* get type of capitalization */ +int get_captype(const std::string& word, cs_info* csconv) { + // now determine the capitalization type of the first nl letters + size_t ncap = 0; + size_t nneutral = 0; + size_t firstcap = 0; + if (csconv == NULL) + return NOCAP; + for (std::string::const_iterator q = word.begin(); q != word.end(); ++q) { + unsigned char nIndex = static_cast<unsigned char>(*q); + if (ccase(csconv, nIndex)) + ncap++; + if (cupper(csconv, nIndex) == clower(csconv, nIndex)) + nneutral++; + } + if (ncap) { + unsigned char nIndex = static_cast<unsigned char>(word[0]); + firstcap = csconv[nIndex].ccase; + } + + // now finally set the captype + if (ncap == 0) { + return NOCAP; + } else if ((ncap == 1) && firstcap) { + return INITCAP; + } else if ((ncap == word.size()) || ((ncap + nneutral) == word.size())) { + return ALLCAP; + } else if ((ncap > 1) && firstcap) { + return HUHINITCAP; + } + return HUHCAP; +} + +int get_captype_utf8(const std::vector<w_char>& word, int langnum) { + // now determine the capitalization type of the first nl letters + size_t ncap = 0; + size_t nneutral = 0; + size_t firstcap = 0; + + std::vector<w_char>::const_iterator it = word.begin(); + std::vector<w_char>::const_iterator it_end = word.end(); + while (it != it_end) { + unsigned short idx = (it->h << 8) + it->l; + unsigned short lwridx = unicodetolower(idx, langnum); + if (idx != lwridx) + ncap++; + if (unicodetoupper(idx, langnum) == lwridx) + nneutral++; + ++it; + } + if (ncap) { + unsigned short idx = (word[0].h << 8) + word[0].l; + firstcap = (idx != unicodetolower(idx, langnum)); + } + + // now finally set the captype + if (ncap == 0) { + return NOCAP; + } else if ((ncap == 1) && firstcap) { + return INITCAP; + } else if ((ncap == word.size()) || ((ncap + nneutral) == word.size())) { + return ALLCAP; + } else if ((ncap > 1) && firstcap) { + return HUHINITCAP; + } + return HUHCAP; +} + +// strip all ignored characters in the string +size_t remove_ignored_chars_utf(std::string& word, + const std::vector<w_char>& ignored_chars) { + std::vector<w_char> w; + std::vector<w_char> w2; + u8_u16(w, word); + + for (size_t i = 0; i < w.size(); ++i) { + if (!std::binary_search(ignored_chars.begin(), + ignored_chars.end(), + w[i])) { + w2.push_back(w[i]); + } + } + + u16_u8(word, w2); + return w2.size(); +} + +// strip all ignored characters in the string +size_t remove_ignored_chars(std::string& word, + const std::string& ignored_chars) { + word.erase( + std::remove_if(word.begin(), word.end(), is_any_of(ignored_chars)), + word.end()); + return word.size(); +} + +bool parse_string(const std::string& line, std::string& out, int ln) { + if (!out.empty()) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions\n", ln); + return false; + } + int i = 0; + int np = 0; + std::string::const_iterator iter = line.begin(); + std::string::const_iterator start_piece = mystrsep(line, iter); + while (start_piece != line.end()) { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + out.assign(start_piece, iter); + np++; + 
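+ // the second whitespace-separated token is the option's value: for a line
+ // of the form "<OPTION> <value>", out now holds <value> and np counts the
+ // two expected fields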
break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np != 2) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", ln); + return false; + } + return true; +} + +bool parse_array(const std::string& line, + std::string& out, + std::vector<w_char>& out_utf16, + int utf8, + int ln) { + if (!parse_string(line, out, ln)) + return false; + if (utf8) { + u8_u16(out_utf16, out); + std::sort(out_utf16.begin(), out_utf16.end()); + } + return true; +} diff --git a/extensions/spellcheck/hunspell/src/csutil.hxx b/extensions/spellcheck/hunspell/src/csutil.hxx new file mode 100644 index 0000000000..96f15c1469 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/csutil.hxx @@ -0,0 +1,323 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. 
Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef CSUTIL_HXX_ +#define CSUTIL_HXX_ + +#include "hunvisapi.h" + +// First some base level utility routines + +#include <fstream> +#include <string> +#include <vector> +#include <string.h> +#include "w_char.hxx" +#include "htypes.hxx" + +// casing +#define NOCAP 0 +#define INITCAP 1 +#define ALLCAP 2 +#define HUHCAP 3 +#define HUHINITCAP 4 + +// default encoding and keystring +#define SPELL_ENCODING "ISO8859-1" +#define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm" + +// default morphological fields +#define MORPH_STEM "st:" +#define MORPH_ALLOMORPH "al:" +#define MORPH_POS "po:" +#define MORPH_DERI_PFX "dp:" +#define MORPH_INFL_PFX "ip:" +#define MORPH_TERM_PFX "tp:" +#define MORPH_DERI_SFX "ds:" +#define MORPH_INFL_SFX "is:" +#define MORPH_TERM_SFX "ts:" +#define MORPH_SURF_PFX "sp:" +#define MORPH_FREQ "fr:" +#define MORPH_PHON "ph:" +#define MORPH_HYPH "hy:" +#define MORPH_PART "pa:" +#define MORPH_FLAG "fl:" +#define MORPH_HENTRY "_H:" +#define MORPH_TAG_LEN strlen(MORPH_STEM) + +#define MSEP_FLD ' ' +#define MSEP_REC '\n' +#define MSEP_ALT '\v' + +// default flags +#define DEFAULTFLAGS 65510 +#define FORBIDDENWORD 65510 +#define ONLYUPCASEFLAG 65511 + +// fix long pathname problem of WIN32 by using w_char std::fstream::open override +LIBHUNSPELL_DLL_EXPORTED void myopen(std::ifstream& stream, const char* path, + std::ios_base::openmode mode); + +// convert UTF-16 characters to UTF-8 +LIBHUNSPELL_DLL_EXPORTED std::string& u16_u8(std::string& dest, + const std::vector<w_char>& src); + +// convert UTF-8 characters to UTF-16 +LIBHUNSPELL_DLL_EXPORTED int u8_u16(std::vector<w_char>& dest, + const std::string& src); + +// remove end of line char(s) +LIBHUNSPELL_DLL_EXPORTED void mychomp(std::string& s); + +// duplicate string +LIBHUNSPELL_DLL_EXPORTED char* mystrdup(const char* s); + +// parse into tokens with char delimiter +LIBHUNSPELL_DLL_EXPORTED std::string::const_iterator mystrsep(const std::string &str, + std::string::const_iterator& start); + +// replace pat by rep in word and return word +LIBHUNSPELL_DLL_EXPORTED std::string& mystrrep(std::string& str, + const std::string& search, + const std::string& replace); + +// append s to ends of every lines in text +LIBHUNSPELL_DLL_EXPORTED std::string& strlinecat(std::string& str, + const std::string& apd); + +// tokenize into lines with new line +LIBHUNSPELL_DLL_EXPORTED std::vector<std::string> line_tok(const std::string& text, + char breakchar); + +// tokenize into lines with new line and uniq in place +LIBHUNSPELL_DLL_EXPORTED void line_uniq(std::string& 
text, char breakchar); + +LIBHUNSPELL_DLL_EXPORTED void line_uniq_app(std::string& text, char breakchar); + +// reverse word +LIBHUNSPELL_DLL_EXPORTED size_t reverseword(std::string& word); + +// reverse word +LIBHUNSPELL_DLL_EXPORTED size_t reverseword_utf(std::string&); + +// remove duplicates +LIBHUNSPELL_DLL_EXPORTED void uniqlist(std::vector<std::string>& list); + +// character encoding information +struct cs_info { + unsigned char ccase; + unsigned char clower; + unsigned char cupper; +}; + +LIBHUNSPELL_DLL_EXPORTED void initialize_utf_tbl(); +LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl(); +LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, + int langnum); +LIBHUNSPELL_DLL_EXPORTED w_char upper_utf(w_char u, int langnum); +LIBHUNSPELL_DLL_EXPORTED w_char lower_utf(w_char u, int langnum); +LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, + int langnum); +LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c); + +LIBHUNSPELL_DLL_EXPORTED struct cs_info* get_current_cs(const std::string& es); + +// get language identifiers of language codes +LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const std::string& lang); + +// get characters of the given 8bit encoding with lower- and uppercase forms +LIBHUNSPELL_DLL_EXPORTED std::string get_casechars(const char* enc); + +// convert std::string to all caps +LIBHUNSPELL_DLL_EXPORTED std::string& mkallcap(std::string& s, + const struct cs_info* csconv); + +// convert null terminated string to all little +LIBHUNSPELL_DLL_EXPORTED std::string& mkallsmall(std::string& s, + const struct cs_info* csconv); + +// convert first letter of string to little +LIBHUNSPELL_DLL_EXPORTED std::string& mkinitsmall(std::string& s, + const struct cs_info* csconv); + +// convert first letter of string to capital +LIBHUNSPELL_DLL_EXPORTED std::string& mkinitcap(std::string& s, + const struct cs_info* csconv); + +// convert first letter of UTF-8 string to capital +LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>& +mkinitcap_utf(std::vector<w_char>& u, int langnum); + +// convert UTF-8 string to little +LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>& +mkallsmall_utf(std::vector<w_char>& u, int langnum); + +// convert first letter of UTF-8 string to little +LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>& +mkinitsmall_utf(std::vector<w_char>& u, int langnum); + +// convert UTF-8 string to capital +LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>& +mkallcap_utf(std::vector<w_char>& u, int langnum); + +// get type of capitalization +LIBHUNSPELL_DLL_EXPORTED int get_captype(const std::string& q, cs_info*); + +// get type of capitalization (UTF-8) +LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(const std::vector<w_char>& q, int langnum); + +// strip all ignored characters in the string +LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars_utf( + std::string& word, + const std::vector<w_char>& ignored_chars); + +// strip all ignored characters in the string +LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars( + std::string& word, + const std::string& ignored_chars); + +LIBHUNSPELL_DLL_EXPORTED bool parse_string(const std::string& line, + std::string& out, + int ln); + +LIBHUNSPELL_DLL_EXPORTED bool parse_array(const std::string& line, + std::string& out, + std::vector<w_char>& out_utf16, + int utf8, + int ln); + +LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char* r); + +LIBHUNSPELL_DLL_EXPORTED bool copy_field(std::string& dest, + const std::string& morph, + const std::string& var); + +// conversion function for protected memory 
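+// (rough sketch of the idea, assuming the upstream csutil.cxx implementation:
+// store_pointer() copies the raw char* value byte-wise into the hentry data
+// area, e.g. memcpy(dest, &source, sizeof(char*)), and get_stored_pointer()
+// reads it back; see the H_OPT_ALIASM branches of HENTRY_DATA() below)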
+LIBHUNSPELL_DLL_EXPORTED void store_pointer(char* dest, char* source); + +// conversion function for protected memory +LIBHUNSPELL_DLL_EXPORTED char* get_stored_pointer(const char* s); + + +// to avoid unnecessary string copies and Unicode conversions +// we simply check the ignored_chars characters in the word +// (in the case of UTF-8 encoded strings, "false" means +// "likely false", if ignored_chars characters are not ASCII) +inline bool has_no_ignored_chars(const std::string& word, + const std::string& ignored_chars) { + for (std::string::const_iterator it = ignored_chars.begin(), end = ignored_chars.end(); it != end; ++it) + if (word.find(*it) != std::string::npos) + return false; + return true; +} + +// hash entry macros +inline char* HENTRY_DATA(struct hentry* h) { + char* ret; + if (!(h->var & H_OPT)) + ret = NULL; + else if (h->var & H_OPT_ALIASM) + ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1); + else + ret = HENTRY_WORD(h) + h->blen + 1; + return ret; +} + +inline const char* HENTRY_DATA( + const struct hentry* h) { + const char* ret; + if (!(h->var & H_OPT)) + ret = NULL; + else if (h->var & H_OPT_ALIASM) + ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1); + else + ret = HENTRY_WORD(h) + h->blen + 1; + return ret; +} + +// NULL-free version for warning-free OOo build +inline const char* HENTRY_DATA2( + const struct hentry* h) { + const char* ret; + if (!(h->var & H_OPT)) + ret = ""; + else if (h->var & H_OPT_ALIASM) + ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1); + else + ret = HENTRY_WORD(h) + h->blen + 1; + return ret; +} + +inline char* HENTRY_FIND(struct hentry* h, + const char* p) { + return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL); +} + +#endif diff --git a/extensions/spellcheck/hunspell/src/filemgr.hxx b/extensions/spellcheck/hunspell/src/filemgr.hxx new file mode 100644 index 0000000000..7773a321a6 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/filemgr.hxx @@ -0,0 +1,77 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. 
If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* file manager class - read lines of files [filename] OR [filename.hz] */ +#ifndef FILEMGR_HXX_ +#define FILEMGR_HXX_ + +#include "mozHunspellRLBoxSandbox.h" + +#endif diff --git a/extensions/spellcheck/hunspell/src/hashmgr.cxx b/extensions/spellcheck/hunspell/src/hashmgr.cxx new file mode 100644 index 0000000000..d22f2e7b7d --- /dev/null +++ b/extensions/spellcheck/hunspell/src/hashmgr.cxx @@ -0,0 +1,1415 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. 
+ * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <ctype.h> +#include <limits> +#include <sstream> + +#include "hashmgr.hxx" +#include "csutil.hxx" +#include "atypes.hxx" +#include "langnum.hxx" + +// build a hash table from a munched word list + +HashMgr::HashMgr(const char* tpath, const char* apath, const char* key) + : tablesize(0), + tableptr(NULL), + flag_mode(FLAG_CHAR), + complexprefixes(0), + utf8(0), + forbiddenword(FORBIDDENWORD) // forbidden word signing flag + , + numaliasf(0), + aliasf(NULL), + aliasflen(0), + numaliasm(0), + aliasm(NULL) { + langnum = 0; + csconv = 0; + load_config(apath, key); + int ec = load_tables(tpath, key); + if (ec) { + /* error condition - what should we do here */ + HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n", ec); + free(tableptr); + //keep tablesize to 1 to fix possible division with zero + tablesize = 1; + tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*)); + if (!tableptr) { + tablesize = 0; + } + } +} + +HashMgr::~HashMgr() { + if (tableptr) { + // now pass through hash table freeing up everything + // go through column by column of the table + for (int i = 0; i < tablesize; i++) { + struct hentry* pt = tableptr[i]; + struct hentry* nt = NULL; + while (pt) { + nt = pt->next; + if (pt->astr && + (!aliasf || TESTAFF(pt->astr, ONLYUPCASEFLAG, pt->alen))) + arena_free(pt->astr); + arena_free(pt); + pt = nt; + } + } + free(tableptr); + } + tablesize = 0; + + if (aliasf) { + for (int j = 0; j < (numaliasf); j++) + arena_free(aliasf[j]); + arena_free(aliasf); + aliasf = NULL; + if (aliasflen) { + arena_free(aliasflen); + aliasflen = NULL; + } + } + if (aliasm) { + for (int j = 0; j < (numaliasm); j++) + arena_free(aliasm[j]); + arena_free(aliasm); + aliasm = NULL; + } + +#ifndef OPENOFFICEORG +#ifndef MOZILLA_CLIENT + if (utf8) + free_utf_tbl(); +#endif +#endif + +#ifdef MOZILLA_CLIENT + delete[] csconv; +#endif + + assert(outstanding_arena_allocations == 0); +} + +// lookup a root word in the hashtable + +struct hentry* HashMgr::lookup(const char* word) const { + struct hentry* dp; + if (tableptr) { + dp = tableptr[hash(word)]; + if (!dp) + return NULL; + for (; dp != NULL; dp = dp->next) { + if (strcmp(word, dp->word) == 0) + return dp; + } + } + return NULL; +} + +// add a word to the hash table (private) +int HashMgr::add_word(const std::string& in_word, + int wcl, + unsigned short* aff, + int al, + const std::string* in_desc, + bool onlyupcase, + int captype) { + const std::string* word = &in_word; + const std::string* desc = in_desc; + + std::string *word_copy = NULL; + std::string *desc_copy = NULL; + if ((!ignorechars.empty() && !has_no_ignored_chars(in_word, ignorechars)) || complexprefixes) { + word_copy = new std::string(in_word); + + if (!ignorechars.empty()) { + if (utf8) { + wcl = remove_ignored_chars_utf(*word_copy, ignorechars_utf16); + } else { + remove_ignored_chars(*word_copy, ignorechars); + } + } + + if (complexprefixes) { + if (utf8) + wcl = reverseword_utf(*word_copy); + else + reverseword(*word_copy); + + if (in_desc && !aliasm) { + desc_copy = new std::string(*in_desc); + + if (complexprefixes) { + if (utf8) + reverseword_utf(*desc_copy); + else + reverseword(*desc_copy); + } + desc = desc_copy; + } + } + + word = word_copy; + } + + bool upcasehomonym = false; + int descl = desc ? (aliasm ? 
sizeof(char*) : desc->size() + 1) : 0; + // variable-length hash record with word and optional fields + struct hentry* hp = + (struct hentry*)arena_alloc(sizeof(struct hentry) + word->size() + descl); + if (!hp) { + delete desc_copy; + delete word_copy; + return 1; + } + + char* hpw = hp->word; + strcpy(hpw, word->c_str()); + + int i = hash(hpw); + + hp->blen = (unsigned char)word->size(); + hp->clen = (unsigned char)wcl; + hp->alen = (short)al; + hp->astr = aff; + hp->next = NULL; + hp->next_homonym = NULL; + hp->var = (captype == INITCAP) ? H_OPT_INITCAP : 0; + + // store the description string or its pointer + if (desc) { + hp->var |= H_OPT; + if (aliasm) { + hp->var |= H_OPT_ALIASM; + store_pointer(hpw + word->size() + 1, get_aliasm(atoi(desc->c_str()))); + } else { + strcpy(hpw + word->size() + 1, desc->c_str()); + } + if (strstr(HENTRY_DATA(hp), MORPH_PHON)) { + hp->var |= H_OPT_PHON; + // store ph: fields (pronounciation, misspellings, old orthography etc.) + // of a morphological description in reptable to use in REP replacements. + if (reptable.capacity() < (unsigned int)(tablesize/MORPH_PHON_RATIO)) + reptable.reserve(tablesize/MORPH_PHON_RATIO); + std::string fields = HENTRY_DATA(hp); + std::string::const_iterator iter = fields.begin(); + std::string::const_iterator start_piece = mystrsep(fields, iter); + while (start_piece != fields.end()) { + if (std::string(start_piece, iter).find(MORPH_PHON) == 0) { + std::string ph = std::string(start_piece, iter).substr(sizeof MORPH_PHON - 1); + if (ph.size() > 0) { + std::vector<w_char> w; + size_t strippatt; + std::string wordpart; + // dictionary based REP replacement, separated by "->" + // for example "pretty ph:prity ph:priti->pretti" to handle + // both prity -> pretty and pritier -> prettiest suggestions. + if (((strippatt = ph.find("->")) != std::string::npos) && + (strippatt > 0) && (strippatt < ph.size() - 2)) { + wordpart = ph.substr(strippatt + 2); + ph.erase(ph.begin() + strippatt, ph.end()); + } else + wordpart = in_word; + // when the ph: field ends with the character *, + // strip last character of the pattern and the replacement + // to match in REP suggestions also at character changes, + // for example, "pretty ph:prity*" results "prit->prett" + // REP replacement instead of "prity->pretty", to get + // prity->pretty and pritiest->prettiest suggestions. + if (ph.at(ph.size()-1) == '*') { + strippatt = 1; + size_t stripword = 0; + if (utf8) { + while ((strippatt < ph.size()) && + ((ph.at(ph.size()-strippatt-1) & 0xc0) == 0x80)) + ++strippatt; + while ((stripword < wordpart.size()) && + ((wordpart.at(wordpart.size()-stripword-1) & 0xc0) == 0x80)) + ++stripword; + } + ++strippatt; + ++stripword; + if ((ph.size() > strippatt) && (wordpart.size() > stripword)) { + ph.erase(ph.size()-strippatt, strippatt); + wordpart.erase(in_word.size()-stripword, stripword); + } + } + // capitalize lowercase pattern for capitalized words to support + // good suggestions also for capitalized misspellings, eg. + // Wednesday ph:wendsay + // results wendsay -> Wednesday and Wendsay -> Wednesday, too. 
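+ // i.e. continuing that example, both pairs end up in reptable:
+ //   wendsay -> Wednesday (added unconditionally at the end of this block)
+ //   Wendsay -> Wednesday (added by the captype==INITCAP branch below)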
+ if (captype==INITCAP) { + std::string ph_capitalized; + if (utf8) { + u8_u16(w, ph); + if (get_captype_utf8(w, langnum) == NOCAP) { + mkinitcap_utf(w, langnum); + u16_u8(ph_capitalized, w); + } + } else if (get_captype(ph, csconv) == NOCAP) + mkinitcap(ph_capitalized, csconv); + + if (ph_capitalized.size() > 0) { + // add also lowercase word in the case of German or + // Hungarian to support lowercase suggestions lowercased by + // compound word generation or derivational suffixes + // (for example by adjectival suffix "-i" of geographical + // names in Hungarian: + // Massachusetts ph:messzecsuzec + // messzecsuzeci -> massachusettsi (adjective) + // For lowercasing by conditional PFX rules, see + // tests/germancompounding test example or the + // Hungarian dictionary.) + if (langnum == LANG_de || langnum == LANG_hu) { + std::string wordpart_lower(wordpart); + if (utf8) { + u8_u16(w, wordpart_lower); + mkallsmall_utf(w, langnum); + u16_u8(wordpart_lower, w); + } else { + mkallsmall(wordpart_lower, csconv); + } + reptable.push_back(replentry()); + reptable.back().pattern.assign(ph); + reptable.back().outstrings[0].assign(wordpart_lower); + } + reptable.push_back(replentry()); + reptable.back().pattern.assign(ph_capitalized); + reptable.back().outstrings[0].assign(wordpart); + } + } + reptable.push_back(replentry()); + reptable.back().pattern.assign(ph); + reptable.back().outstrings[0].assign(wordpart); + } + } + start_piece = mystrsep(fields, iter); + } + } + } + + struct hentry* dp = tableptr[i]; + if (!dp) { + tableptr[i] = hp; + delete desc_copy; + delete word_copy; + return 0; + } + while (dp->next != NULL) { + if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) { + // remove hidden onlyupcase homonym + if (!onlyupcase) { + if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { + arena_free(dp->astr); + dp->astr = hp->astr; + dp->alen = hp->alen; + arena_free(hp); + delete desc_copy; + delete word_copy; + return 0; + } else { + dp->next_homonym = hp; + } + } else { + upcasehomonym = true; + } + } + dp = dp->next; + } + if (strcmp(hp->word, dp->word) == 0) { + // remove hidden onlyupcase homonym + if (!onlyupcase) { + if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) { + arena_free(dp->astr); + dp->astr = hp->astr; + dp->alen = hp->alen; + arena_free(hp); + delete desc_copy; + delete word_copy; + return 0; + } else { + dp->next_homonym = hp; + } + } else { + upcasehomonym = true; + } + } + if (!upcasehomonym) { + dp->next = hp; + } else { + // remove hidden onlyupcase homonym + if (hp->astr) + arena_free(hp->astr); + arena_free(hp); + } + + delete desc_copy; + delete word_copy; + return 0; +} + +int HashMgr::add_hidden_capitalized_word(const std::string& word, + int wcl, + unsigned short* flags, + int flagslen, + const std::string* dp, + int captype) { + if (flags == NULL) + flagslen = 0; + + // add inner capitalized forms to handle the following allcap forms: + // Mixed caps: OpenOffice.org -> OPENOFFICE.ORG + // Allcaps with suffixes: CIA's -> CIA'S + if (((captype == HUHCAP) || (captype == HUHINITCAP) || + ((captype == ALLCAP) && (flagslen != 0))) && + !((flagslen != 0) && TESTAFF(flags, forbiddenword, flagslen))) { + unsigned short* flags2 = + (unsigned short*)arena_alloc(sizeof(unsigned short) * (flagslen + 1)); + if (!flags2) + return 1; + if (flagslen) + memcpy(flags2, flags, flagslen * sizeof(unsigned short)); + flags2[flagslen] = ONLYUPCASEFLAG; + if (utf8) { + std::string st; + std::vector<w_char> w; + u8_u16(w, word); + mkallsmall_utf(w, 
langnum); + mkinitcap_utf(w, langnum); + u16_u8(st, w); + return add_word(st, wcl, flags2, flagslen + 1, dp, true, INITCAP); + } else { + std::string new_word(word); + mkallsmall(new_word, csconv); + mkinitcap(new_word, csconv); + int ret = add_word(new_word, wcl, flags2, flagslen + 1, dp, true, INITCAP); + return ret; + } + } + return 0; +} + +// detect captype and modify word length for UTF-8 encoding +int HashMgr::get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf) { + int len; + if (utf8) { + len = u8_u16(workbuf, word); + *captype = get_captype_utf8(workbuf, langnum); + } else { + len = word.size(); + *captype = get_captype(word, csconv); + } + return len; +} + +int HashMgr::get_clen_and_captype(const std::string& word, int* captype) { + std::vector<w_char> workbuf; + return get_clen_and_captype(word, captype, workbuf); +} + +// remove word (personal dictionary function for standalone applications) +int HashMgr::remove(const std::string& word) { + struct hentry* dp = lookup(word.c_str()); + while (dp) { + if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) { + unsigned short* flags = + (unsigned short*)arena_alloc(sizeof(unsigned short) * (dp->alen + 1)); + if (!flags) + return 1; + for (int i = 0; i < dp->alen; i++) + flags[i] = dp->astr[i]; + flags[dp->alen] = forbiddenword; + arena_free(dp->astr); + dp->astr = flags; + dp->alen++; + std::sort(flags, flags + dp->alen); + } + dp = dp->next_homonym; + } + return 0; +} + +/* remove forbidden flag to add a personal word to the hash */ +int HashMgr::remove_forbidden_flag(const std::string& word) { + struct hentry* dp = lookup(word.c_str()); + if (!dp) + return 1; + while (dp) { + if (dp->astr && TESTAFF(dp->astr, forbiddenword, dp->alen)) + dp->alen = 0; // XXX forbidden words of personal dic. + dp = dp->next_homonym; + } + return 0; +} + +// add a custom dic. 
word to the hash table (public) +int HashMgr::add(const std::string& word) { + if (remove_forbidden_flag(word)) { + int captype; + int al = 0; + unsigned short* flags = NULL; + int wcl = get_clen_and_captype(word, &captype); + add_word(word, wcl, flags, al, NULL, false, captype); + return add_hidden_capitalized_word(word, wcl, flags, al, NULL, + captype); + } + return 0; +} + +int HashMgr::add_with_affix(const std::string& word, const std::string& example) { + // detect captype and modify word length for UTF-8 encoding + struct hentry* dp = lookup(example.c_str()); + remove_forbidden_flag(word); + if (dp && dp->astr) { + int captype; + int wcl = get_clen_and_captype(word, &captype); + if (aliasf) { + add_word(word, wcl, dp->astr, dp->alen, NULL, false, captype); + } else { + unsigned short* flags = + (unsigned short*) arena_alloc(dp->alen * sizeof(unsigned short)); + if (flags) { + memcpy((void*)flags, (void*)dp->astr, + dp->alen * sizeof(unsigned short)); + add_word(word, wcl, flags, dp->alen, NULL, false, captype); + } else + return 1; + } + return add_hidden_capitalized_word(word, wcl, dp->astr, + dp->alen, NULL, captype); + } + return 1; +} + +// walk the hash table entry by entry - null at end +// initialize: col=-1; hp = NULL; hp = walk_hashtable(&col, hp); +struct hentry* HashMgr::walk_hashtable(int& col, struct hentry* hp) const { + if (hp && hp->next != NULL) + return hp->next; + for (col++; col < tablesize; col++) { + if (tableptr[col]) + return tableptr[col]; + } + // null at end and reset to start + col = -1; + return NULL; +} + +// load a munched word list and build a hash table on the fly +int HashMgr::load_tables(const char* tpath, const char* key) { + // open dictionary file + FileMgr* dict = new FileMgr(tpath, key); + if (dict == NULL) + return 1; + + // first read the first line of file to get hash table size */ + std::string ts; + if (!dict->getline(ts)) { + HUNSPELL_WARNING(stderr, "error: empty dic file %s\n", tpath); + delete dict; + return 2; + } + mychomp(ts); + + /* remove byte order mark */ + if (ts.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) { + ts.erase(0, 3); + } + + tablesize = atoi(ts.c_str()); + + int nExtra = 5 + USERWORD; + + if (tablesize <= 0 || + (tablesize >= (std::numeric_limits<int>::max() - 1 - nExtra) / + int(sizeof(struct hentry*)))) { + HUNSPELL_WARNING( + stderr, "error: line 1: missing or bad word count in the dic file\n"); + delete dict; + return 4; + } + tablesize += nExtra; + if ((tablesize % 2) == 0) + tablesize++; + + // allocate the hash table + tableptr = (struct hentry**)calloc(tablesize, sizeof(struct hentry*)); + if (!tableptr) { + delete dict; + return 3; + } + + // loop through all words on much list and add to hash + // table and create word and affix strings + + std::vector<w_char> workbuf; + + while (dict->getline(ts)) { + mychomp(ts); + // split each line into word and morphological description + size_t dp_pos = 0; + while ((dp_pos = ts.find(':', dp_pos)) != std::string::npos) { + if ((dp_pos > 3) && (ts[dp_pos - 3] == ' ' || ts[dp_pos - 3] == '\t')) { + for (dp_pos -= 3; dp_pos > 0 && (ts[dp_pos-1] == ' ' || ts[dp_pos-1] == '\t'); --dp_pos) + ; + if (dp_pos == 0) { // missing word + dp_pos = std::string::npos; + } else { + ++dp_pos; + } + break; + } + ++dp_pos; + } + + // tabulator is the old morphological field separator + size_t dp2_pos = ts.find('\t'); + if (dp2_pos != std::string::npos && (dp_pos == std::string::npos || dp2_pos < dp_pos)) { + dp_pos = dp2_pos + 1; + } + + std::string dp; + if (dp_pos != std::string::npos) { + 
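+ // e.g. for the dictionary line "foo/AB po:noun", dp receives "po:noun" and
+ // ts is cut back to "foo/AB"; the "/AB" affix flags are split off further
+ // below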
dp.assign(ts.substr(dp_pos)); + ts.resize(dp_pos - 1); + } + + // split each line into word and affix char strings + // "\/" signs slash in words (not affix separator) + // "/" at beginning of the line is word character (not affix separator) + size_t ap_pos = ts.find('/'); + while (ap_pos != std::string::npos) { + if (ap_pos == 0) { + ++ap_pos; + continue; + } else if (ts[ap_pos - 1] != '\\') + break; + // replace "\/" with "/" + ts.erase(ap_pos - 1, 1); + ap_pos = ts.find('/', ap_pos); + } + + unsigned short* flags; + int al; + if (ap_pos != std::string::npos && ap_pos != ts.size()) { + std::string ap(ts.substr(ap_pos + 1)); + ts.resize(ap_pos); + if (aliasf) { + int index = atoi(ap.c_str()); + al = get_aliasf(index, &flags, dict); + if (!al) { + HUNSPELL_WARNING(stderr, "error: line %d: bad flag vector alias\n", + dict->getlinenum()); + } + } else { + al = decode_flags(&flags, ap.c_str(), dict, /* arena = */ true); + if (al == -1) { + HUNSPELL_WARNING(stderr, "Can't allocate memory.\n"); + delete dict; + return 6; + } + std::sort(flags, flags + al); + } + } else { + al = 0; + flags = NULL; + } + + int captype; + int wcl = get_clen_and_captype(ts, &captype, workbuf); + const std::string *dp_str = dp.empty() ? NULL : &dp; + // add the word and its index plus its capitalized form optionally + if (add_word(ts, wcl, flags, al, dp_str, false, captype) || + add_hidden_capitalized_word(ts, wcl, flags, al, dp_str, captype)) { + delete dict; + return 5; + } + } + + delete dict; + return 0; +} + +// the hash function is a simple load and rotate +// algorithm borrowed +int HashMgr::hash(const char* word) const { + unsigned long hv = 0; + for (int i = 0; i < 4 && *word != 0; i++) + hv = (hv << 8) | (*word++); + while (*word != 0) { + ROTATE(hv, ROTATE_LEN); + hv ^= (*word++); + } + return (unsigned long)hv % tablesize; +} + +int HashMgr::decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const { + auto alloc = [arena, this](int n) { return arena ? 
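
HashMgr::hash() above is the "load and rotate" scheme: the first four bytes seed the value, every remaining byte is mixed in with a 5-bit rotate and an XOR, and the result is reduced modulo tablesize. A standalone restatement for illustration (names are ours; the real code uses the ROTATE/ROTATE_LEN macros from htypes.hxx and walks the raw char pointer):

#include <string>

// Same masking as the ROTATE macro with ROTATE_LEN == 5.
static unsigned long rotate5(unsigned long v) {
  return (v << 5) | ((v >> (32 - 5)) & ((1u << 5) - 1));
}

static int toy_hash(const std::string& word, int tablesize) {
  unsigned long hv = 0;
  size_t i = 0;
  for (; i < 4 && i < word.size(); ++i)   // load up to the first 4 bytes
    hv = (hv << 8) | (unsigned char)word[i];
  for (; i < word.size(); ++i) {          // rotate-and-xor the remainder
    hv = rotate5(hv);
    hv ^= (unsigned char)word[i];
  }
  return (unsigned long)hv % tablesize;
}
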
this->arena_alloc(n) : malloc(n); }; + int len; + if (flags.empty()) { + *result = NULL; + return 0; + } + switch (flag_mode) { + case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) + len = flags.size(); + if (len % 2 == 1) + HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", + af->getlinenum()); + len /= 2; + *result = (unsigned short*)alloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + for (int i = 0; i < len; i++) { + (*result)[i] = ((unsigned short)((unsigned char)flags[i * 2]) << 8) + + (unsigned char)flags[i * 2 + 1]; + } + break; + } + case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 + // 23 233) + len = 1; + unsigned short* dest; + for (size_t i = 0; i < flags.size(); ++i) { + if (flags[i] == ',') + len++; + } + *result = (unsigned short*)alloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + dest = *result; + const char* src = flags.c_str(); + for (const char* p = src; *p; p++) { + if (*p == ',') { + int i = atoi(src); + if (i >= DEFAULTFLAGS) + HUNSPELL_WARNING( + stderr, "error: line %d: flag id %d is too large (max: %d)\n", + af->getlinenum(), i, DEFAULTFLAGS - 1); + *dest = (unsigned short)i; + if (*dest == 0) + HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", + af->getlinenum()); + src = p + 1; + dest++; + } + } + int i = atoi(src); + if (i >= DEFAULTFLAGS) + HUNSPELL_WARNING(stderr, + "error: line %d: flag id %d is too large (max: %d)\n", + af->getlinenum(), i, DEFAULTFLAGS - 1); + *dest = (unsigned short)i; + if (*dest == 0) + HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", + af->getlinenum()); + break; + } + case FLAG_UNI: { // UTF-8 characters + std::vector<w_char> w; + u8_u16(w, flags); + len = w.size(); + *result = (unsigned short*)alloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + memcpy(*result, w.data(), len * sizeof(short)); + break; + } + default: { // Ispell's one-character flags (erfg -> e r f g) + unsigned short* dest; + len = flags.size(); + *result = (unsigned short*)alloc(len * sizeof(unsigned short)); + if (!*result) + return -1; + dest = *result; + for (size_t i = 0; i < flags.size(); ++i) { + *dest = (unsigned char)flags[i]; + dest++; + } + } + } + return len; +} + +bool HashMgr::decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const { + if (flags.empty()) { + return false; + } + switch (flag_mode) { + case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz) + size_t len = flags.size(); + if (len % 2 == 1) + HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", + af->getlinenum()); + len /= 2; + result.reserve(result.size() + len); + for (size_t i = 0; i < len; ++i) { + result.push_back(((unsigned short)((unsigned char)flags[i * 2]) << 8) + + (unsigned char)flags[i * 2 + 1]); + } + break; + } + case FLAG_NUM: { // decimal numbers separated by comma (4521,23,233 -> 4521 + // 23 233) + const char* src = flags.c_str(); + for (const char* p = src; *p; p++) { + if (*p == ',') { + int i = atoi(src); + if (i >= DEFAULTFLAGS) + HUNSPELL_WARNING( + stderr, "error: line %d: flag id %d is too large (max: %d)\n", + af->getlinenum(), i, DEFAULTFLAGS - 1); + result.push_back((unsigned short)i); + if (result.back() == 0) + HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", + af->getlinenum()); + src = p + 1; + } + } + int i = atoi(src); + if (i >= DEFAULTFLAGS) + HUNSPELL_WARNING(stderr, + "error: line %d: flag id %d is too large (max: %d)\n", + af->getlinenum(), i, DEFAULTFLAGS - 
1); + result.push_back((unsigned short)i); + if (result.back() == 0) + HUNSPELL_WARNING(stderr, "error: line %d: 0 is wrong flag id\n", + af->getlinenum()); + break; + } + case FLAG_UNI: { // UTF-8 characters + std::vector<w_char> w; + u8_u16(w, flags); + size_t len = w.size(); + size_t origsize = result.size(); + result.resize(origsize + len); + memcpy(result.data() + origsize, w.data(), len * sizeof(short)); + break; + } + default: { // Ispell's one-character flags (erfg -> e r f g) + result.reserve(flags.size()); + for (size_t i = 0; i < flags.size(); ++i) { + result.push_back((unsigned char)flags[i]); + } + } + } + return true; +} + +unsigned short HashMgr::decode_flag(const char* f) const { + unsigned short s = 0; + int i; + switch (flag_mode) { + case FLAG_LONG: + s = ((unsigned short)((unsigned char)f[0]) << 8) + (unsigned char)f[1]; + break; + case FLAG_NUM: + i = atoi(f); + if (i >= DEFAULTFLAGS) + HUNSPELL_WARNING(stderr, "error: flag id %d is too large (max: %d)\n", + i, DEFAULTFLAGS - 1); + s = (unsigned short)i; + break; + case FLAG_UNI: { + std::vector<w_char> w; + u8_u16(w, f); + if (!w.empty()) + memcpy(&s, w.data(), 1 * sizeof(short)); + break; + } + default: + s = *(unsigned char*)f; + } + if (s == 0) + HUNSPELL_WARNING(stderr, "error: 0 is wrong flag id\n"); + return s; +} + +// This function is only called by external consumers, and so using the default +// allocator with mystrdup is correct. +char* HashMgr::encode_flag(unsigned short f) const { + if (f == 0) + return mystrdup("(NULL)"); + std::string ch; + if (flag_mode == FLAG_LONG) { + ch.push_back((unsigned char)(f >> 8)); + ch.push_back((unsigned char)(f - ((f >> 8) << 8))); + } else if (flag_mode == FLAG_NUM) { + std::ostringstream stream; + stream << f; + ch = stream.str(); + } else if (flag_mode == FLAG_UNI) { + const w_char* w_c = (const w_char*)&f; + std::vector<w_char> w(w_c, w_c + 1); + u16_u8(ch, w); + } else { + ch.push_back((unsigned char)(f)); + } + return mystrdup(ch.c_str()); +} + +// read in aff file and set flag mode +int HashMgr::load_config(const char* affpath, const char* key) { + int firstline = 1; + + // open the affix file + FileMgr* afflst = new FileMgr(affpath, key); + if (!afflst) { + HUNSPELL_WARNING( + stderr, "Error - could not open affix description file %s\n", affpath); + return 1; + } + + // read in each line ignoring any that do not + // start with a known line type indicator + + std::string line; + while (afflst->getline(line)) { + mychomp(line); + + /* remove byte order mark */ + if (firstline) { + firstline = 0; + if (line.compare(0, 3, "\xEF\xBB\xBF", 3) == 0) { + line.erase(0, 3); + } + } + + /* parse in the try string */ + if ((line.compare(0, 4, "FLAG", 4) == 0) && line.size() > 4 && isspace(line[4])) { + if (flag_mode != FLAG_CHAR) { + HUNSPELL_WARNING(stderr, + "error: line %d: multiple definitions of the FLAG " + "affix file parameter\n", + afflst->getlinenum()); + } + if (line.find("long") != std::string::npos) + flag_mode = FLAG_LONG; + if (line.find("num") != std::string::npos) + flag_mode = FLAG_NUM; + if (line.find("UTF-8") != std::string::npos) + flag_mode = FLAG_UNI; + if (flag_mode == FLAG_CHAR) { + HUNSPELL_WARNING( + stderr, + "error: line %d: FLAG needs `num', `long' or `UTF-8' parameter\n", + afflst->getlinenum()); + } + } + + if (line.compare(0, 13, "FORBIDDENWORD", 13) == 0) { + std::string st; + if (!parse_string(line, st, afflst->getlinenum())) { + delete afflst; + return 1; + } + forbiddenword = decode_flag(st.c_str()); + } + + if (line.compare(0, 3, "SET", 
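
decode_flags() above supports four encodings: FLAG_LONG packs each two-character pair into one 16-bit value (high byte first), FLAG_NUM parses comma-separated decimal ids, FLAG_UNI converts UTF-8 characters, and the default mode takes single bytes. A minimal sketch of just the FLAG_LONG case, mirroring the loop above (the helper name is illustrative, not Hunspell API):

#include <string>
#include <vector>

// "1x2yZz" -> { '1'<<8|'x', '2'<<8|'y', 'Z'<<8|'z' }
static std::vector<unsigned short> decode_long_flags(const std::string& flags) {
  std::vector<unsigned short> out;
  for (size_t i = 0; i + 1 < flags.size(); i += 2) {
    out.push_back((unsigned short)(((unsigned char)flags[i] << 8) |
                                   (unsigned char)flags[i + 1]));
  }
  return out;
}
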
3) == 0) { + if (!parse_string(line, enc, afflst->getlinenum())) { + delete afflst; + return 1; + } + if (enc == "UTF-8") { + utf8 = 1; +#ifndef OPENOFFICEORG +#ifndef MOZILLA_CLIENT + initialize_utf_tbl(); +#endif +#endif + } else + csconv = get_current_cs(enc); + } + + if (line.compare(0, 4, "LANG", 4) == 0) { + if (!parse_string(line, lang, afflst->getlinenum())) { + delete afflst; + return 1; + } + langnum = get_lang_num(lang); + } + + /* parse in the ignored characters (for example, Arabic optional diacritics + * characters */ + if (line.compare(0, 6, "IGNORE", 6) == 0) { + if (!parse_array(line, ignorechars, ignorechars_utf16, + utf8, afflst->getlinenum())) { + delete afflst; + return 1; + } + } + + if ((line.compare(0, 2, "AF", 2) == 0) && line.size() > 2 && isspace(line[2])) { + if (!parse_aliasf(line, afflst)) { + delete afflst; + return 1; + } + } + + if ((line.compare(0, 2, "AM", 2) == 0) && line.size() > 2 && isspace(line[2])) { + if (!parse_aliasm(line, afflst)) { + delete afflst; + return 1; + } + } + + if (line.compare(0, 15, "COMPLEXPREFIXES", 15) == 0) + complexprefixes = 1; + + /* parse in the typical fault correcting table */ + if (line.compare(0, 3, "REP", 3) == 0) { + if (!parse_reptable(line, afflst)) { + delete afflst; + return 1; + } + } + + // don't check the full affix file, yet + if (((line.compare(0, 3, "SFX", 3) == 0) || + (line.compare(0, 3, "PFX", 3) == 0)) && + line.size() > 3 && isspace(line[3]) && + !reptable.empty()) // (REP table is in the end of Afrikaans aff file) + break; + } + + if (csconv == NULL) + csconv = get_current_cs(SPELL_ENCODING); + delete afflst; + return 0; +} + +/* parse in the ALIAS table */ +bool HashMgr::parse_aliasf(const std::string& line, FileMgr* af) { + if (numaliasf != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return false; + } + int i = 0; + int np = 0; + std::string::const_iterator iter = line.begin(); + std::string::const_iterator start_piece = mystrsep(line, iter); + while (start_piece != line.end()) { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + numaliasf = atoi(std::string(start_piece, iter).c_str()); + if (numaliasf < 1) { + numaliasf = 0; + aliasf = NULL; + aliasflen = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return false; + } + aliasf = + (unsigned short**)arena_alloc(numaliasf * sizeof(unsigned short*)); + aliasflen = + (unsigned short*)arena_alloc(numaliasf * sizeof(unsigned short)); + if (!aliasf || !aliasflen) { + numaliasf = 0; + if (aliasf) + arena_free(aliasf); + if (aliasflen) + arena_free(aliasflen); + aliasf = NULL; + aliasflen = NULL; + return false; + } + np++; + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np != 2) { + numaliasf = 0; + arena_free(aliasf); + arena_free(aliasflen); + aliasf = NULL; + aliasflen = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return false; + } + + /* now parse the numaliasf lines to read in the remainder of the table */ + for (int j = 0; j < numaliasf; j++) { + std::string nl; + aliasf[j] = NULL; + aliasflen[j] = 0; + i = 0; + if (af->getline(nl)) { + mychomp(nl); + iter = nl.begin(); + start_piece = mystrsep(nl, iter); + bool errored = false; + while (!errored && start_piece != nl.end()) { + switch (i) { + case 0: { + if (nl.compare(start_piece - nl.begin(), 2, "AF", 2) != 0) { + errored = true; + break; + } + break; + } + case 1: { + std::string 
piece(start_piece, iter); + aliasflen[j] = + (unsigned short)decode_flags(&(aliasf[j]), piece, af, /* arena = */ true); + std::sort(aliasf[j], aliasf[j] + aliasflen[j]); + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(nl, iter); + } + } + if (!aliasf[j]) { + for (int k = 0; k < j; ++k) { + arena_free(aliasf[k]); + } + arena_free(aliasf); + arena_free(aliasflen); + aliasf = NULL; + aliasflen = NULL; + numaliasf = 0; + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return false; + } + } + return true; +} + +int HashMgr::is_aliasf() const { + return (aliasf != NULL); +} + +int HashMgr::get_aliasf(int index, unsigned short** fvec, FileMgr* af) const { + if ((index > 0) && (index <= numaliasf)) { + *fvec = aliasf[index - 1]; + return aliasflen[index - 1]; + } + HUNSPELL_WARNING(stderr, "error: line %d: bad flag alias index: %d\n", + af->getlinenum(), index); + *fvec = NULL; + return 0; +} + +/* parse morph alias definitions */ +bool HashMgr::parse_aliasm(const std::string& line, FileMgr* af) { + if (numaliasm != 0) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return false; + } + int i = 0; + int np = 0; + std::string::const_iterator iter = line.begin(); + std::string::const_iterator start_piece = mystrsep(line, iter); + while (start_piece != line.end()) { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + numaliasm = atoi(std::string(start_piece, iter).c_str()); + if (numaliasm < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", + af->getlinenum()); + return false; + } + aliasm = (char**)arena_alloc(numaliasm * sizeof(char*)); + if (!aliasm) { + numaliasm = 0; + return false; + } + np++; + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np != 2) { + numaliasm = 0; + arena_free(aliasm); + aliasm = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return false; + } + + /* now parse the numaliasm lines to read in the remainder of the table */ + for (int j = 0; j < numaliasm; j++) { + std::string nl; + aliasm[j] = NULL; + if (af->getline(nl)) { + mychomp(nl); + iter = nl.begin(); + i = 0; + start_piece = mystrsep(nl, iter); + bool errored = false; + while (!errored && start_piece != nl.end()) { + switch (i) { + case 0: { + if (nl.compare(start_piece - nl.begin(), 2, "AM", 2) != 0) { + errored = true; + break; + } + break; + } + case 1: { + // add the remaining of the line + std::string::const_iterator end = nl.end(); + std::string chunk(start_piece, end); + if (complexprefixes) { + if (utf8) + reverseword_utf(chunk); + else + reverseword(chunk); + } + size_t sl = chunk.length() + 1; + aliasm[j] = (char*)arena_alloc(sl); + if (aliasm[j]) { + memcpy(aliasm[j], chunk.c_str(), sl); + } + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(nl, iter); + } + } + if (!aliasm[j]) { + numaliasm = 0; + for (int k = 0; k < j; ++k) { + arena_free(aliasm[k]); + } + arena_free(aliasm); + aliasm = NULL; + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + return false; + } + } + return true; +} + +int HashMgr::is_aliasm() const { + return (aliasm != NULL); +} + +char* HashMgr::get_aliasm(int index) const { + if ((index > 0) && (index <= numaliasm)) + return aliasm[index - 1]; + HUNSPELL_WARNING(stderr, "error: bad morph. 
alias index: %d\n", index); + return NULL; +} + +/* parse in the typical fault correcting table */ +bool HashMgr::parse_reptable(const std::string& line, FileMgr* af) { + if (!reptable.empty()) { + HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", + af->getlinenum()); + return false; + } + int numrep = -1; + int i = 0; + int np = 0; + std::string::const_iterator iter = line.begin(); + std::string::const_iterator start_piece = mystrsep(line, iter); + while (start_piece != line.end()) { + switch (i) { + case 0: { + np++; + break; + } + case 1: { + numrep = atoi(std::string(start_piece, iter).c_str()); + if (numrep < 1) { + HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", + af->getlinenum()); + return false; + } + reptable.reserve(numrep); + np++; + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(line, iter); + } + if (np != 2) { + HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", + af->getlinenum()); + return false; + } + + /* now parse the numrep lines to read in the remainder of the table */ + for (int j = 0; j < numrep; ++j) { + std::string nl; + reptable.push_back(replentry()); + int type = 0; + if (af->getline(nl)) { + mychomp(nl); + iter = nl.begin(); + i = 0; + start_piece = mystrsep(nl, iter); + bool errored = false; + while (!errored && start_piece != nl.end()) { + switch (i) { + case 0: { + if (nl.compare(start_piece - nl.begin(), 3, "REP", 3) != 0) { + errored = true; + break; + } + break; + } + case 1: { + if (*start_piece == '^') + type = 1; + reptable.back().pattern.assign(start_piece + type, iter); + mystrrep(reptable.back().pattern, "_", " "); + if (!reptable.back().pattern.empty() && reptable.back().pattern[reptable.back().pattern.size() - 1] == '$') { + type += 2; + reptable.back().pattern.resize(reptable.back().pattern.size() - 1); + } + break; + } + case 2: { + reptable.back().outstrings[type].assign(start_piece, iter); + mystrrep(reptable.back().outstrings[type], "_", " "); + break; + } + default: + break; + } + ++i; + start_piece = mystrsep(nl, iter); + } + } + if (reptable.back().pattern.empty() || reptable.back().outstrings[type].empty()) { + HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", + af->getlinenum()); + reptable.clear(); + return false; + } + } + return true; +} + +// return replacing table +const std::vector<replentry>& HashMgr::get_reptable() const { + return reptable; +} + +void* HashMgr::arena_alloc(int num_bytes) { + static const int MIN_CHUNK_SIZE = 4096; + if (arena.empty() || (current_chunk_size - current_chunk_offset < num_bytes)) { + current_chunk_size = std::max(MIN_CHUNK_SIZE, num_bytes); + arena.push_back(std::make_unique<uint8_t[]>(current_chunk_size)); + current_chunk_offset = 0; + } + + uint8_t* ptr = &arena.back()[current_chunk_offset]; + current_chunk_offset += num_bytes; + outstanding_arena_allocations++; + return ptr; +} + +void HashMgr::arena_free(void* ptr) { + --outstanding_arena_allocations; + assert(outstanding_arena_allocations >= 0); +} diff --git a/extensions/spellcheck/hunspell/src/hashmgr.hxx b/extensions/spellcheck/hunspell/src/hashmgr.hxx new file mode 100644 index 0000000000..f367a1c4a6 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/hashmgr.hxx @@ -0,0 +1,182 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance 
with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef HASHMGR_HXX_ +#define HASHMGR_HXX_ + +#include <stdio.h> +#include <stdint.h> +#include <memory> +#include <string> +#include <vector> + +#include "htypes.hxx" +#include "filemgr.hxx" +#include "w_char.hxx" + +enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI }; + +// morphological description of a dictionary item can contain +// arbitrary number "ph:" (MORPH_PHON) fields to store typical +// phonetic or other misspellings of that word. +// ratio of lines/lines with "ph:" in the dic file: 1/MORPH_PHON_RATIO +#define MORPH_PHON_RATIO 500 + +class HashMgr { + int tablesize; + struct hentry** tableptr; + flag flag_mode; + int complexprefixes; + int utf8; + unsigned short forbiddenword; + int langnum; + std::string enc; + std::string lang; + struct cs_info* csconv; + std::string ignorechars; + std::vector<w_char> ignorechars_utf16; + int numaliasf; // flag vector `compression' with aliases + unsigned short** aliasf; + unsigned short* aliasflen; + int numaliasm; // morphological desciption `compression' with aliases + char** aliasm; + // reptable created from REP table of aff file and from "ph:" fields + // of the dic file. It contains phonetic and other common misspellings + // (letters, letter groups and words) for better suggestions + std::vector<replentry> reptable; + + public: + HashMgr(const char* tpath, const char* apath, const char* key = NULL); + ~HashMgr(); + + struct hentry* lookup(const char*) const; + int hash(const char*) const; + struct hentry* walk_hashtable(int& col, struct hentry* hp) const; + + int add(const std::string& word); + int add_with_affix(const std::string& word, const std::string& pattern); + int remove(const std::string& word); +private: + // Only internal consumers are allowed to arena-allocate. + int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af, bool arena) const; +public: + int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const { + return decode_flags(result, flags, af, /* arena = */ false); + } + bool decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const; + unsigned short decode_flag(const char* flag) const; + char* encode_flag(unsigned short flag) const; + int is_aliasf() const; + int get_aliasf(int index, unsigned short** fvec, FileMgr* af) const; + int is_aliasm() const; + char* get_aliasm(int index) const; + const std::vector<replentry>& get_reptable() const; + + private: + int get_clen_and_captype(const std::string& word, int* captype); + int get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf); + int load_tables(const char* tpath, const char* key); + int add_word(const std::string& word, + int wcl, + unsigned short* ap, + int al, + const std::string* desc, + bool onlyupcase, + int captype); + int load_config(const char* affpath, const char* key); + bool parse_aliasf(const std::string& line, FileMgr* af); + int add_hidden_capitalized_word(const std::string& word, + int wcl, + unsigned short* flags, + int al, + const std::string* dp, + int captype); + bool parse_aliasm(const std::string& line, FileMgr* af); + bool parse_reptable(const std::string& line, FileMgr* af); + int remove_forbidden_flag(const std::string& word); + + // Our Mozilla fork uses a simple arena allocator for certain strings which + // persist for the lifetime of the HashMgr in order to avoid heap fragmentation. 
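
The arena referred to here (implemented by HashMgr::arena_alloc()/arena_free() in the .cxx file earlier in this diff) is a plain bump allocator: allocations are carved out of chunks of at least 4096 bytes, and nothing is returned to the system until the whole arena is destroyed. A minimal standalone sketch of that scheme, for illustration only (class and member names are ours):

#include <algorithm>
#include <cstdint>
#include <memory>
#include <vector>

class BumpArena {
 public:
  void* alloc(int num_bytes) {
    static const int MIN_CHUNK_SIZE = 4096;
    if (chunks.empty() || chunk_size - offset < num_bytes) {
      chunk_size = std::max(MIN_CHUNK_SIZE, num_bytes);
      chunks.push_back(std::make_unique<uint8_t[]>(chunk_size));
      offset = 0;
    }
    uint8_t* p = &chunks.back()[offset];
    offset += num_bytes;  // bump; individual blocks are never reused
    return p;
  }
  void free(void* /*ptr*/) {}  // no-op: memory lives until the arena dies

 private:
  std::vector<std::unique_ptr<uint8_t[]>> chunks;
  int chunk_size = 0;
  int offset = 0;
};
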
+ // It's a simple bump-allocator, so we can't actually free() memory midway + // through the lifecycle, but we have a dummy free() implementation to ensure + // that our calls to arena_alloc() and arena_free() are balanced. + void* arena_alloc(int num_bytes); + void* arena_alloc(int num_bytes) const { + return const_cast<HashMgr*>(this)->arena_alloc(num_bytes); + } + void arena_free(void* ptr); + + std::vector<std::unique_ptr<uint8_t[]>> arena; + int current_chunk_size = 0; + int current_chunk_offset = 0; + int outstanding_arena_allocations = 0; +}; + +#endif diff --git a/extensions/spellcheck/hunspell/src/htypes.hxx b/extensions/spellcheck/hunspell/src/htypes.hxx new file mode 100644 index 0000000000..44366b1d68 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/htypes.hxx @@ -0,0 +1,75 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef HTYPES_HXX_ +#define HTYPES_HXX_ + +#define ROTATE_LEN 5 + +#define ROTATE(v, q) \ + (v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q)) - 1)); + +// hentry options +#define H_OPT (1 << 0) // is there optional morphological data? +#define H_OPT_ALIASM (1 << 1) // using alias compression? +#define H_OPT_PHON (1 << 2) // is there ph: field in the morphological data? +#define H_OPT_INITCAP (1 << 3) // is dictionary word capitalized? + +// see also csutil.hxx +#define HENTRY_WORD(h) &(h->word[0]) + +// approx. 
number of user defined words +#define USERWORD 1000 + +#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900) +# define HUNSPELL_THREAD_LOCAL thread_local +#else +# define HUNSPELL_THREAD_LOCAL static +#endif + +struct hentry { + unsigned char blen; // word length in bytes + unsigned char clen; // word length in characters (different for UTF-8 enc.) + short alen; // length of affix flag vector + unsigned short* astr; // affix flag vector + struct hentry* next; // next word with same hash code + struct hentry* next_homonym; // next homonym word (with same hash code) + char var; // bit vector of H_OPT hentry options + char word[1]; // variable-length word (8-bit or UTF-8 encoding) +}; + +#endif diff --git a/extensions/spellcheck/hunspell/src/hunspell.cxx b/extensions/spellcheck/hunspell/src/hunspell.cxx new file mode 100644 index 0000000000..4afafdadc1 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/hunspell.cxx @@ -0,0 +1,2249 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. 
All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <time.h> + +#include "affixmgr.hxx" +#include "hunspell.hxx" +#include "suggestmgr.hxx" +#include "hunspell.h" +#include "csutil.hxx" + +#include <limits> +#include <string> + +#define MAXWORDUTF8LEN (MAXWORDLEN * 3) + +class HunspellImpl +{ +public: + HunspellImpl(const char* affpath, const char* dpath, const char* key = NULL); + ~HunspellImpl(); + int add_dic(const char* dpath, const char* key = NULL); + std::vector<std::string> suffix_suggest(const std::string& root_word); + std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl); + std::vector<std::string> generate(const std::string& word, const std::string& pattern); + std::vector<std::string> stem(const std::string& word); + std::vector<std::string> stem(const std::vector<std::string>& morph); + std::vector<std::string> analyze(const std::string& word); + int get_langnum() const; + bool input_conv(const std::string& word, std::string& dest); + bool spell(const std::string& word, int* info = NULL, std::string* root = NULL); + std::vector<std::string> suggest(const std::string& word); + const std::string& get_wordchars_cpp() const; + const std::vector<w_char>& get_wordchars_utf16() const; + const std::string& get_dict_encoding() const; + int add(const std::string& word); + int add_with_affix(const std::string& word, const std::string& example); + int remove(const std::string& word); + const std::string& get_version_cpp() const; + struct cs_info* get_csconv(); + + int spell(const char* word, int* info = NULL, char** root = NULL); + int suggest(char*** slst, const char* word); + int suffix_suggest(char*** slst, const char* root_word); + void free_list(char*** slst, int n); + char* get_dic_encoding(); + int analyze(char*** slst, const char* word); + int stem(char*** slst, const char* word); + int stem(char*** slst, char** morph, int n); + int generate(char*** slst, const char* word, const char* word2); + int generate(char*** slst, const char* word, char** desc, int n); + const char* get_wordchars() const; + const char* get_version() const; + int input_conv(const char* word, char* dest, size_t destsize); + +private: + AffixMgr* pAMgr; + std::vector<HashMgr*> m_HMgrs; + SuggestMgr* pSMgr; + char* affixpath; + std::string encoding; + struct cs_info* csconv; + int langnum; + int utf8; + int complexprefixes; + std::vector<std::string> wordbreak; + +private: + std::vector<std::string> 
analyze_internal(const std::string& word); + bool spell_internal(const std::string& word, int* info = NULL, std::string* root = NULL); + std::vector<std::string> suggest_internal(const std::string& word, + bool& capitalized, size_t& abbreviated, int& captype); + void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev); + size_t cleanword2(std::string& dest, + std::vector<w_char>& dest_u, + const std::string& src, + int* pcaptype, + size_t* pabbrev); + void clean_ignore(std::string& dest, const std::string& src); + void mkinitcap(std::string& u8); + int mkinitcap2(std::string& u8, std::vector<w_char>& u16); + int mkinitsmall2(std::string& u8, std::vector<w_char>& u16); + void mkallcap(std::string& u8); + int mkallsmall2(std::string& u8, std::vector<w_char>& u16); + struct hentry* checkword(const std::string& source, int* info, std::string* root); + std::string sharps_u8_l1(const std::string& source); + hentry* + spellsharps(std::string& base, size_t start_pos, int, int, int* info, std::string* root); + int is_keepcase(const hentry* rv); + void insert_sug(std::vector<std::string>& slst, const std::string& word); + void cat_result(std::string& result, const std::string& st); + std::vector<std::string> spellml(const std::string& word); + std::string get_xml_par(const std::string& par, std::string::size_type pos); + std::string::size_type get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr); + std::vector<std::string> get_xml_list(const std::string& list, std::string::size_type pos, const char* tag); + int check_xml_par(const std::string& q, std::string::size_type pos, const char* attr, const char* value); +private: + HunspellImpl(const HunspellImpl&); + HunspellImpl& operator=(const HunspellImpl&); +}; + +HunspellImpl::HunspellImpl(const char* affpath, const char* dpath, const char* key) { + csconv = NULL; + utf8 = 0; + complexprefixes = 0; + affixpath = mystrdup(affpath); + + /* first set up the hash manager */ + m_HMgrs.push_back(new HashMgr(dpath, affpath, key)); + + /* next set up the affix manager */ + /* it needs access to the hash manager lookup methods */ + pAMgr = new AffixMgr(affpath, m_HMgrs, key); + + /* get the preferred try string and the dictionary */ + /* encoding from the Affix Manager for that dictionary */ + char* try_string = pAMgr->get_try_string(); + encoding = pAMgr->get_encoding(); + langnum = pAMgr->get_langnum(); + utf8 = pAMgr->get_utf8(); + if (!utf8) + csconv = get_current_cs(encoding); + complexprefixes = pAMgr->get_complexprefixes(); + wordbreak = pAMgr->get_breaktable(); + + /* and finally set up the suggestion manager */ + pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr); + if (try_string) + free(try_string); +} + +HunspellImpl::~HunspellImpl() { + delete pSMgr; + delete pAMgr; + for (size_t i = 0; i < m_HMgrs.size(); ++i) + delete m_HMgrs[i]; + pSMgr = NULL; + pAMgr = NULL; +#ifdef MOZILLA_CLIENT + delete[] csconv; +#endif + csconv = NULL; + if (affixpath) + free(affixpath); + affixpath = NULL; +} + +// load extra dictionaries +int HunspellImpl::add_dic(const char* dpath, const char* key) { + if (!affixpath) + return 1; + m_HMgrs.push_back(new HashMgr(dpath, affixpath, key)); + return 0; +} + + +// make a copy of src at dest while removing all characters +// specified in IGNORE rule +void HunspellImpl::clean_ignore(std::string& dest, + const std::string& src) { + dest.clear(); + dest.assign(src); + const char* ignoredchars = pAMgr ? 
pAMgr->get_ignore() : NULL; + if (ignoredchars != NULL) { + if (utf8) { + const std::vector<w_char>& ignoredchars_utf16 = + pAMgr->get_ignore_utf16(); + remove_ignored_chars_utf(dest, ignoredchars_utf16); + } else { + remove_ignored_chars(dest, ignoredchars); + } + } +} + + +// make a copy of src at destination while removing all leading +// blanks and removing any trailing periods after recording +// their presence with the abbreviation flag +// also since already going through character by character, +// set the capitalization type +// return the length of the "cleaned" (and UTF-8 encoded) word + +size_t HunspellImpl::cleanword2(std::string& dest, + std::vector<w_char>& dest_utf, + const std::string& src, + int* pcaptype, + size_t* pabbrev) { + dest.clear(); + dest_utf.clear(); + + // remove IGNORE characters from the string + std::string w2; + clean_ignore(w2, src); + + const char* q = w2.c_str(); + + // first skip over any leading blanks + while (*q == ' ') + ++q; + + // now strip off any trailing periods (recording their presence) + *pabbrev = 0; + int nl = strlen(q); + while ((nl > 0) && (*(q + nl - 1) == '.')) { + nl--; + (*pabbrev)++; + } + + // if no characters are left it can't be capitalized + if (nl <= 0) { + *pcaptype = NOCAP; + return 0; + } + + dest.append(q, nl); + nl = dest.size(); + if (utf8) { + u8_u16(dest_utf, dest); + *pcaptype = get_captype_utf8(dest_utf, langnum); + } else { + *pcaptype = get_captype(dest, csconv); + } + return nl; +} + +void HunspellImpl::cleanword(std::string& dest, + const std::string& src, + int* pcaptype, + int* pabbrev) { + dest.clear(); + const unsigned char* q = (const unsigned char*)src.c_str(); + int firstcap = 0; + + // first skip over any leading blanks + while (*q == ' ') + ++q; + + // now strip off any trailing periods (recording their presence) + *pabbrev = 0; + int nl = strlen((const char*)q); + while ((nl > 0) && (*(q + nl - 1) == '.')) { + nl--; + (*pabbrev)++; + } + + // if no characters are left it can't be capitalized + if (nl <= 0) { + *pcaptype = NOCAP; + return; + } + + // now determine the capitalization type of the first nl letters + int ncap = 0; + int nneutral = 0; + int nc = 0; + + if (!utf8) { + while (nl > 0) { + nc++; + if (csconv[(*q)].ccase) + ncap++; + if (csconv[(*q)].cupper == csconv[(*q)].clower) + nneutral++; + dest.push_back(*q++); + nl--; + } + // remember to terminate the destination string + firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase; + } else { + std::vector<w_char> t; + u8_u16(t, src); + for (size_t i = 0; i < t.size(); ++i) { + unsigned short idx = (t[i].h << 8) + t[i].l; + unsigned short low = unicodetolower(idx, langnum); + if (idx != low) + ncap++; + if (unicodetoupper(idx, langnum) == low) + nneutral++; + } + u16_u8(dest, t); + if (ncap) { + unsigned short idx = (t[0].h << 8) + t[0].l; + firstcap = (idx != unicodetolower(idx, langnum)); + } + } + + // now finally set the captype + if (ncap == 0) { + *pcaptype = NOCAP; + } else if ((ncap == 1) && firstcap) { + *pcaptype = INITCAP; + } else if ((ncap == nc) || ((ncap + nneutral) == nc)) { + *pcaptype = ALLCAP; + } else if ((ncap > 1) && firstcap) { + *pcaptype = HUHINITCAP; + } else { + *pcaptype = HUHCAP; + } +} + +void HunspellImpl::mkallcap(std::string& u8) { + if (utf8) { + std::vector<w_char> u16; + u8_u16(u16, u8); + ::mkallcap_utf(u16, langnum); + u16_u8(u8, u16); + } else { + ::mkallcap(u8, csconv); + } +} + +int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) { + if (utf8) { + ::mkallsmall_utf(u16, 
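
cleanword2() above normalizes input before lookup: leading blanks are skipped, trailing periods are stripped but counted as an abbreviation marker, and the capitalization type is computed from what remains. A rough standalone sketch of just the trimming step (helper name and signature are illustrative, not Hunspell API):

#include <cstddef>
#include <string>

static std::string strip_for_spellcheck(const std::string& src, size_t* pabbrev) {
  size_t b = 0;
  while (b < src.size() && src[b] == ' ')   // leading blanks
    ++b;
  size_t e = src.size();
  *pabbrev = 0;
  while (e > b && src[e - 1] == '.') {      // trailing periods -> abbreviation count
    --e;
    ++(*pabbrev);
  }
  return src.substr(b, e - b);
}
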
langnum); + u16_u8(u8, u16); + } else { + ::mkallsmall(u8, csconv); + } + return u8.size(); +} + +// convert UTF-8 sharp S codes to latin 1 +std::string HunspellImpl::sharps_u8_l1(const std::string& source) { + std::string dest(source); + mystrrep(dest, "\xC3\x9F", "\xDF"); + return dest; +} + +// recursive search for right ss - sharp s permutations +hentry* HunspellImpl::spellsharps(std::string& base, + size_t n_pos, + int n, + int repnum, + int* info, + std::string* root) { + size_t pos = base.find("ss", n_pos); + if (pos != std::string::npos && (n < MAXSHARPS)) { + base[pos] = '\xC3'; + base[pos + 1] = '\x9F'; + hentry* h = spellsharps(base, pos + 2, n + 1, repnum + 1, info, root); + if (h) + return h; + base[pos] = 's'; + base[pos + 1] = 's'; + h = spellsharps(base, pos + 2, n + 1, repnum, info, root); + if (h) + return h; + } else if (repnum > 0) { + if (utf8) + return checkword(base, info, root); + std::string tmp(sharps_u8_l1(base)); + return checkword(tmp, info, root); + } + return NULL; +} + +int HunspellImpl::is_keepcase(const hentry* rv) { + return pAMgr && rv->astr && pAMgr->get_keepcase() && + TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen); +} + +/* insert a word to the beginning of the suggestion array */ +void HunspellImpl::insert_sug(std::vector<std::string>& slst, const std::string& word) { + slst.insert(slst.begin(), word); +} + +bool HunspellImpl::spell(const std::string& word, int* info, std::string* root) { + bool r = spell_internal(word, info, root); + if (r && root) { + // output conversion + RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; + if (rl) { + std::string wspace; + if (rl->conv(*root, wspace)) { + *root = wspace; + } + } + } + return r; +} + +bool HunspellImpl::spell_internal(const std::string& word, int* info, std::string* root) { + struct hentry* rv = NULL; + + int info2 = 0; + if (!info) + info = &info2; + else + *info = 0; + + // Hunspell supports XML input of the simplified API (see manual) + if (word == SPELL_XML) + return true; + if (utf8) { + if (word.size() >= MAXWORDUTF8LEN) + return false; + } else { + if (word.size() >= MAXWORDLEN) + return false; + } + int captype = NOCAP; + size_t abbv = 0; + size_t wl = 0; + + std::string scw; + std::vector<w_char> sunicw; + + // input conversion + RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL; + { + std::string wspace; + + bool convstatus = rl ? rl->conv(word, wspace) : false; + if (convstatus) + wl = cleanword2(scw, sunicw, wspace, &captype, &abbv); + else + wl = cleanword2(scw, sunicw, word, &captype, &abbv); + } + +#ifdef MOZILLA_CLIENT + // accept the abbreviated words without dots + // workaround for the incomplete tokenization of Mozilla + abbv = 1; +#endif + + if (wl == 0 || m_HMgrs.empty()) + return true; + if (root) + root->clear(); + + // allow numbers with dots, dashes and commas (but forbid double separators: + // "..", "--" etc.) 
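
Restating the rule in the comment above: a token is accepted as a number when it consists of digits optionally grouped by single '.', ',' or '-' separators, with no leading, trailing or doubled separator ("10,000.5" passes; "10..5" and "-5" do not). An equivalent standalone check, for illustration only; the real loop follows below using the same NBEGIN/NNUM/NSEP states:

#include <string>

static bool looks_like_number(const std::string& s) {
  enum { NBEGIN, NNUM, NSEP } state = NBEGIN;
  for (char c : s) {
    if (c >= '0' && c <= '9') {
      state = NNUM;
    } else if (c == ',' || c == '.' || c == '-') {
      if (state != NNUM)   // leading or doubled separator
        return false;
      state = NSEP;
    } else {
      return false;        // not a numeric token at all
    }
  }
  return state == NNUM && !s.empty();  // must end on a digit
}
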
+ enum { NBEGIN, NNUM, NSEP }; + int nstate = NBEGIN; + size_t i; + + for (i = 0; (i < wl); i++) { + if ((scw[i] <= '9') && (scw[i] >= '0')) { + nstate = NNUM; + } else if ((scw[i] == ',') || (scw[i] == '.') || (scw[i] == '-')) { + if ((nstate == NSEP) || (i == 0)) + break; + nstate = NSEP; + } else + break; + } + if ((i == wl) && (nstate == NNUM)) + return true; + + switch (captype) { + case HUHCAP: + /* FALLTHROUGH */ + case HUHINITCAP: + *info |= SPELL_ORIGCAP; + /* FALLTHROUGH */ + case NOCAP: + rv = checkword(scw, info, root); + if ((abbv) && !(rv)) { + std::string u8buffer(scw); + u8buffer.push_back('.'); + rv = checkword(u8buffer, info, root); + } + break; + case ALLCAP: { + *info |= SPELL_ORIGCAP; + rv = checkword(scw, info, root); + if (rv) + break; + if (abbv) { + std::string u8buffer(scw); + u8buffer.push_back('.'); + rv = checkword(u8buffer, info, root); + if (rv) + break; + } + // Spec. prefix handling for Catalan, French, Italian: + // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia). + size_t apos = pAMgr ? scw.find('\'') : std::string::npos; + if (apos != std::string::npos) { + mkallsmall2(scw, sunicw); + //conversion may result in string with different len to pre-mkallsmall2 + //so re-scan + if (apos != std::string::npos && apos < scw.size() - 1) { + std::string part1 = scw.substr(0, apos+1); + std::string part2 = scw.substr(apos+1); + if (utf8) { + std::vector<w_char> part1u, part2u; + u8_u16(part1u, part1); + u8_u16(part2u, part2); + mkinitcap2(part2, part2u); + scw = part1 + part2; + sunicw = part1u; + sunicw.insert(sunicw.end(), part2u.begin(), part2u.end()); + rv = checkword(scw, info, root); + if (rv) + break; + } else { + mkinitcap2(part2, sunicw); + scw = part1 + part2; + rv = checkword(scw, info, root); + if (rv) + break; + } + mkinitcap2(scw, sunicw); + rv = checkword(scw, info, root); + if (rv) + break; + } + } + if (pAMgr && pAMgr->get_checksharps() && scw.find("SS") != std::string::npos) { + + mkallsmall2(scw, sunicw); + std::string u8buffer(scw); + rv = spellsharps(u8buffer, 0, 0, 0, info, root); + if (!rv) { + mkinitcap2(scw, sunicw); + rv = spellsharps(scw, 0, 0, 0, info, root); + } + if ((abbv) && !(rv)) { + u8buffer.push_back('.'); + rv = spellsharps(u8buffer, 0, 0, 0, info, root); + if (!rv) { + u8buffer = std::string(scw); + u8buffer.push_back('.'); + rv = spellsharps(u8buffer, 0, 0, 0, info, root); + } + } + if (rv) + break; + } + } + /* FALLTHROUGH */ + case INITCAP: { + // handle special capitalization of dotted I + bool Idot = (utf8 && (unsigned char) scw[0] == 0xc4 && (unsigned char) scw[1] == 0xb0); + *info |= SPELL_ORIGCAP; + if (captype == ALLCAP) { + mkallsmall2(scw, sunicw); + mkinitcap2(scw, sunicw); + if (Idot) + scw.replace(0, 1, "\xc4\xb0"); + } + if (captype == INITCAP) + *info |= SPELL_INITCAP; + rv = checkword(scw, info, root); + if (captype == INITCAP) + *info &= ~SPELL_INITCAP; + // forbid bad capitalization + // (for example, ijs -> Ijs instead of IJs in Dutch) + // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag) + if (*info & SPELL_FORBIDDEN) { + rv = NULL; + break; + } + if (rv && is_keepcase(rv) && (captype == ALLCAP)) + rv = NULL; + if (rv || (Idot && langnum != LANG_az && langnum != LANG_tr && langnum != LANG_crh)) + break; + + mkallsmall2(scw, sunicw); + std::string u8buffer(scw); + mkinitcap2(scw, sunicw); + + rv = checkword(u8buffer, info, root); + if (abbv && !rv) { + u8buffer.push_back('.'); + rv = checkword(u8buffer, info, root); + if (!rv) { + u8buffer = scw; + u8buffer.push_back('.'); + if (captype == 
INITCAP) + *info |= SPELL_INITCAP; + rv = checkword(u8buffer, info, root); + if (captype == INITCAP) + *info &= ~SPELL_INITCAP; + if (rv && is_keepcase(rv) && (captype == ALLCAP)) + rv = NULL; + break; + } + } + if (rv && is_keepcase(rv) && + ((captype == ALLCAP) || + // if CHECKSHARPS: KEEPCASE words with \xDF are allowed + // in INITCAP form, too. + !(pAMgr->get_checksharps() && + ((utf8 && u8buffer.find("\xC3\x9F") != std::string::npos) || + (!utf8 && u8buffer.find('\xDF') != std::string::npos))))) + rv = NULL; + break; + } + } + + if (rv) { + if (pAMgr && pAMgr->get_warn() && rv->astr && + TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) { + *info |= SPELL_WARN; + if (pAMgr->get_forbidwarn()) + return false; + return true; + } + return true; + } + + // recursive breaking at break points + if (!wordbreak.empty() && !(*info & SPELL_FORBIDDEN)) { + + int nbr = 0; + wl = scw.size(); + + // calculate break points for recursion limit + for (size_t j = 0; j < wordbreak.size(); ++j) { + size_t pos = 0; + while ((pos = scw.find(wordbreak[j], pos)) != std::string::npos) { + ++nbr; + pos += wordbreak[j].size(); + } + } + if (nbr >= 10) + return false; + + // check boundary patterns (^begin and end$) + for (size_t j = 0; j < wordbreak.size(); ++j) { + size_t plen = wordbreak[j].size(); + if (plen == 1 || plen > wl) + continue; + + if (wordbreak[j][0] == '^' && + scw.compare(0, plen - 1, wordbreak[j], 1, plen -1) == 0 && spell(scw.substr(plen - 1))) + return true; + + if (wordbreak[j][plen - 1] == '$' && + scw.compare(wl - plen + 1, plen - 1, wordbreak[j], 0, plen - 1) == 0) { + std::string suffix(scw.substr(wl - plen + 1)); + scw.resize(wl - plen + 1); + if (spell(scw)) + return true; + scw.append(suffix); + } + } + + // other patterns + for (size_t j = 0; j < wordbreak.size(); ++j) { + size_t plen = wordbreak[j].size(); + size_t found = scw.find(wordbreak[j]); + if ((found > 0) && (found < wl - plen)) { + size_t found2 = scw.find(wordbreak[j], found + 1); + // try to break at the second occurance + // to recognize dictionary words with wordbreak + if (found2 > 0 && (found2 < wl - plen)) + found = found2; + if (!spell(scw.substr(found + plen))) + continue; + std::string suffix(scw.substr(found)); + scw.resize(found); + // examine 2 sides of the break point + if (spell(scw)) + return true; + scw.append(suffix); + + // LANG_hu: spec. dash rule + if (langnum == LANG_hu && wordbreak[j] == "-") { + suffix = scw.substr(found + 1); + scw.resize(found + 1); + if (spell(scw)) + return true; // check the first part with dash + scw.append(suffix); + } + // end of LANG specific region + } + } + + // other patterns (break at first break point) + for (size_t j = 0; j < wordbreak.size(); ++j) { + size_t plen = wordbreak[j].size(); + size_t found = scw.find(wordbreak[j]); + if ((found > 0) && (found < wl - plen)) { + if (!spell(scw.substr(found + plen))) + continue; + std::string suffix(scw.substr(found)); + scw.resize(found); + // examine 2 sides of the break point + if (spell(scw)) + return true; + scw.append(suffix); + + // LANG_hu: spec. 
dash rule + if (langnum == LANG_hu && wordbreak[j] == "-") { + suffix = scw.substr(found + 1); + scw.resize(found + 1); + if (spell(scw)) + return true; // check the first part with dash + scw.append(suffix); + } + // end of LANG specific region + } + } + } + + return false; +} + +struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::string* root) { + std::string w2; + const char* word; + int len; + + // remove IGNORE characters from the string + clean_ignore(w2, w); + + word = w2.c_str(); + len = w2.size(); + + if (!len) + return NULL; + + // word reversing wrapper for complex prefixes + if (complexprefixes) { + if (utf8) + reverseword_utf(w2); + else + reverseword(w2); + } + + word = w2.c_str(); + + // look word in hash table + struct hentry* he = NULL; + for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) { + he = m_HMgrs[i]->lookup(word); + + // check forbidden and onlyincompound words + if ((he) && (he->astr) && (pAMgr) && + TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) { + if (info) + *info |= SPELL_FORBIDDEN; + // LANG_hu section: set dash information for suggestions + if (langnum == LANG_hu) { + if (pAMgr->get_compoundflag() && + TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) { + if (info) + *info |= SPELL_COMPOUND; + } + } + return NULL; + } + + // he = next not needaffix, onlyincompound homonym or onlyupcase word + while (he && (he->astr) && pAMgr && + ((pAMgr->get_needaffix() && + TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) || + (pAMgr->get_onlyincompound() && + TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) || + (info && (*info & SPELL_INITCAP) && + TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) + he = he->next_homonym; + } + + // check with affixes + if (!he && pAMgr) { + // try stripping off affixes */ + he = pAMgr->affix_check(word, len, 0); + + // check compound restriction and onlyupcase + if (he && he->astr && + ((pAMgr->get_onlyincompound() && + TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) || + (info && (*info & SPELL_INITCAP) && + TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) { + he = NULL; + } + + if (he) { + if ((he->astr) && (pAMgr) && + TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) { + if (info) + *info |= SPELL_FORBIDDEN; + return NULL; + } + if (root) { + root->assign(he->word); + if (complexprefixes) { + if (utf8) + reverseword_utf(*root); + else + reverseword(*root); + } + } + // try check compound word + } else if (pAMgr->get_compound()) { + struct hentry* rwords[100]; // buffer for COMPOUND pattern checking + he = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 0, info); + // LANG_hu section: `moving rule' with last dash + if ((!he) && (langnum == LANG_hu) && (word[len - 1] == '-')) { + std::string dup(word, len - 1); + he = pAMgr->compound_check(dup, -5, 0, 100, 0, NULL, (hentry**)&rwords, 1, 0, info); + } + // end of LANG specific region + if (he) { + if (root) { + root->assign(he->word); + if (complexprefixes) { + if (utf8) + reverseword_utf(*root); + else + reverseword(*root); + } + } + if (info) + *info |= SPELL_COMPOUND; + } + } + } + + return he; +} + +std::vector<std::string> HunspellImpl::suggest(const std::string& word) { + bool capwords; + size_t abbv; + int captype; + std::vector<std::string> slst = suggest_internal(word, capwords, abbv, captype); + // word reversing wrapper for complex prefixes + if (complexprefixes) { + for (size_t j = 0; j < slst.size(); ++j) { + if (utf8) + reverseword_utf(slst[j]); + else + reverseword(slst[j]); + } + 
} + + // capitalize + if (capwords) + for (size_t j = 0; j < slst.size(); ++j) { + mkinitcap(slst[j]); + } + + // expand suggestions with dot(s) + if (abbv && pAMgr && pAMgr->get_sugswithdots()) { + for (size_t j = 0; j < slst.size(); ++j) { + slst[j].append(word.substr(word.size() - abbv)); + } + } + + // remove bad capitalized and forbidden forms + if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) { + switch (captype) { + case INITCAP: + case ALLCAP: { + size_t l = 0; + for (size_t j = 0; j < slst.size(); ++j) { + if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) { + std::string s; + std::vector<w_char> w; + if (utf8) { + u8_u16(w, slst[j]); + } else { + s = slst[j]; + } + mkallsmall2(s, w); + if (spell(s)) { + slst[l] = s; + ++l; + } else { + mkinitcap2(s, w); + if (spell(s)) { + slst[l] = s; + ++l; + } + } + } else { + slst[l] = slst[j]; + ++l; + } + } + slst.resize(l); + } + } + } + + // remove duplications + size_t l = 0; + for (size_t j = 0; j < slst.size(); ++j) { + slst[l] = slst[j]; + for (size_t k = 0; k < l; ++k) { + if (slst[k] == slst[j]) { + --l; + break; + } + } + ++l; + } + slst.resize(l); + + // output conversion + RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; + if (rl) { + for (size_t i = 0; rl && i < slst.size(); ++i) { + std::string wspace; + if (rl->conv(slst[i], wspace)) { + slst[i] = wspace; + } + } + } + return slst; +} + +std::vector<std::string> HunspellImpl::suggest_internal(const std::string& word, + bool& capwords, size_t& abbv, int& captype) { + captype = NOCAP; + abbv = 0; + capwords = false; + + std::vector<std::string> slst; + + int onlycmpdsug = 0; + if (!pSMgr || m_HMgrs.empty()) + return slst; + + // process XML input of the simplified API (see manual) + if (word.compare(0, sizeof(SPELL_XML) - 3, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) { + return spellml(word); + } + if (utf8) { + if (word.size() >= MAXWORDUTF8LEN) + return slst; + } else { + if (word.size() >= MAXWORDLEN) + return slst; + } + size_t wl = 0; + + std::string scw; + std::vector<w_char> sunicw; + + // input conversion + RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; + { + std::string wspace; + + bool convstatus = rl ? 
rl->conv(word, wspace) : false; + if (convstatus) + wl = cleanword2(scw, sunicw, wspace, &captype, &abbv); + else + wl = cleanword2(scw, sunicw, word, &captype, &abbv); + + if (wl == 0) + return slst; + } + + bool good = false; + + clock_t timelimit; + // initialize in every suggestion call + timelimit = clock(); + + // check capitalized form for FORCEUCASE + if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) { + int info = SPELL_ORIGCAP; + if (checkword(scw, &info, NULL)) { + std::string form(scw); + mkinitcap(form); + slst.push_back(form); + return slst; + } + } + + switch (captype) { + case NOCAP: { + good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + if (abbv) { + std::string wspace(scw); + wspace.push_back('.'); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + } + break; + } + + case INITCAP: { + capwords = true; + good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + break; + } + case HUHINITCAP: + capwords = true; + /* FALLTHROUGH */ + case HUHCAP: { + good |= pSMgr->suggest(slst, scw.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + // something.The -> something. The + size_t dot_pos = scw.find('.'); + if (dot_pos != std::string::npos) { + std::string postdot = scw.substr(dot_pos + 1); + int captype_; + if (utf8) { + std::vector<w_char> postdotu; + u8_u16(postdotu, postdot); + captype_ = get_captype_utf8(postdotu, langnum); + } else { + captype_ = get_captype(postdot, csconv); + } + if (captype_ == INITCAP) { + std::string str(scw); + str.insert(dot_pos + 1, 1, ' '); + insert_sug(slst, str); + } + } + + std::string wspace; + + if (captype == HUHINITCAP) { + // TheOpenOffice.org -> The OpenOffice.org + wspace = scw; + mkinitsmall2(wspace, sunicw); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + } + wspace = scw; + mkallsmall2(wspace, sunicw); + if (spell(wspace.c_str())) + insert_sug(slst, wspace); + size_t prevns = slst.size(); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + if (captype == HUHINITCAP) { + mkinitcap2(wspace, sunicw); + if (spell(wspace.c_str())) + insert_sug(slst, wspace); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + } + // aNew -> "a New" (instead of "a new") + for (size_t j = prevns; j < slst.size(); ++j) { + const char* space = strchr(slst[j].c_str(), ' '); + if (space) { + size_t slen = strlen(space + 1); + // different case after space (need capitalisation) + if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) { + std::string first(slst[j].c_str(), space + 1); + std::string second(space + 1); + std::vector<w_char> w; + if (utf8) + u8_u16(w, second); + mkinitcap2(second, w); + // set as first suggestion + slst.erase(slst.begin() + j); + slst.insert(slst.begin(), first + second); + } + } + } + break; + } + + case ALLCAP: { + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return 
slst; + if (pAMgr && pAMgr->get_keepcase() && spell(wspace.c_str())) + insert_sug(slst, wspace); + mkinitcap2(wspace, sunicw); + good |= pSMgr->suggest(slst, wspace.c_str(), &onlycmpdsug); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + for (size_t j = 0; j < slst.size(); ++j) { + mkallcap(slst[j]); + if (pAMgr && pAMgr->get_checksharps()) { + if (utf8) { + mystrrep(slst[j], "\xC3\x9F", "SS"); + } else { + mystrrep(slst[j], "\xDF", "SS"); + } + } + } + break; + } + } + + // LANG_hu section: replace '-' with ' ' in Hungarian + if (langnum == LANG_hu) { + for (size_t j = 0; j < slst.size(); ++j) { + size_t pos = slst[j].find('-'); + if (pos != std::string::npos) { + int info; + std::string w(slst[j].substr(0, pos)); + w.append(slst[j].substr(pos + 1)); + (void)spell(w, &info, NULL); + if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) { + slst[j][pos] = ' '; + } else + slst[j][pos] = '-'; + } + } + } + // END OF LANG_hu section + // try ngram approach since found nothing good suggestion + if (!good && pAMgr && (slst.empty() || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0)) { + switch (captype) { + case NOCAP: { + pSMgr->ngsuggest(slst, scw.c_str(), m_HMgrs, NOCAP); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + break; + } + /* FALLTHROUGH */ + case HUHINITCAP: + capwords = true; + /* FALLTHROUGH */ + case HUHCAP: { + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, HUHCAP); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + break; + } + case INITCAP: { + capwords = true; + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, INITCAP); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + break; + } + case ALLCAP: { + std::string wspace(scw); + mkallsmall2(wspace, sunicw); + size_t oldns = slst.size(); + pSMgr->ngsuggest(slst, wspace.c_str(), m_HMgrs, ALLCAP); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + for (size_t j = oldns; j < slst.size(); ++j) { + mkallcap(slst[j]); + } + break; + } + } + } + + // try dash suggestion (Afo-American -> Afro-American) + // Note: LibreOffice was modified to treat dashes as word + // characters to check "scot-free" etc. word forms, but + // we need to handle suggestions for "Afo-American", etc., + // while "Afro-American" is missing from the dictionary. 
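+  // A sketch of the intended effect (the dictionary contents, paths and input
+  // word here are assumptions for illustration, not taken from this code):
+  //   Hunspell speller("en_US.aff", "en_US.dic");
+  //   std::vector<std::string> sugs = speller.suggest("Afo-American");
+  //   // "Afro-American" can be offered: the "Afo" chunk fails spell(), is
+  //   // re-suggested on its own, and each repaired chunk is re-joined with
+  //   // the text on the other side of the dash.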
+ // TODO avoid possible overgeneration + size_t dash_pos = scw.find('-'); + if (dash_pos != std::string::npos) { + int nodashsug = 1; + for (size_t j = 0; j < slst.size() && nodashsug == 1; ++j) { + if (slst[j].find('-') != std::string::npos) + nodashsug = 0; + } + + size_t prev_pos = 0; + bool last = false; + + while (!good && nodashsug && !last) { + if (dash_pos == scw.size()) + last = 1; + std::string chunk = scw.substr(prev_pos, dash_pos - prev_pos); + if (!spell(chunk.c_str())) { + std::vector<std::string> nlst = suggest(chunk.c_str()); + if (clock() > timelimit + TIMELIMIT_GLOBAL) + return slst; + for (std::vector<std::string>::reverse_iterator j = nlst.rbegin(); j != nlst.rend(); ++j) { + std::string wspace = scw.substr(0, prev_pos); + wspace.append(*j); + if (!last) { + wspace.append("-"); + wspace.append(scw.substr(dash_pos + 1)); + } + int info = 0; + if (pAMgr && pAMgr->get_forbiddenword()) + checkword(wspace, &info, NULL); + if (!(info & SPELL_FORBIDDEN)) + insert_sug(slst, wspace); + } + nodashsug = 0; + } + if (!last) { + prev_pos = dash_pos + 1; + dash_pos = scw.find('-', prev_pos); + } + if (dash_pos == std::string::npos) + dash_pos = scw.size(); + } + } + return slst; +} + +const std::string& HunspellImpl::get_dict_encoding() const { + return encoding; +} + +std::vector<std::string> HunspellImpl::stem(const std::vector<std::string>& desc) { + std::vector<std::string> slst; + + std::string result2; + if (desc.empty()) + return slst; + for (size_t i = 0; i < desc.size(); ++i) { + + std::string result; + + // add compound word parts (except the last one) + const char* s = desc[i].c_str(); + const char* part = strstr(s, MORPH_PART); + if (part) { + const char* nextpart = strstr(part + 1, MORPH_PART); + while (nextpart) { + std::string field; + copy_field(field, part, MORPH_PART); + result.append(field); + part = nextpart; + nextpart = strstr(part + 1, MORPH_PART); + } + s = part; + } + + std::string tok(s); + size_t alt = 0; + while ((alt = tok.find(" | ", alt)) != std::string::npos) { + tok[alt + 1] = MSEP_ALT; + } + std::vector<std::string> pl = line_tok(tok, MSEP_ALT); + for (size_t k = 0; k < pl.size(); ++k) { + // add derivational suffixes + if (pl[k].find(MORPH_DERI_SFX) != std::string::npos) { + // remove inflectional suffixes + const size_t is = pl[k].find(MORPH_INFL_SFX); + if (is != std::string::npos) + pl[k].resize(is); + std::vector<std::string> singlepl; + singlepl.push_back(pl[k]); + std::string sg = pSMgr->suggest_gen(singlepl, pl[k]); + if (!sg.empty()) { + std::vector<std::string> gen = line_tok(sg, MSEP_REC); + for (size_t j = 0; j < gen.size(); ++j) { + result2.push_back(MSEP_REC); + result2.append(result); + result2.append(gen[j]); + } + } + } else { + result2.push_back(MSEP_REC); + result2.append(result); + if (pl[k].find(MORPH_SURF_PFX) != std::string::npos) { + std::string field; + copy_field(field, pl[k], MORPH_SURF_PFX); + result2.append(field); + } + std::string field; + copy_field(field, pl[k], MORPH_STEM); + result2.append(field); + } + } + } + slst = line_tok(result2, MSEP_REC); + uniqlist(slst); + return slst; +} + +std::vector<std::string> HunspellImpl::stem(const std::string& word) { + return stem(analyze(word)); +} + +const std::string& HunspellImpl::get_wordchars_cpp() const { + return pAMgr->get_wordchars(); +} + +const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const { + return pAMgr->get_wordchars_utf16(); +} + +void HunspellImpl::mkinitcap(std::string& u8) { + if (utf8) { + std::vector<w_char> u16; + u8_u16(u16, u8); + 
::mkinitcap_utf(u16, langnum); + u16_u8(u8, u16); + } else { + ::mkinitcap(u8, csconv); + } +} + +int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) { + if (utf8) { + ::mkinitcap_utf(u16, langnum); + u16_u8(u8, u16); + } else { + ::mkinitcap(u8, csconv); + } + return u8.size(); +} + +int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) { + if (utf8) { + ::mkinitsmall_utf(u16, langnum); + u16_u8(u8, u16); + } else { + ::mkinitsmall(u8, csconv); + } + return u8.size(); +} + +int HunspellImpl::add(const std::string& word) { + if (!m_HMgrs.empty()) + return m_HMgrs[0]->add(word); + return 0; +} + +int HunspellImpl::add_with_affix(const std::string& word, const std::string& example) { + if (!m_HMgrs.empty()) + return m_HMgrs[0]->add_with_affix(word, example); + return 0; +} + +int HunspellImpl::remove(const std::string& word) { + if (!m_HMgrs.empty()) + return m_HMgrs[0]->remove(word); + return 0; +} + +const std::string& HunspellImpl::get_version_cpp() const { + return pAMgr->get_version(); +} + +struct cs_info* HunspellImpl::get_csconv() { + return csconv; +} + +void HunspellImpl::cat_result(std::string& result, const std::string& st) { + if (!st.empty()) { + if (!result.empty()) + result.append("\n"); + result.append(st); + } +} + +std::vector<std::string> HunspellImpl::analyze(const std::string& word) { + std::vector<std::string> slst = analyze_internal(word); + // output conversion + RepList* rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL; + if (rl) { + for (size_t i = 0; rl && i < slst.size(); ++i) { + std::string wspace; + if (rl->conv(slst[i], wspace)) { + slst[i] = wspace; + } + } + } + return slst; +} + +std::vector<std::string> HunspellImpl::analyze_internal(const std::string& word) { + std::vector<std::string> slst; + if (!pSMgr || m_HMgrs.empty()) + return slst; + if (utf8) { + if (word.size() >= MAXWORDUTF8LEN) + return slst; + } else { + if (word.size() >= MAXWORDLEN) + return slst; + } + int captype = NOCAP; + size_t abbv = 0; + size_t wl = 0; + + std::string scw; + std::vector<w_char> sunicw; + + // input conversion + RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL; + { + std::string wspace; + + bool convstatus = rl ? rl->conv(word, wspace) : false; + if (convstatus) + wl = cleanword2(scw, sunicw, wspace, &captype, &abbv); + else + wl = cleanword2(scw, sunicw, word, &captype, &abbv); + } + + if (wl == 0) { + if (abbv) { + scw.clear(); + for (wl = 0; wl < abbv; wl++) + scw.push_back('.'); + abbv = 0; + } else + return slst; + } + + std::string result; + + size_t n = 0; + // test numbers + // LANG_hu section: set dash information for suggestions + if (langnum == LANG_hu) { + size_t n2 = 0; + size_t n3 = 0; + + while ((n < wl) && (((scw[n] <= '9') && (scw[n] >= '0')) || + (((scw[n] == '.') || (scw[n] == ',')) && (n > 0)))) { + n++; + if ((scw[n] == '.') || (scw[n] == ',')) { + if (((n2 == 0) && (n > 3)) || + ((n2 > 0) && ((scw[n - 1] == '.') || (scw[n - 1] == ',')))) + break; + n2++; + n3 = n; + } + } + + if ((n == wl) && (n3 > 0) && (n - n3 > 3)) + return slst; + if ((n == wl) || ((n > 0) && ((scw[n] == '%') || (scw[n] == '\xB0')) && + checkword(scw.substr(n), NULL, NULL))) { + result.append(scw); + result.resize(n - 1); + if (n == wl) + cat_result(result, pSMgr->suggest_morph(scw.substr(n - 1))); + else { + std::string chunk = scw.substr(n - 1, 1); + cat_result(result, pSMgr->suggest_morph(chunk)); + result.push_back('+'); // XXX SPEC. 
MORPHCODE + cat_result(result, pSMgr->suggest_morph(scw.substr(n))); + } + return line_tok(result, MSEP_REC); + } + } + // END OF LANG_hu section + + switch (captype) { + case HUHCAP: + case HUHINITCAP: + case NOCAP: { + cat_result(result, pSMgr->suggest_morph(scw)); + if (abbv) { + std::string u8buffer(scw); + u8buffer.push_back('.'); + cat_result(result, pSMgr->suggest_morph(u8buffer)); + } + break; + } + case INITCAP: { + mkallsmall2(scw, sunicw); + std::string u8buffer(scw); + mkinitcap2(scw, sunicw); + cat_result(result, pSMgr->suggest_morph(u8buffer)); + cat_result(result, pSMgr->suggest_morph(scw)); + if (abbv) { + u8buffer.push_back('.'); + cat_result(result, pSMgr->suggest_morph(u8buffer)); + + u8buffer = scw; + u8buffer.push_back('.'); + + cat_result(result, pSMgr->suggest_morph(u8buffer)); + } + break; + } + case ALLCAP: { + cat_result(result, pSMgr->suggest_morph(scw)); + if (abbv) { + std::string u8buffer(scw); + u8buffer.push_back('.'); + cat_result(result, pSMgr->suggest_morph(u8buffer)); + } + mkallsmall2(scw, sunicw); + std::string u8buffer(scw); + mkinitcap2(scw, sunicw); + + cat_result(result, pSMgr->suggest_morph(u8buffer)); + cat_result(result, pSMgr->suggest_morph(scw)); + if (abbv) { + u8buffer.push_back('.'); + cat_result(result, pSMgr->suggest_morph(u8buffer)); + + u8buffer = scw; + u8buffer.push_back('.'); + + cat_result(result, pSMgr->suggest_morph(u8buffer)); + } + break; + } + } + + if (!result.empty()) { + // word reversing wrapper for complex prefixes + if (complexprefixes) { + if (utf8) + reverseword_utf(result); + else + reverseword(result); + } + return line_tok(result, MSEP_REC); + } + + // compound word with dash (HU) I18n + // LANG_hu section: set dash information for suggestions + + size_t dash_pos = langnum == LANG_hu ? scw.find('-') : std::string::npos; + if (dash_pos != std::string::npos) { + int nresult = 0; + + std::string part1 = scw.substr(0, dash_pos); + std::string part2 = scw.substr(dash_pos+1); + + // examine 2 sides of the dash + if (part2.empty()) { // base word ending with dash + if (spell(part1)) { + std::string p = pSMgr->suggest_morph(part1); + if (!p.empty()) { + slst = line_tok(p, MSEP_REC); + return slst; + } + } + } else if (part2.size() == 1 && part2[0] == 'e') { // XXX (HU) -e hat. + if (spell(part1) && (spell("-e"))) { + std::string st = pSMgr->suggest_morph(part1); + if (!st.empty()) { + result.append(st); + } + result.push_back('+'); // XXX spec. separator in MORPHCODE + st = pSMgr->suggest_morph("-e"); + if (!st.empty()) { + result.append(st); + } + return line_tok(result, MSEP_REC); + } + } else { + // first word ending with dash: word- XXX ??? + part1.push_back(' '); + nresult = spell(part1); + part1.erase(part1.size() - 1); + if (nresult && spell(part2) && + ((part2.size() > 1) || ((part2[0] > '0') && (part2[0] < '9')))) { + std::string st = pSMgr->suggest_morph(part1); + if (!st.empty()) { + result.append(st); + result.push_back('+'); // XXX spec. 
separator in MORPHCODE + } + st = pSMgr->suggest_morph(part2); + if (!st.empty()) { + result.append(st); + } + return line_tok(result, MSEP_REC); + } + } + // affixed number in correct word + if (nresult && (dash_pos > 0) && + (((scw[dash_pos - 1] <= '9') && (scw[dash_pos - 1] >= '0')) || + (scw[dash_pos - 1] == '.'))) { + n = 1; + if (scw[dash_pos - n] == '.') + n++; + // search first not a number character to left from dash + while ((dash_pos >= n) && ((scw[dash_pos - n] == '0') || (n < 3)) && + (n < 6)) { + n++; + } + if (dash_pos < n) + n--; + // numbers: valami1000000-hoz + // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz, + // 56-hoz, 6-hoz + for (; n >= 1; n--) { + if (scw[dash_pos - n] < '0' || scw[dash_pos - n] > '9') { + continue; + } + std::string chunk = scw.substr(dash_pos - n); + if (checkword(chunk, NULL, NULL)) { + result.append(chunk); + std::string st = pSMgr->suggest_morph(chunk); + if (!st.empty()) { + result.append(st); + } + return line_tok(result, MSEP_REC); + } + } + } + } + return slst; +} + +std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::vector<std::string>& pl) { + std::vector<std::string> slst; + if (!pSMgr || pl.empty()) + return slst; + std::vector<std::string> pl2 = analyze(word); + int captype = NOCAP; + int abbv = 0; + std::string cw; + cleanword(cw, word, &captype, &abbv); + std::string result; + + for (size_t i = 0; i < pl.size(); ++i) { + cat_result(result, pSMgr->suggest_gen(pl2, pl[i])); + } + + if (!result.empty()) { + // allcap + if (captype == ALLCAP) + mkallcap(result); + + // line split + slst = line_tok(result, MSEP_REC); + + // capitalize + if (captype == INITCAP || captype == HUHINITCAP) { + for (size_t j = 0; j < slst.size(); ++j) { + mkinitcap(slst[j]); + } + } + + // temporary filtering of prefix related errors (eg. + // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks") + std::vector<std::string>::iterator it = slst.begin(); + while (it != slst.end()) { + if (!spell(*it)) { + it = slst.erase(it); + } else { + ++it; + } + } + } + return slst; +} + +std::vector<std::string> HunspellImpl::generate(const std::string& word, const std::string& pattern) { + std::vector<std::string> pl = analyze(pattern); + std::vector<std::string> slst = generate(word, pl); + uniqlist(slst); + return slst; +} + +// minimal XML parser functions +std::string HunspellImpl::get_xml_par(const std::string& in_par, std::string::size_type pos) { + std::string dest; + if (pos == std::string::npos) + return dest; + const char* par = in_par.c_str() + pos; + char end = *par; + if (end == '>') + end = '<'; + else if (end != '\'' && end != '"') + return dest; // bad XML + for (par++; *par != '\0' && *par != end; ++par) { + dest.push_back(*par); + } + mystrrep(dest, "<", "<"); + mystrrep(dest, "&", "&"); + return dest; +} + +int HunspellImpl::get_langnum() const { + return langnum; +} + +bool HunspellImpl::input_conv(const std::string& word, std::string& dest) { + RepList* rl = pAMgr ? 
pAMgr->get_iconvtable() : NULL; + if (rl) { + return rl->conv(word, dest); + } + dest.assign(word); + return false; +} + +// return the beginning of the element (attr == NULL) or the attribute +std::string::size_type HunspellImpl::get_xml_pos(const std::string& s, std::string::size_type pos, const char* attr) { + if (pos == std::string::npos) + return std::string::npos; + + std::string::size_type endpos = s.find('>', pos); + if (attr == NULL) + return endpos; + while (true) { + pos = s.find(attr, pos); + if (pos == std::string::npos || pos >= endpos) + return std::string::npos; + if (s[pos - 1] == ' ' || s[pos - 1] == '\n') + break; + pos += strlen(attr); + } + return pos + strlen(attr); +} + +int HunspellImpl::check_xml_par(const std::string& q, std::string::size_type pos, + const char* attr, + const char* value) { + std::string cw = get_xml_par(q, get_xml_pos(q, pos, attr)); + if (cw == value) + return 1; + return 0; +} + +std::vector<std::string> HunspellImpl::get_xml_list(const std::string& list, std::string::size_type pos, const char* tag) { + std::vector<std::string> slst; + if (pos == std::string::npos) + return slst; + while (true) { + pos = list.find(tag, pos); + if (pos == std::string::npos) + break; + std::string cw = get_xml_par(list, pos + strlen(tag) - 1); + if (cw.empty()) { + break; + } + slst.push_back(cw); + ++pos; + } + return slst; +} + +std::vector<std::string> HunspellImpl::spellml(const std::string& in_word) { + std::vector<std::string> slst; + + std::string::size_type qpos = in_word.find("<query"); + if (qpos == std::string::npos) + return slst; // bad XML input + + std::string::size_type q2pos = in_word.find('>', qpos); + if (q2pos == std::string::npos) + return slst; // bad XML input + + q2pos = in_word.find("<word", q2pos); + if (q2pos == std::string::npos) + return slst; // bad XML input + + if (check_xml_par(in_word, qpos, "type=", "analyze")) { + std::string cw = get_xml_par(in_word, in_word.find('>', q2pos)); + if (!cw.empty()) + slst = analyze(cw); + if (slst.empty()) + return slst; + // convert the result to <code><a>ana1</a><a>ana2</a></code> format + std::string r; + r.append("<code>"); + for (size_t i = 0; i < slst.size(); ++i) { + r.append("<a>"); + + std::string entry(slst[i]); + mystrrep(entry, "\t", " "); + mystrrep(entry, "&", "&"); + mystrrep(entry, "<", "<"); + r.append(entry); + + r.append("</a>"); + } + r.append("</code>"); + slst.clear(); + slst.push_back(r); + return slst; + } else if (check_xml_par(in_word, qpos, "type=", "stem")) { + std::string cw = get_xml_par(in_word, in_word.find('>', q2pos)); + if (!cw.empty()) + return stem(cw); + } else if (check_xml_par(in_word, qpos, "type=", "generate")) { + std::string cw = get_xml_par(in_word, in_word.find('>', q2pos)); + if (cw.empty()) + return slst; + std::string::size_type q3pos = in_word.find("<word", q2pos + 1); + if (q3pos != std::string::npos) { + std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos)); + if (!cw2.empty()) { + return generate(cw, cw2); + } + } else { + q2pos = in_word.find("<code", q2pos + 1); + if (q2pos != std::string::npos) { + std::vector<std::string> slst2 = get_xml_list(in_word, in_word.find('>', q2pos), "<a>"); + if (!slst2.empty()) { + slst = generate(cw, slst2); + uniqlist(slst); + return slst; + } + } + } + } else if (check_xml_par(in_word, qpos, "type=", "add")) { + std::string cw = get_xml_par(in_word, in_word.find('>', q2pos)); + if (cw.empty()) + return slst; + std::string::size_type q3pos = in_word.find("<word", q2pos + 1); + if (q3pos != 
std::string::npos) { + std::string cw2 = get_xml_par(in_word, in_word.find('>', q3pos)); + if (!cw2.empty()) { + add_with_affix(cw, cw2); + } else { + add(cw); + } + } else { + add(cw); + } + } + return slst; +} + +std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_word) { + std::vector<std::string> slst; + struct hentry* he = NULL; + int len; + std::string w2; + const char* word; + const char* ignoredchars = pAMgr->get_ignore(); + if (ignoredchars != NULL) { + w2.assign(root_word); + if (utf8) { + const std::vector<w_char>& ignoredchars_utf16 = + pAMgr->get_ignore_utf16(); + remove_ignored_chars_utf(w2, ignoredchars_utf16); + } else { + remove_ignored_chars(w2, ignoredchars); + } + word = w2.c_str(); + } else + word = root_word.c_str(); + + len = strlen(word); + + if (!len) + return slst; + + for (size_t i = 0; (i < m_HMgrs.size()) && !he; ++i) { + he = m_HMgrs[i]->lookup(word); + } + if (he) { + slst = pAMgr->get_suffix_words(he->astr, he->alen, root_word.c_str()); + } + return slst; +} + +namespace { + int munge_vector(char*** slst, const std::vector<std::string>& items) { + if (items.empty()) { + *slst = NULL; + return 0; + } else { + *slst = (char**)malloc(sizeof(char*) * items.size()); + if (!*slst) + return 0; + for (size_t i = 0; i < items.size(); ++i) + (*slst)[i] = mystrdup(items[i].c_str()); + } + return items.size(); + } +} + +int HunspellImpl::spell(const char* word, int* info, char** root) { + std::string sroot; + bool ret = spell(word, info, root ? &sroot : NULL); + if (root) { + if (sroot.empty()) { + *root = NULL; + } else { + *root = mystrdup(sroot.c_str()); + } + } + return ret; +} + +int HunspellImpl::suggest(char*** slst, const char* word) { + std::vector<std::string> suggests = suggest(word); + return munge_vector(slst, suggests); +} + +int HunspellImpl::suffix_suggest(char*** slst, const char* root_word) { + std::vector<std::string> stems = suffix_suggest(root_word); + return munge_vector(slst, stems); +} + +void HunspellImpl::free_list(char*** slst, int n) { + if (slst && *slst) { + for (int i = 0; i < n; i++) + free((*slst)[i]); + free(*slst); + *slst = NULL; + } +} + +char* HunspellImpl::get_dic_encoding() { + return &encoding[0]; +} + +int HunspellImpl::analyze(char*** slst, const char* word) { + std::vector<std::string> stems = analyze(word); + return munge_vector(slst, stems); +} + +int HunspellImpl::stem(char*** slst, const char* word) { + std::vector<std::string> stems = stem(word); + return munge_vector(slst, stems); +} + +int HunspellImpl::stem(char*** slst, char** desc, int n) { + std::vector<std::string> morph; + morph.reserve(n); + for (int i = 0; i < n; ++i) + morph.push_back(desc[i]); + + std::vector<std::string> stems = stem(morph); + return munge_vector(slst, stems); +} + +int HunspellImpl::generate(char*** slst, const char* word, const char* pattern) { + std::vector<std::string> stems = generate(word, pattern); + return munge_vector(slst, stems); +} + +int HunspellImpl::generate(char*** slst, const char* word, char** pl, int pln) { + std::vector<std::string> morph; + morph.reserve(pln); + for (int i = 0; i < pln; ++i) + morph.push_back(pl[i]); + + std::vector<std::string> stems = generate(word, morph); + return munge_vector(slst, stems); +} + +const char* HunspellImpl::get_wordchars() const { + return get_wordchars_cpp().c_str(); +} + +const char* HunspellImpl::get_version() const { + return get_version_cpp().c_str(); +} + +int HunspellImpl::input_conv(const char* word, char* dest, size_t destsize) { + std::string d; + 
bool ret = input_conv(word, d); + if (ret && d.size() < destsize) { + strncpy(dest, d.c_str(), destsize); + return 1; + } + return 0; +} + +Hunspell::Hunspell(const char* affpath, const char* dpath, const char* key) + : m_Impl(new HunspellImpl(affpath, dpath, key)) { +} + +Hunspell::~Hunspell() { + delete m_Impl; +} + +// load extra dictionaries +int Hunspell::add_dic(const char* dpath, const char* key) { + return m_Impl->add_dic(dpath, key); +} + +bool Hunspell::spell(const std::string& word, int* info, std::string* root) { + return m_Impl->spell(word, info, root); +} + +std::vector<std::string> Hunspell::suggest(const std::string& word) { + return m_Impl->suggest(word); +} + +std::vector<std::string> Hunspell::suffix_suggest(const std::string& root_word) { + return m_Impl->suffix_suggest(root_word); +} + +const std::string& Hunspell::get_dict_encoding() const { + return m_Impl->get_dict_encoding(); +} + +std::vector<std::string> Hunspell::stem(const std::vector<std::string>& desc) { + return m_Impl->stem(desc); +} + +std::vector<std::string> Hunspell::stem(const std::string& word) { + return m_Impl->stem(word); +} + +const std::string& Hunspell::get_wordchars_cpp() const { + return m_Impl->get_wordchars_cpp(); +} + +const std::vector<w_char>& Hunspell::get_wordchars_utf16() const { + return m_Impl->get_wordchars_utf16(); +} + +int Hunspell::add(const std::string& word) { + return m_Impl->add(word); +} + +int Hunspell::add_with_affix(const std::string& word, const std::string& example) { + return m_Impl->add_with_affix(word, example); +} + +int Hunspell::remove(const std::string& word) { + return m_Impl->remove(word); +} + +const std::string& Hunspell::get_version_cpp() const { + return m_Impl->get_version_cpp(); +} + +struct cs_info* Hunspell::get_csconv() { + return m_Impl->get_csconv(); +} + +std::vector<std::string> Hunspell::analyze(const std::string& word) { + return m_Impl->analyze(word); +} + +std::vector<std::string> Hunspell::generate(const std::string& word, const std::vector<std::string>& pl) { + return m_Impl->generate(word, pl); +} + +std::vector<std::string> Hunspell::generate(const std::string& word, const std::string& pattern) { + return m_Impl->generate(word, pattern); +} + +int Hunspell::get_langnum() const { + return m_Impl->get_langnum(); +} + +bool Hunspell::input_conv(const std::string& word, std::string& dest) { + return m_Impl->input_conv(word, dest); +} + +int Hunspell::spell(const char* word, int* info, char** root) { + return m_Impl->spell(word, info, root); +} + +int Hunspell::suggest(char*** slst, const char* word) { + return m_Impl->suggest(slst, word); +} + +int Hunspell::suffix_suggest(char*** slst, const char* root_word) { + return m_Impl->suffix_suggest(slst, root_word); +} + +void Hunspell::free_list(char*** slst, int n) { + m_Impl->free_list(slst, n); +} + +char* Hunspell::get_dic_encoding() { + return m_Impl->get_dic_encoding(); +} + +int Hunspell::analyze(char*** slst, const char* word) { + return m_Impl->analyze(slst, word); +} + +int Hunspell::stem(char*** slst, const char* word) { + return m_Impl->stem(slst, word); +} + +int Hunspell::stem(char*** slst, char** desc, int n) { + return m_Impl->stem(slst, desc, n); +} + +int Hunspell::generate(char*** slst, const char* word, const char* pattern) { + return m_Impl->generate(slst, word, pattern); +} + +int Hunspell::generate(char*** slst, const char* word, char** pl, int pln) { + return m_Impl->generate(slst, word, pl, pln); +} + +const char* Hunspell::get_wordchars() const { + return 
m_Impl->get_wordchars(); +} + +const char* Hunspell::get_version() const { + return m_Impl->get_version(); +} + +int Hunspell::input_conv(const char* word, char* dest, size_t destsize) { + return m_Impl->input_conv(word, dest, destsize); +} + +Hunhandle* Hunspell_create(const char* affpath, const char* dpath) { + return reinterpret_cast<Hunhandle*>(new HunspellImpl(affpath, dpath)); +} + +Hunhandle* Hunspell_create_key(const char* affpath, + const char* dpath, + const char* key) { + return reinterpret_cast<Hunhandle*>(new HunspellImpl(affpath, dpath, key)); +} + +void Hunspell_destroy(Hunhandle* pHunspell) { + delete reinterpret_cast<HunspellImpl*>(pHunspell); +} + +int Hunspell_add_dic(Hunhandle* pHunspell, const char* dpath) { + return reinterpret_cast<HunspellImpl*>(pHunspell)->add_dic(dpath); +} + +int Hunspell_spell(Hunhandle* pHunspell, const char* word) { + return reinterpret_cast<HunspellImpl*>(pHunspell)->spell(word); +} + +char* Hunspell_get_dic_encoding(Hunhandle* pHunspell) { + return reinterpret_cast<HunspellImpl*>(pHunspell)->get_dic_encoding(); +} + +int Hunspell_suggest(Hunhandle* pHunspell, char*** slst, const char* word) { + return reinterpret_cast<HunspellImpl*>(pHunspell)->suggest(slst, word); +} + +int Hunspell_analyze(Hunhandle* pHunspell, char*** slst, const char* word) { + return reinterpret_cast<HunspellImpl*>(pHunspell)->analyze(slst, word); +} + +int Hunspell_stem(Hunhandle* pHunspell, char*** slst, const char* word) { + return reinterpret_cast<HunspellImpl*>(pHunspell)->stem(slst, word); +} + +int Hunspell_stem2(Hunhandle* pHunspell, char*** slst, char** desc, int n) { + return reinterpret_cast<HunspellImpl*>(pHunspell)->stem(slst, desc, n); +} + +int Hunspell_generate(Hunhandle* pHunspell, + char*** slst, + const char* word, + const char* pattern) +{ + return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, pattern); +} + +int Hunspell_generate2(Hunhandle* pHunspell, + char*** slst, + const char* word, + char** desc, + int n) +{ + return reinterpret_cast<HunspellImpl*>(pHunspell)->generate(slst, word, desc, n); +} + +/* functions for run-time modification of the dictionary */ + +/* add word to the run-time dictionary */ + +int Hunspell_add(Hunhandle* pHunspell, const char* word) { + return reinterpret_cast<HunspellImpl*>(pHunspell)->add(word); +} + +/* add word to the run-time dictionary with affix flags of + * the example (a dictionary word): Hunspell will recognize + * affixed forms of the new word, too. + */ + +int Hunspell_add_with_affix(Hunhandle* pHunspell, + const char* word, + const char* example) { + return reinterpret_cast<HunspellImpl*>(pHunspell)->add_with_affix(word, example); +} + +/* remove word from the run-time dictionary */ + +int Hunspell_remove(Hunhandle* pHunspell, const char* word) { + return reinterpret_cast<HunspellImpl*>(pHunspell)->remove(word); +} + +void Hunspell_free_list(Hunhandle* pHunspell, char*** list, int n) { + reinterpret_cast<HunspellImpl*>(pHunspell)->free_list(list, n); +} diff --git a/extensions/spellcheck/hunspell/src/hunspell.h b/extensions/spellcheck/hunspell/src/hunspell.h new file mode 100644 index 0000000000..3aca30ab2f --- /dev/null +++ b/extensions/spellcheck/hunspell/src/hunspell.h @@ -0,0 +1,162 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Hunspell, based on MySpell. + * + * The Initial Developers of the Original Code are + * Kevin Hendricks (MySpell) and Németh László (Hunspell). + * Portions created by the Initial Developers are Copyright (C) 2002-2005 + * the Initial Developers. All Rights Reserved. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef MYSPELLMGR_H_ +#define MYSPELLMGR_H_ + +#include "hunvisapi.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct Hunhandle Hunhandle; + +LIBHUNSPELL_DLL_EXPORTED Hunhandle* Hunspell_create(const char* affpath, + const char* dpath); + +LIBHUNSPELL_DLL_EXPORTED Hunhandle* Hunspell_create_key(const char* affpath, + const char* dpath, + const char* key); + +LIBHUNSPELL_DLL_EXPORTED void Hunspell_destroy(Hunhandle* pHunspell); + +/* load extra dictionaries (only dic files) + * output: 0 = additional dictionary slots available, 1 = slots are now full*/ +LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_dic(Hunhandle* pHunspell, + const char* dpath); + +/* spell(word) - spellcheck word + * output: 0 = bad word, not 0 = good word + */ +LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle* pHunspell, const char*); + +LIBHUNSPELL_DLL_EXPORTED char* Hunspell_get_dic_encoding(Hunhandle* pHunspell); + +/* suggest(suggestions, word) - search suggestions + * input: pointer to an array of strings pointer and the (bad) word + * array of strings pointer (here *slst) may not be initialized + * output: number of suggestions in string array, and suggestions in + * a newly allocated array of strings (*slts will be NULL when number + * of suggestion equals 0.) 
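+ * example (a sketch only; the .aff/.dic paths and the misspelled input
+ * are illustrative):
+ *   Hunhandle* h = Hunspell_create("en_US.aff", "en_US.dic");
+ *   char** slst = NULL;
+ *   int n = Hunspell_suggest(h, &slst, "exmaple");
+ *   for (int i = 0; i < n; i++) printf("%s\n", slst[i]);
+ *   Hunspell_free_list(h, &slst, n);
+ *   Hunspell_destroy(h);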
+ */ +LIBHUNSPELL_DLL_EXPORTED int Hunspell_suggest(Hunhandle* pHunspell, + char*** slst, + const char* word); + +/* morphological functions */ + +/* analyze(result, word) - morphological analysis of the word */ + +LIBHUNSPELL_DLL_EXPORTED int Hunspell_analyze(Hunhandle* pHunspell, + char*** slst, + const char* word); + +/* stem(result, word) - stemmer function */ + +LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem(Hunhandle* pHunspell, + char*** slst, + const char* word); + +/* stem(result, analysis, n) - get stems from a morph. analysis + * example: + * char ** result, result2; + * int n1 = Hunspell_analyze(result, "words"); + * int n2 = Hunspell_stem2(result2, result, n1); + */ + +LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem2(Hunhandle* pHunspell, + char*** slst, + char** desc, + int n); + +/* generate(result, word, word2) - morphological generation by example(s) */ + +LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate(Hunhandle* pHunspell, + char*** slst, + const char* word, + const char* word2); + +/* generate(result, word, desc, n) - generation by morph. description(s) + * example: + * char ** result; + * char * affix = "is:plural"; // description depends from dictionaries, too + * int n = Hunspell_generate2(result, "word", &affix, 1); + * for (int i = 0; i < n; i++) printf("%s\n", result[i]); + */ + +LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate2(Hunhandle* pHunspell, + char*** slst, + const char* word, + char** desc, + int n); + +/* functions for run-time modification of the dictionary */ + +/* add word to the run-time dictionary */ + +LIBHUNSPELL_DLL_EXPORTED int Hunspell_add(Hunhandle* pHunspell, + const char* word); + +/* add word to the run-time dictionary with affix flags of + * the example (a dictionary word): Hunspell will recognize + * affixed forms of the new word, too. + */ + +LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_with_affix(Hunhandle* pHunspell, + const char* word, + const char* example); + +/* remove word from the run-time dictionary */ + +LIBHUNSPELL_DLL_EXPORTED int Hunspell_remove(Hunhandle* pHunspell, + const char* word); + +/* free suggestion lists */ + +LIBHUNSPELL_DLL_EXPORTED void Hunspell_free_list(Hunhandle* pHunspell, + char*** slst, + int n); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/extensions/spellcheck/hunspell/src/hunspell.hxx b/extensions/spellcheck/hunspell/src/hunspell.hxx new file mode 100644 index 0000000000..8640a35ca1 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/hunspell.hxx @@ -0,0 +1,232 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. 
+ * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +#ifndef MYSPELLMGR_HXX_ +#define MYSPELLMGR_HXX_ + +#include "hunvisapi.h" +#include "w_char.hxx" +#include "atypes.hxx" +#include <string> +#include <vector> + +#define SPELL_XML "<?xml?>" + +#ifndef MAXSUGGESTION +#define MAXSUGGESTION 15 +#endif + +#define MAXSHARPS 5 + +#ifndef MAXWORDLEN +#define MAXWORDLEN 100 +#endif + +#if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) +# define H_DEPRECATED __attribute__((__deprecated__)) +#elif defined(_MSC_VER) && (_MSC_VER >= 1300) +# define H_DEPRECATED __declspec(deprecated) +#else +# define H_DEPRECATED +#endif + +class HunspellImpl; + +class LIBHUNSPELL_DLL_EXPORTED Hunspell { + private: + Hunspell(const Hunspell&); + Hunspell& operator=(const Hunspell&); + + private: + HunspellImpl* m_Impl; + + public: + /* Hunspell(aff, dic) - constructor of Hunspell class + * input: path of affix file and dictionary file + * + * In WIN32 environment, use UTF-8 encoded paths started with the long path + * prefix \\\\?\\ to handle system-independent character encoding and very + * long path names (without the long path prefix Hunspell will use fopen() + * with system-dependent character encoding instead of _wfopen()). + */ + Hunspell(const char* affpath, const char* dpath, const char* key = NULL); + ~Hunspell(); + + /* load extra dictionaries (only dic files) */ + int add_dic(const char* dpath, const char* key = NULL); + + /* spell(word) - spellcheck word + * output: false = bad word, true = good word + * + * plus output: + * info: information bit array, fields: + * SPELL_COMPOUND = a compound word + * SPELL_FORBIDDEN = an explicit forbidden word + * root: root (stem), when input is a word with affix(es) + */ + bool spell(const std::string& word, int* info = NULL, std::string* root = NULL); + H_DEPRECATED int spell(const char* word, int* info = NULL, char** root = NULL); + + /* suggest(suggestions, word) - search suggestions + * input: pointer to an array of strings pointer and the (bad) word + * array of strings pointer (here *slst) may not be initialized + * output: number of suggestions in string array, and suggestions in + * a newly allocated array of strings (*slts will be NULL when number + * of suggestion equals 0.) + */ + std::vector<std::string> suggest(const std::string& word); + H_DEPRECATED int suggest(char*** slst, const char* word); + + /* Suggest words from suffix rules + * suffix_suggest(suggestions, root_word) + * input: pointer to an array of strings pointer and the word + * array of strings pointer (here *slst) may not be initialized + * output: number of suggestions in string array, and suggestions in + * a newly allocated array of strings (*slts will be NULL when number + * of suggestion equals 0.) + */ + std::vector<std::string> suffix_suggest(const std::string& root_word); + H_DEPRECATED int suffix_suggest(char*** slst, const char* root_word); + + /* deallocate suggestion lists */ + H_DEPRECATED void free_list(char*** slst, int n); + + const std::string& get_dict_encoding() const; + char* get_dic_encoding(); + + /* morphological functions */ + + /* analyze(result, word) - morphological analysis of the word */ + std::vector<std::string> analyze(const std::string& word); + H_DEPRECATED int analyze(char*** slst, const char* word); + + /* stem(word) - stemmer function */ + std::vector<std::string> stem(const std::string& word); + H_DEPRECATED int stem(char*** slst, const char* word); + + /* stem(analysis, n) - get stems from a morph. 
analysis + * example: + * char ** result, result2; + * int n1 = analyze(&result, "words"); + * int n2 = stem(&result2, result, n1); + */ + std::vector<std::string> stem(const std::vector<std::string>& morph); + H_DEPRECATED int stem(char*** slst, char** morph, int n); + + /* generate(result, word, word2) - morphological generation by example(s) */ + std::vector<std::string> generate(const std::string& word, const std::string& word2); + H_DEPRECATED int generate(char*** slst, const char* word, const char* word2); + + /* generate(result, word, desc, n) - generation by morph. description(s) + * example: + * char ** result; + * char * affix = "is:plural"; // description depends from dictionaries, too + * int n = generate(&result, "word", &affix, 1); + * for (int i = 0; i < n; i++) printf("%s\n", result[i]); + */ + std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl); + H_DEPRECATED int generate(char*** slst, const char* word, char** desc, int n); + + /* functions for run-time modification of the dictionary */ + + /* add word to the run-time dictionary */ + + int add(const std::string& word); + + /* add word to the run-time dictionary with affix flags of + * the example (a dictionary word): Hunspell will recognize + * affixed forms of the new word, too. + */ + + int add_with_affix(const std::string& word, const std::string& example); + + /* remove word from the run-time dictionary */ + + int remove(const std::string& word); + + /* other */ + + /* get extra word characters definied in affix file for tokenization */ + const char* get_wordchars() const; + const std::string& get_wordchars_cpp() const; + const std::vector<w_char>& get_wordchars_utf16() const; + + struct cs_info* get_csconv(); + + const char* get_version() const; + const std::string& get_version_cpp() const; + + int get_langnum() const; + + /* need for putdic */ + bool input_conv(const std::string& word, std::string& dest); + H_DEPRECATED int input_conv(const char* word, char* dest, size_t destsize); +}; + +#endif diff --git a/extensions/spellcheck/hunspell/src/hunvisapi.h b/extensions/spellcheck/hunspell/src/hunvisapi.h new file mode 100644 index 0000000000..ed0a502ba2 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/hunvisapi.h @@ -0,0 +1,18 @@ +#ifndef HUNSPELL_VISIBILITY_H_ +#define HUNSPELL_VISIBILITY_H_ + +#if defined(HUNSPELL_STATIC) +# define LIBHUNSPELL_DLL_EXPORTED +#elif defined(_WIN32) +# if defined(BUILDING_LIBHUNSPELL) +# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport) +# else +# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport) +# endif +#elif defined(BUILDING_LIBHUNSPELL) && 1 +# define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default"))) +#else +# define LIBHUNSPELL_DLL_EXPORTED +#endif + +#endif diff --git a/extensions/spellcheck/hunspell/src/langnum.hxx b/extensions/spellcheck/hunspell/src/langnum.hxx new file mode 100644 index 0000000000..39e63efdaa --- /dev/null +++ b/extensions/spellcheck/hunspell/src/langnum.hxx @@ -0,0 +1,76 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef LANGNUM_HXX_ +#define LANGNUM_HXX_ + +/* + language numbers for language specific codes + see https://wiki.openoffice.org/w/index.php?title=Languages&oldid=230199 +*/ + +enum { + LANG_ar = 96, + LANG_az = 100, // custom number + LANG_bg = 41, + LANG_ca = 37, + LANG_crh = 102, // custom number + LANG_cs = 42, + LANG_da = 45, + LANG_de = 49, + LANG_el = 30, + LANG_en = 01, + LANG_es = 34, + LANG_eu = 10, + LANG_fr = 02, + LANG_gl = 38, + LANG_hr = 78, + LANG_hu = 36, + LANG_it = 39, + LANG_la = 99, // custom number + LANG_lv = 101, // custom number + LANG_nl = 31, + LANG_pl = 48, + LANG_pt = 03, + LANG_ru = 07, + LANG_sv = 50, + LANG_tr = 90, + LANG_uk = 80, + LANG_xx = 999 +}; + +#endif diff --git a/extensions/spellcheck/hunspell/src/license.hunspell b/extensions/spellcheck/hunspell/src/license.hunspell new file mode 100644 index 0000000000..fad5a16230 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/license.hunspell @@ -0,0 +1,54 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. 
+ * + * Contributor(s): + * David Einstein + * Davide Prina + * Giuseppe Modugno + * Gianluca Turconi + * Simon Brouwer + * Noll János + * Bíró Árpád + * Goldman Eleonóra + * Sarlós Tamás + * Bencsáth Boldizsár + * Halácsy Péter + * Dvornik László + * Gefferth András + * Nagy Viktor + * Varga Dániel + * Chris Halls + * Rene Engelhard + * Bram Moolenaar + * Dafydd Jones + * Harri Pitkänen + * Andras Timar + * Tor Lillqvist + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ diff --git a/extensions/spellcheck/hunspell/src/license.myspell b/extensions/spellcheck/hunspell/src/license.myspell new file mode 100644 index 0000000000..2da5330750 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/license.myspell @@ -0,0 +1,61 @@ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * + * NOTE: A special thanks and credit goes to Geoff Kuenning + * the creator of ispell. MySpell's affix algorithms were + * based on those of ispell which should be noted is + * copyright Geoff Kuenning et.al. and now available + * under a BSD style license. 
For more information on ispell + * and affix compression in general, please see: + * http://www.cs.ucla.edu/ficus-members/geoff/ispell.html + * (the home page for ispell) + * + * An almost complete rewrite of MySpell for use by + * the Mozilla project has been developed by David Einstein + * (Deinst@world.std.com). David and I are now + * working on parallel development tracks to help + * our respective projects (Mozilla and OpenOffice.org + * and we will maintain full affix file and dictionary + * file compatibility and work on merging our versions + * of MySpell back into a single tree. David has been + * a significant help in improving MySpell. + * + * Special thanks also go to La'szlo' Ne'meth + * <nemethl@gyorsposta.hu> who is the author of the + * Hungarian dictionary and who developed and contributed + * the code to support compound words in MySpell + * and fixed numerous problems with the encoding + * case conversion tables. + * + */ diff --git a/extensions/spellcheck/hunspell/src/moz.build b/extensions/spellcheck/hunspell/src/moz.build new file mode 100644 index 0000000000..c6f7930262 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/moz.build @@ -0,0 +1,30 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +include("sources.mozbuild") + +UNIFIED_SOURCES += hunspell_sources + +DEFINES['HUNSPELL_STATIC'] = True + +FINAL_LIBRARY = 'xul' + +LOCAL_INCLUDES += [ + '../glue', +] + +# We allow warnings for third-party code that can be updated from upstream. +AllowCompilerWarnings() + +include('/ipc/chromium/chromium-config.mozbuild') +include('../glue/common.mozbuild') + +HunspellIncludes() + +if CONFIG['CC_TYPE'] in ('clang', 'clang-cl'): + CXXFLAGS += [ + '-Wno-implicit-fallthrough', + ] diff --git a/extensions/spellcheck/hunspell/src/phonet.cxx b/extensions/spellcheck/hunspell/src/phonet.cxx new file mode 100644 index 0000000000..69601a2872 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/phonet.cxx @@ -0,0 +1,270 @@ +/* phonetic.c - generic replacement aglogithms for phonetic transformation + Copyright (C) 2000 Bjoern Jacke + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation; + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; If not, see + <http://www.gnu.org/licenses/>. 
+ + Changelog: + + 2000-01-05 Bjoern Jacke <bjoern at j3e.de> + Initial Release insprired by the article about phonetic + transformations out of c't 25/1999 + + 2007-07-26 Bjoern Jacke <bjoern at j3e.de> + Released under MPL/GPL/LGPL tri-license for Hunspell + + 2007-08-23 Laszlo Nemeth <nemeth at OOo> + Porting from Aspell to Hunspell using C-like structs +*/ + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <ctype.h> + +#include "csutil.hxx" +#include "phonet.hxx" + +void init_phonet_hash(phonetable& parms) { + for (int i = 0; i < HASHSIZE; i++) { + parms.hash[i] = -1; + } + + for (int i = 0; parms.rules[i][0] != '\0'; i += 2) { + /** set hash value **/ + int k = (unsigned char)parms.rules[i][0]; + + if (parms.hash[k] < 0) { + parms.hash[k] = i; + } + } +} + +// like strcpy but safe if the strings overlap +// but only if dest < src +static inline void strmove(char* dest, char* src) { + while (*src) + *dest++ = *src++; + *dest = '\0'; +} + +static int myisalpha(char ch) { + if ((unsigned char)ch < 128) + return isalpha(ch); + return 1; +} + +/* Do phonetic transformation. */ +/* phonetic transcription algorithm */ +/* see: http://aspell.net/man-html/Phonetic-Code.html */ +/* convert string to uppercase before this call */ +std::string phonet(const std::string& inword, phonetable& parms) { + + int i, k = 0, p, z; + int k0, n0, p0 = -333; + char c; + typedef unsigned char uchar; + + size_t len = inword.size(); + if (len > MAXPHONETUTF8LEN) + return std::string(); + char word[MAXPHONETUTF8LEN + 1]; + strncpy(word, inword.c_str(), MAXPHONETUTF8LEN); + word[MAXPHONETUTF8LEN] = '\0'; + + std::string target; + /** check word **/ + i = z = 0; + while ((c = word[i]) != '\0') { + int n = parms.hash[(uchar)c]; + int z0 = 0; + + if (n >= 0 && !parms.rules[n].empty()) { + /** check all rules for the same letter **/ + while (parms.rules[n][0] == c) { + /** check whole string **/ + k = 1; /** number of found letters **/ + p = 5; /** default priority **/ + const char*s = parms.rules[n].c_str(); + s++; /** important for (see below) "*(s-1)" **/ + + while (*s != '\0' && word[i + k] == *s && !isdigit((unsigned char)*s) && + strchr("(-<^$", *s) == NULL) { + k++; + s++; + } + if (*s == '(') { + /** check letters in "(..)" **/ + if (myisalpha(word[i + k]) // ...could be implied? 
+ && strchr(s + 1, word[i + k]) != NULL) { + k++; + while (*s != ')') + s++; + s++; + } + } + p0 = (int)*s; + k0 = k; + while (*s == '-' && k > 1) { + k--; + s++; + } + if (*s == '<') + s++; + if (isdigit((unsigned char)*s)) { + /** determine priority **/ + p = *s - '0'; + s++; + } + if (*s == '^' && *(s + 1) == '^') + s++; + + if (*s == '\0' || (*s == '^' && (i == 0 || !myisalpha(word[i - 1])) && + (*(s + 1) != '$' || (!myisalpha(word[i + k0])))) || + (*s == '$' && i > 0 && myisalpha(word[i - 1]) && + (!myisalpha(word[i + k0])))) { + /** search for followup rules, if: **/ + /** parms.followup and k > 1 and NO '-' in searchstring **/ + char c0 = word[i + k - 1]; + n0 = parms.hash[(uchar)c0]; + + // if (parms.followup && k > 1 && n0 >= 0 + if (k > 1 && n0 >= 0 && p0 != (int)'-' && word[i + k] != '\0' && !parms.rules[n0].empty()) { + /** test follow-up rule for "word[i+k]" **/ + while (parms.rules[n0][0] == c0) { + /** check whole string **/ + k0 = k; + p0 = 5; + s = parms.rules[n0].c_str(); + s++; + while (*s != '\0' && word[i + k0] == *s && + !isdigit((unsigned char)*s) && + strchr("(-<^$", *s) == NULL) { + k0++; + s++; + } + if (*s == '(') { + /** check letters **/ + if (myisalpha(word[i + k0]) && + strchr(s + 1, word[i + k0]) != NULL) { + k0++; + while (*s != ')' && *s != '\0') + s++; + if (*s == ')') + s++; + } + } + while (*s == '-') { + /** "k0" gets NOT reduced **/ + /** because "if (k0 == k)" **/ + s++; + } + if (*s == '<') + s++; + if (isdigit((unsigned char)*s)) { + p0 = *s - '0'; + s++; + } + + if (*s == '\0' + /** *s == '^' cuts **/ + || (*s == '$' && !myisalpha(word[i + k0]))) { + if (k0 == k) { + /** this is just a piece of the string **/ + n0 += 2; + continue; + } + + if (p0 < p) { + /** priority too low **/ + n0 += 2; + continue; + } + /** rule fits; stop search **/ + break; + } + n0 += 2; + } /** End of "while (parms.rules[n0][0] == c0)" **/ + + if (p0 >= p && parms.rules[n0][0] == c0) { + n += 2; + continue; + } + } /** end of follow-up stuff **/ + + /** replace string **/ + s = parms.rules[n + 1].c_str(); + p0 = (!parms.rules[n].empty() && + strchr(parms.rules[n].c_str() + 1, '<') != NULL) + ? 
1 + : 0; + if (p0 == 1 && z == 0) { + /** rule with '<' is used **/ + if (!target.empty() && *s != '\0' && + (target[target.size()-1] == c || target[target.size()-1] == *s)) { + target.erase(target.size() - 1); + } + z0 = 1; + z = 1; + k0 = 0; + while (*s != '\0' && word[i + k0] != '\0') { + word[i + k0] = *s; + k0++; + s++; + } + if (k > k0) + strmove(&word[0] + i + k0, &word[0] + i + k); + + /** new "actual letter" **/ + c = word[i]; + } else { /** no '<' rule used **/ + i += k - 1; + z = 0; + while (*s != '\0' && *(s + 1) != '\0' && target.size() < len) { + if (target.empty() || target[target.size()-1] != *s) { + target.push_back(*s); + } + s++; + } + /** new "actual letter" **/ + c = *s; + if (!parms.rules[n].empty() && + strstr(parms.rules[n].c_str() + 1, "^^") != NULL) { + if (c != '\0') { + target.push_back(c); + } + strmove(&word[0], &word[0] + i + 1); + i = 0; + z0 = 1; + } + } + break; + } /** end of follow-up stuff **/ + n += 2; + } /** end of while (parms.rules[n][0] == c) **/ + } /** end of if (n >= 0) **/ + if (z0 == 0) { + if (k && !p0 && target.size() < len && c != '\0') { + /** condense only double letters **/ + target.push_back(c); + /// printf("\n setting \n"); + } + + i++; + z = 0; + k = 0; + } + } /** end of while ((c = word[i]) != '\0') **/ + + return target; +} /** end of function "phonet" **/ diff --git a/extensions/spellcheck/hunspell/src/phonet.hxx b/extensions/spellcheck/hunspell/src/phonet.hxx new file mode 100644 index 0000000000..2d58b3ba1b --- /dev/null +++ b/extensions/spellcheck/hunspell/src/phonet.hxx @@ -0,0 +1,50 @@ +/* phonetic.c - generic replacement aglogithms for phonetic transformation + Copyright (C) 2000 Bjoern Jacke + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation; + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; If not, see + <http://www.gnu.org/licenses/>. 
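As the code above shows, phonet() walks a table whose rules vector holds pattern/replacement pairs at even/odd indices, with hash[] (filled by init_phonet_hash) pointing at the first rule for each leading character, and it expects an uppercased input word. A minimal usage sketch, assuming it is compiled together with phonet.cxx and the headers from this import, that the table is terminated with empty pattern strings as the scanning loops expect, and that the toy rules stand in for a real PHONE table from an affix file:

    // Hedged sketch: feeding a tiny PHONE-style table to phonet().
    // Real tables are parsed from a dictionary's affix file; the two
    // rules below are placeholders chosen only to exercise the API.
    #include <iostream>
    #include <string>
    #include <vector>

    #include "phonet.hxx"

    int main() {
      phonetable parms;
      parms.utf8 = 0;                       // 8-bit input in this sketch

      // pattern at even index, replacement at odd index, uppercase patterns
      parms.rules.push_back("PH"); parms.rules.push_back("F");
      parms.rules.push_back("Z");  parms.rules.push_back("S");
      // empty terminators stop the rule scans in phonet()/init_phonet_hash()
      parms.rules.push_back("");   parms.rules.push_back("");

      init_phonet_hash(parms);              // index rules by first character

      // phonet() expects an already uppercased word (see comment above)
      std::cout << phonet("PHONE", parms) << std::endl;
      return 0;
    }

Note that letters not covered by any rule contribute nothing to the returned code (the final push_back only runs when a rule matched), so practical tables define rules for every letter they care about.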
+ + Changelog: + + 2000-01-05 Bjoern Jacke <bjoern at j3e.de> + Initial Release insprired by the article about phonetic + transformations out of c't 25/1999 + + 2007-07-26 Bjoern Jacke <bjoern at j3e.de> + Released under MPL/GPL/LGPL tri-license for Hunspell + + 2007-08-23 Laszlo Nemeth <nemeth at OOo> + Porting from Aspell to Hunspell using C-like structs +*/ + +#ifndef PHONET_HXX_ +#define PHONET_HXX_ + +#define HASHSIZE 256 +#define MAXPHONETLEN 256 +#define MAXPHONETUTF8LEN (MAXPHONETLEN * 4) + +#include "hunvisapi.h" + +struct phonetable { + char utf8; + std::vector<std::string> rules; + int hash[HASHSIZE]; +}; + +LIBHUNSPELL_DLL_EXPORTED void init_phonet_hash(phonetable& parms); + +LIBHUNSPELL_DLL_EXPORTED std::string phonet(const std::string& inword, + phonetable& phone); + +#endif diff --git a/extensions/spellcheck/hunspell/src/replist.cxx b/extensions/spellcheck/hunspell/src/replist.cxx new file mode 100644 index 0000000000..1395ade607 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/replist.cxx @@ -0,0 +1,196 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <limits> + +#include "replist.hxx" +#include "csutil.hxx" + +RepList::RepList(int n) { + dat = (replentry**)malloc(sizeof(replentry*) * n); + if (dat == 0) + size = 0; + else + size = n; + pos = 0; +} + +RepList::~RepList() { + for (int i = 0; i < pos; i++) { + delete dat[i]; + } + free(dat); +} + +replentry* RepList::item(int n) { + return dat[n]; +} + +int RepList::find(const char* word) { + int p1 = 0; + int p2 = pos - 1; + int ret = -1; + while (p1 <= p2) { + int m = ((unsigned)p1 + (unsigned)p2) >> 1; + int c = strncmp(word, dat[m]->pattern.c_str(), dat[m]->pattern.size()); + if (c < 0) + p2 = m - 1; + else if (c > 0) + p1 = m + 1; + else { // scan in the right half for a longer match + ret = m; + p1 = m + 1; + } + } + return ret; +} + +std::string RepList::replace(const char* word, int ind, bool atstart) { + int type = atstart ? 1 : 0; + if (ind < 0) + return std::string(); + if (strlen(word) == dat[ind]->pattern.size()) + type = atstart ? 3 : 2; + while (type && dat[ind]->outstrings[type].empty()) + type = (type == 2 && !atstart) ? 
0 : type - 1; + return dat[ind]->outstrings[type]; +} + +int RepList::add(const std::string& in_pat1, const std::string& pat2) { + if (pos >= size || in_pat1.empty() || pat2.empty()) { + return 1; + } + // analyse word context + int type = 0; + std::string pat1(in_pat1); + if (pat1[0] == '_') { + pat1.erase(0, 1); + type = 1; + } + if (!pat1.empty() && pat1[pat1.size() - 1] == '_') { + type = type + 2; + pat1.erase(pat1.size() - 1); + } + mystrrep(pat1, "_", " "); + + // find existing entry + int m = find(pat1.c_str()); + if (m >= 0 && dat[m]->pattern == pat1) { + // since already used + dat[m]->outstrings[type] = pat2; + mystrrep(dat[m]->outstrings[type], "_", " "); + return 0; + } + + // make a new entry if none exists + replentry* r = new replentry; + if (r == NULL) + return 1; + r->pattern = pat1; + r->outstrings[type] = pat2; + mystrrep(r->outstrings[type], "_", " "); + dat[pos++] = r; + // sort to the right place in the list + int i; + for (i = pos - 1; i > 0; i--) { + if (strcmp(r->pattern.c_str(), dat[i - 1]->pattern.c_str()) < 0) { + dat[i] = dat[i - 1]; + } else + break; + } + dat[i] = r; + return 0; +} + +bool RepList::conv(const std::string& in_word, std::string& dest) { + dest.clear(); + + size_t wordlen = in_word.size(); + const char* word = in_word.c_str(); + + bool change = false; + for (size_t i = 0; i < wordlen; ++i) { + int n = find(word + i); + std::string l = replace(word + i, n, i == 0); + if (!l.empty()) { + dest.append(l); + i += dat[n]->pattern.size() - 1; + change = true; + } else { + dest.push_back(word[i]); + } + } + + return change; +} + diff --git a/extensions/spellcheck/hunspell/src/replist.hxx b/extensions/spellcheck/hunspell/src/replist.hxx new file mode 100644 index 0000000000..08daeb4488 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/replist.hxx @@ -0,0 +1,100 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. 
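Taken together, add() and conv() above implement a small anchored string-replacement table: a leading or trailing '_' in a pattern restricts the entry to the start or end of the word, other underscores become spaces, and conv() rewrites a word by applying the matching entry at each position. A usage sketch under the assumption that it is linked against replist.cxx and csutil.cxx from this import; the sample pairs are invented:

    // Hedged sketch of the RepList API above. The toy pairs only
    // demonstrate the '_' anchoring convention handled by add().
    #include <iostream>
    #include <string>

    #include "replist.hxx"

    int main() {
      RepList rl(8);               // capacity: at most 8 entries
      rl.add("ph", "f");           // applies anywhere in the word
      rl.add("_x", "ks");          // leading '_' : only at the start
      rl.add("ie_", "y");          // trailing '_': only at the end

      std::string out;
      if (rl.conv("philosophie", out))   // true when something was replaced
        std::cout << out << std::endl;   // expected to print "filosofy"
      return 0;
    }

Note how replace() falls back from the more specific context (start, end or whole word) to the general outstring when no anchored output was registered for an entry, which is why the "ph" at the start of the sample word is still rewritten.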
If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* string replacement list class */ +#ifndef REPLIST_HXX_ +#define REPLIST_HXX_ + +#include "w_char.hxx" + +#include <string> +#include <vector> + +class RepList { + private: + RepList(const RepList&); + RepList& operator=(const RepList&); + + protected: + replentry** dat; + int size; + int pos; + + public: + explicit RepList(int n); + ~RepList(); + + int add(const std::string& pat1, const std::string& pat2); + replentry* item(int n); + int find(const char* word); + std::string replace(const char* word, int n, bool atstart); + bool conv(const std::string& word, std::string& dest); +}; +#endif diff --git a/extensions/spellcheck/hunspell/src/sources.mozbuild b/extensions/spellcheck/hunspell/src/sources.mozbuild new file mode 100644 index 0000000000..6649bedb1f --- /dev/null +++ b/extensions/spellcheck/hunspell/src/sources.mozbuild @@ -0,0 +1,17 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +hunspell_sources = [ + '../glue/mozHunspellRLBoxSandbox.cpp', + 'affentry.cxx', + 'affixmgr.cxx', + 'csutil.cxx', + 'hashmgr.cxx', + 'hunspell.cxx', + 'phonet.cxx', + 'replist.cxx', + 'suggestmgr.cxx', +] diff --git a/extensions/spellcheck/hunspell/src/suggestmgr.cxx b/extensions/spellcheck/hunspell/src/suggestmgr.cxx new file mode 100644 index 0000000000..6b363debd5 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/suggestmgr.cxx @@ -0,0 +1,2263 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <ctype.h> +#include <time.h> + +#include "suggestmgr.hxx" +#include "htypes.hxx" +#include "csutil.hxx" + +const w_char W_VLINE = {'\0', '|'}; + +#define MAX_CHAR_DISTANCE 4 + +SuggestMgr::SuggestMgr(const char* tryme, unsigned int maxn, AffixMgr* aptr) { + // register affix manager and check in string of chars to + // try when building candidate suggestions + pAMgr = aptr; + + csconv = NULL; + + ckeyl = 0; + ckey = NULL; + + ctryl = 0; + ctry = NULL; + + utf8 = 0; + langnum = 0; + complexprefixes = 0; + + maxSug = maxn; + nosplitsugs = 0; + maxngramsugs = MAXNGRAMSUGS; + maxcpdsugs = MAXCOMPOUNDSUGS; + + if (pAMgr) { + langnum = pAMgr->get_langnum(); + ckey = pAMgr->get_key_string(); + nosplitsugs = pAMgr->get_nosplitsugs(); + if (pAMgr->get_maxngramsugs() >= 0) + maxngramsugs = pAMgr->get_maxngramsugs(); + utf8 = pAMgr->get_utf8(); + if (pAMgr->get_maxcpdsugs() >= 0) + maxcpdsugs = pAMgr->get_maxcpdsugs(); + if (!utf8) { + csconv = get_current_cs(pAMgr->get_encoding()); + } + complexprefixes = pAMgr->get_complexprefixes(); + } + + if (ckey) { + if (utf8) { + ckeyl = u8_u16(ckey_utf, ckey); + } else { + ckeyl = strlen(ckey); + } + } + + if (tryme) { + ctry = mystrdup(tryme); + if (ctry) + ctryl = strlen(ctry); + if (ctry && utf8) { + ctryl = u8_u16(ctry_utf, tryme); + } + } + + // language with possible dash usage + // (latin letters or dash in TRY characters) + lang_with_dash_usage = (ctry && + ((strchr(ctry, '-') != NULL) || (strchr(ctry, 'a') != NULL))); +} + +SuggestMgr::~SuggestMgr() { + pAMgr = NULL; + if (ckey) + free(ckey); + ckey = NULL; + ckeyl = 0; + if (ctry) + free(ctry); + ctry = NULL; + ctryl = 0; + maxSug = 0; +#ifdef MOZILLA_CLIENT + delete[] csconv; +#endif +} + +void SuggestMgr::testsug(std::vector<std::string>& wlst, + const std::string& candidate, + int cpdsuggest, + int* timer, + clock_t* timelimit) { + int cwrd = 1; + if (wlst.size() == maxSug) + return; + for (size_t k = 0; k < wlst.size(); ++k) { + if (wlst[k] == candidate) { + cwrd = 0; + break; + } + } + if ((cwrd) && checkword(candidate, cpdsuggest, timer, timelimit)) { + wlst.push_back(candidate); + } +} + +/* generate suggestions for a misspelled word + * pass in address of array of char * pointers + * onlycompoundsug: probably bad suggestions (need for ngram sugs, too) + * return value: true, if there is a good suggestion + * (REP, ph: or a dictionary word pair) + */ +bool SuggestMgr::suggest(std::vector<std::string>& slst, + const char* w, + int* onlycompoundsug) { + int nocompoundtwowords = 0; + std::vector<w_char> word_utf; + int wl = 0; + size_t nsugorig = slst.size(); + std::string w2; + const char* word = w; + size_t oldSug = 0; + bool good_suggestion = false; + + // word reversing wrapper for complex prefixes + if (complexprefixes) { + w2.assign(w); + if (utf8) + reverseword_utf(w2); + else + reverseword(w2); + word = w2.c_str(); + } + + if (utf8) { + wl = u8_u16(word_utf, word); + if (wl 
== -1) { + return false; + } + } + + for (int cpdsuggest = 0; (cpdsuggest < 2) && (nocompoundtwowords == 0) && !good_suggestion; + cpdsuggest++) { + + clock_t timelimit; + // initialize both in non-compound and compound cycles + timelimit = clock(); + + // limit compound suggestion + if (cpdsuggest > 0) + oldSug = slst.size(); + + // suggestions for an uppercase word (html -> HTML) + if (slst.size() < maxSug) { + size_t i = slst.size(); + if (utf8) + capchars_utf(slst, word_utf.data(), wl, cpdsuggest); + else + capchars(slst, word, cpdsuggest); + if (slst.size() > i) + good_suggestion = true; + } + + // perhaps we made a typical fault of spelling + if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { + size_t i = slst.size(); + replchars(slst, word, cpdsuggest); + if (slst.size() > i) + good_suggestion = true; + } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; + + // perhaps we made chose the wrong char from a related set + if ((slst.size() < maxSug) && + (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { + mapchars(slst, word, cpdsuggest); + } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; + + // only suggest compound words when no other suggestion + if ((cpdsuggest == 0) && (slst.size() > nsugorig)) + nocompoundtwowords = 1; + + // did we swap the order of chars by mistake + if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { + if (utf8) + swapchar_utf(slst, word_utf.data(), wl, cpdsuggest); + else + swapchar(slst, word, cpdsuggest); + } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; + + // did we swap the order of non adjacent chars by mistake + if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { + if (utf8) + longswapchar_utf(slst, word_utf.data(), wl, cpdsuggest); + else + longswapchar(slst, word, cpdsuggest); + } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; + + // did we just hit the wrong key in place of a good char (case and keyboard) + if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { + if (utf8) + badcharkey_utf(slst, word_utf.data(), wl, cpdsuggest); + else + badcharkey(slst, word, cpdsuggest); + } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; + + // did we add a char that should not be there + if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { + if (utf8) + extrachar_utf(slst, word_utf.data(), wl, cpdsuggest); + else + extrachar(slst, word, cpdsuggest); + } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; + + // did we forgot a char + if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { + if (utf8) + forgotchar_utf(slst, word_utf.data(), wl, cpdsuggest); + else + forgotchar(slst, word, cpdsuggest); + } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; + + // did we move a char + if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { + if (utf8) + movechar_utf(slst, word_utf.data(), wl, cpdsuggest); + else + movechar(slst, word, cpdsuggest); + } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; + + // did we just hit the wrong key in place of a good char + if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { + if (utf8) + badchar_utf(slst, word_utf.data(), wl, cpdsuggest); + else + 
badchar(slst, word, cpdsuggest); + } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; + + // did we double two characters + if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) { + if (utf8) + doubletwochars_utf(slst, word_utf.data(), wl, cpdsuggest); + else + doubletwochars(slst, word, cpdsuggest); + } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; + + // perhaps we forgot to hit space and two words ran together + // (dictionary word pairs have top priority here, so + // we always suggest them, in despite of nosplitsugs, and + // drop compound word and other suggestions) + if (!cpdsuggest || (!nosplitsugs && slst.size() < oldSug + maxcpdsugs)) { + good_suggestion = twowords(slst, word, cpdsuggest, good_suggestion); + } + if (clock() > timelimit + TIMELIMIT_SUGGESTION) + return good_suggestion; + + } // repeating ``for'' statement compounding support + + if (!nocompoundtwowords && (!slst.empty()) && onlycompoundsug) + *onlycompoundsug = 1; + + return good_suggestion; +} + +// suggestions for an uppercase word (html -> HTML) +void SuggestMgr::capchars_utf(std::vector<std::string>& wlst, + const w_char* word, + int wl, + int cpdsuggest) { + std::vector<w_char> candidate_utf(word, word + wl); + mkallcap_utf(candidate_utf, langnum); + std::string candidate; + u16_u8(candidate, candidate_utf); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); +} + +// suggestions for an uppercase word (html -> HTML) +void SuggestMgr::capchars(std::vector<std::string>& wlst, + const char* word, + int cpdsuggest) { + std::string candidate(word); + mkallcap(candidate, csconv); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); +} + +// suggestions for when chose the wrong char out of a related set +int SuggestMgr::mapchars(std::vector<std::string>& wlst, + const char* word, + int cpdsuggest) { + std::string candidate; + clock_t timelimit; + int timer; + + int wl = strlen(word); + if (wl < 2 || !pAMgr) + return wlst.size(); + + const std::vector<mapentry>& maptable = pAMgr->get_maptable(); + if (maptable.empty()) + return wlst.size(); + + timelimit = clock(); + timer = MINTIMER; + return map_related(word, candidate, 0, wlst, cpdsuggest, + maptable, &timer, &timelimit); +} + +int SuggestMgr::map_related(const char* word, + std::string& candidate, + int wn, + std::vector<std::string>& wlst, + int cpdsuggest, + const std::vector<mapentry>& maptable, + int* timer, + clock_t* timelimit) { + if (*(word + wn) == '\0') { + int cwrd = 1; + for (size_t m = 0; m < wlst.size(); ++m) { + if (wlst[m] == candidate) { + cwrd = 0; + break; + } + } + if ((cwrd) && checkword(candidate, cpdsuggest, timer, timelimit)) { + if (wlst.size() < maxSug) { + wlst.push_back(candidate); + } + } + return wlst.size(); + } + int in_map = 0; + for (size_t j = 0; j < maptable.size(); ++j) { + for (size_t k = 0; k < maptable[j].size(); ++k) { + size_t len = maptable[j][k].size(); + if (strncmp(maptable[j][k].c_str(), word + wn, len) == 0) { + in_map = 1; + size_t cn = candidate.size(); + for (size_t l = 0; l < maptable[j].size(); ++l) { + candidate.resize(cn); + candidate.append(maptable[j][l]); + map_related(word, candidate, wn + len, wlst, + cpdsuggest, maptable, timer, timelimit); + if (!(*timer)) + return wlst.size(); + } + } + } + } + if (!in_map) { + candidate.push_back(*(word + wn)); + map_related(word, candidate, wn + 1, wlst, cpdsuggest, + maptable, timer, timelimit); + } + return wlst.size(); +} + +// suggestions for a typical fault of spelling, that 
+// differs with more, than 1 letter from the right form. +int SuggestMgr::replchars(std::vector<std::string>& wlst, + const char* word, + int cpdsuggest) { + std::string candidate; + int wl = strlen(word); + if (wl < 2 || !pAMgr) + return wlst.size(); + const std::vector<replentry>& reptable = pAMgr->get_reptable(); + for (size_t i = 0; i < reptable.size(); ++i) { + const char* r = word; + // search every occurence of the pattern in the word + while ((r = strstr(r, reptable[i].pattern.c_str())) != NULL) { + int type = (r == word) ? 1 : 0; + if (r - word + reptable[i].pattern.size() == strlen(word)) + type += 2; + while (type && reptable[i].outstrings[type].empty()) + type = (type == 2 && r != word) ? 0 : type - 1; + const std::string&out = reptable[i].outstrings[type]; + if (out.empty()) { + ++r; + continue; + } + candidate.assign(word); + candidate.resize(r - word); + candidate.append(reptable[i].outstrings[type]); + candidate.append(r + reptable[i].pattern.size()); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + // check REP suggestions with space + size_t sp = candidate.find(' '); + if (sp != std::string::npos) { + size_t prev = 0; + while (sp != std::string::npos) { + std::string prev_chunk = candidate.substr(prev, sp - prev); + if (checkword(prev_chunk, 0, NULL, NULL)) { + size_t oldns = wlst.size(); + std::string post_chunk = candidate.substr(sp + 1); + testsug(wlst, post_chunk, cpdsuggest, NULL, NULL); + if (oldns < wlst.size()) { + wlst[wlst.size() - 1] = candidate; + } + } + prev = sp + 1; + sp = candidate.find(' ', prev); + } + } + r++; // search for the next letter + } + } + return wlst.size(); +} + +// perhaps we doubled two characters +// (for example vacation -> vacacation) +// The recognized pattern with regex back-references: +// "(.)(.)\1\2\1" or "..(.)(.)\1\2" + +int SuggestMgr::doubletwochars(std::vector<std::string>& wlst, + const char* word, + int cpdsuggest) { + int state = 0; + int wl = strlen(word); + if (wl < 5 || !pAMgr) + return wlst.size(); + for (int i = 2; i < wl; i++) { + if (word[i] == word[i - 2]) { + state++; + if (state == 3 || (state == 2 && i >= 4)) { + std::string candidate(word, word + i - 1); + candidate.insert(candidate.end(), word + i + 1, word + wl); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + state = 0; + } + } else { + state = 0; + } + } + return wlst.size(); +} + +// perhaps we doubled two characters +// (for example vacation -> vacacation) +// The recognized pattern with regex back-references: +// "(.)(.)\1\2\1" or "..(.)(.)\1\2" + +int SuggestMgr::doubletwochars_utf(std::vector<std::string>& wlst, + const w_char* word, + int wl, + int cpdsuggest) { + int state = 0; + if (wl < 5 || !pAMgr) + return wlst.size(); + for (int i = 2; i < wl; i++) { + if (word[i] == word[i - 2]) { + state++; + if (state == 3 || (state == 2 && i >= 4)) { + std::vector<w_char> candidate_utf(word, word + i - 1); + candidate_utf.insert(candidate_utf.end(), word + i + 1, word + wl); + std::string candidate; + u16_u8(candidate, candidate_utf); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + state = 0; + } + } else { + state = 0; + } + } + return wlst.size(); +} + +// error is wrong char in place of correct one (case and keyboard related +// version) +int SuggestMgr::badcharkey(std::vector<std::string>& wlst, + const char* word, + int cpdsuggest) { + std::string candidate(word); + + // swap out each char one by one and try uppercase and neighbor + // keyboard chars in its place to see if that makes a good word + for (size_t i = 0; i < 
candidate.size(); ++i) { + char tmpc = candidate[i]; + // check with uppercase letters + candidate[i] = csconv[((unsigned char)tmpc)].cupper; + if (tmpc != candidate[i]) { + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + candidate[i] = tmpc; + } + // check neighbor characters in keyboard string + if (!ckey) + continue; + char* loc = strchr(ckey, tmpc); + while (loc) { + if ((loc > ckey) && (*(loc - 1) != '|')) { + candidate[i] = *(loc - 1); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + } + if ((*(loc + 1) != '|') && (*(loc + 1) != '\0')) { + candidate[i] = *(loc + 1); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + } + loc = strchr(loc + 1, tmpc); + } + candidate[i] = tmpc; + } + return wlst.size(); +} + +// error is wrong char in place of correct one (case and keyboard related +// version) +int SuggestMgr::badcharkey_utf(std::vector<std::string>& wlst, + const w_char* word, + int wl, + int cpdsuggest) { + std::string candidate; + std::vector<w_char> candidate_utf(word, word + wl); + // swap out each char one by one and try all the tryme + // chars in its place to see if that makes a good word + for (int i = 0; i < wl; i++) { + w_char tmpc = candidate_utf[i]; + // check with uppercase letters + candidate_utf[i] = upper_utf(candidate_utf[i], 1); + if (tmpc != candidate_utf[i]) { + u16_u8(candidate, candidate_utf); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + candidate_utf[i] = tmpc; + } + // check neighbor characters in keyboard string + if (!ckey) + continue; + size_t loc = 0; + while ((loc < ckeyl) && ckey_utf[loc] != tmpc) + ++loc; + while (loc < ckeyl) { + if ((loc > 0) && ckey_utf[loc - 1] != W_VLINE) { + candidate_utf[i] = ckey_utf[loc - 1]; + u16_u8(candidate, candidate_utf); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + } + if (((loc + 1) < ckeyl) && (ckey_utf[loc + 1] != W_VLINE)) { + candidate_utf[i] = ckey_utf[loc + 1]; + u16_u8(candidate, candidate_utf); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + } + do { + loc++; + } while ((loc < ckeyl) && ckey_utf[loc] != tmpc); + } + candidate_utf[i] = tmpc; + } + return wlst.size(); +} + +// error is wrong char in place of correct one +int SuggestMgr::badchar(std::vector<std::string>& wlst, const char* word, int cpdsuggest) { + std::string candidate(word); + clock_t timelimit = clock(); + int timer = MINTIMER; + // swap out each char one by one and try all the tryme + // chars in its place to see if that makes a good word + for (size_t j = 0; j < ctryl; ++j) { + for (std::string::reverse_iterator aI = candidate.rbegin(), aEnd = candidate.rend(); aI != aEnd; ++aI) { + char tmpc = *aI; + if (ctry[j] == tmpc) + continue; + *aI = ctry[j]; + testsug(wlst, candidate, cpdsuggest, &timer, &timelimit); + if (!timer) + return wlst.size(); + *aI = tmpc; + } + } + return wlst.size(); +} + +// error is wrong char in place of correct one +int SuggestMgr::badchar_utf(std::vector<std::string>& wlst, + const w_char* word, + int wl, + int cpdsuggest) { + std::vector<w_char> candidate_utf(word, word + wl); + std::string candidate; + clock_t timelimit = clock(); + int timer = MINTIMER; + // swap out each char one by one and try all the tryme + // chars in its place to see if that makes a good word + for (size_t j = 0; j < ctryl; ++j) { + for (int i = wl - 1; i >= 0; i--) { + w_char tmpc = candidate_utf[i]; + if (tmpc == ctry_utf[j]) + continue; + candidate_utf[i] = ctry_utf[j]; + u16_u8(candidate, candidate_utf); + testsug(wlst, candidate, cpdsuggest, &timer, &timelimit); + if (!timer) + return wlst.size(); + 
candidate_utf[i] = tmpc; + } + } + return wlst.size(); +} + +// error is word has an extra letter it does not need +int SuggestMgr::extrachar_utf(std::vector<std::string>& wlst, + const w_char* word, + int wl, + int cpdsuggest) { + std::vector<w_char> candidate_utf(word, word + wl); + if (candidate_utf.size() < 2) + return wlst.size(); + // try omitting one char of word at a time + for (size_t i = 0; i < candidate_utf.size(); ++i) { + size_t index = candidate_utf.size() - 1 - i; + w_char tmpc = candidate_utf[index]; + candidate_utf.erase(candidate_utf.begin() + index); + std::string candidate; + u16_u8(candidate, candidate_utf); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + candidate_utf.insert(candidate_utf.begin() + index, tmpc); + } + return wlst.size(); +} + +// error is word has an extra letter it does not need +int SuggestMgr::extrachar(std::vector<std::string>& wlst, + const char* word, + int cpdsuggest) { + std::string candidate(word); + if (candidate.size() < 2) + return wlst.size(); + // try omitting one char of word at a time + for (size_t i = 0; i < candidate.size(); ++i) { + size_t index = candidate.size() - 1 - i; + char tmpc = candidate[index]; + candidate.erase(candidate.begin() + index); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + candidate.insert(candidate.begin() + index, tmpc); + } + return wlst.size(); +} + +// error is missing a letter it needs +int SuggestMgr::forgotchar(std::vector<std::string>& wlst, + const char* word, + int cpdsuggest) { + std::string candidate(word); + clock_t timelimit = clock(); + int timer = MINTIMER; + + // try inserting a tryme character before every letter (and the null + // terminator) + for (size_t k = 0; k < ctryl; ++k) { + for (size_t i = 0; i <= candidate.size(); ++i) { + size_t index = candidate.size() - i; + candidate.insert(candidate.begin() + index, ctry[k]); + testsug(wlst, candidate, cpdsuggest, &timer, &timelimit); + if (!timer) + return wlst.size(); + candidate.erase(candidate.begin() + index); + } + } + return wlst.size(); +} + +// error is missing a letter it needs +int SuggestMgr::forgotchar_utf(std::vector<std::string>& wlst, + const w_char* word, + int wl, + int cpdsuggest) { + std::vector<w_char> candidate_utf(word, word + wl); + clock_t timelimit = clock(); + int timer = MINTIMER; + + // try inserting a tryme character at the end of the word and before every + // letter + for (size_t k = 0; k < ctryl; ++k) { + for (size_t i = 0; i <= candidate_utf.size(); ++i) { + size_t index = candidate_utf.size() - i; + candidate_utf.insert(candidate_utf.begin() + index, ctry_utf[k]); + std::string candidate; + u16_u8(candidate, candidate_utf); + testsug(wlst, candidate, cpdsuggest, &timer, &timelimit); + if (!timer) + return wlst.size(); + candidate_utf.erase(candidate_utf.begin() + index); + } + } + return wlst.size(); +} + +/* error is should have been two words + * return value is true, if there is a dictionary word pair, + * or there was already a good suggestion before calling + * this function. 
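The single-edit generators above (badchar(), extrachar(), forgotchar() and their *_utf counterparts) all share one shape: mutate one position of the candidate, run it through testsug()/checkword(), then restore the original character, with the affix file's TRY string supplying the letters to insert or substitute. A compressed, self-contained illustration of that pattern follows; the tiny dictionary, TRY string and test() lambda are stand-ins for Hunspell's real checkword() machinery, not its API:

    // Simplified illustration of the one-edit candidate loops above:
    // deletion (extrachar), insertion of a TRY letter (forgotchar) and
    // substitution of a TRY letter (badchar), with testsug()-style
    // duplicate filtering. Everything here is a placeholder sketch.
    #include <algorithm>
    #include <iostream>
    #include <set>
    #include <string>
    #include <vector>

    int main() {
      const std::set<std::string> dict = {"hello", "help", "hull"};
      const std::string tryme = "lop";      // stand-in for the TRY characters
      const std::string word = "helo";      // the misspelling
      std::vector<std::string> wlst;

      auto test = [&](const std::string& cand) {
        if (dict.count(cand) &&
            std::find(wlst.begin(), wlst.end(), cand) == wlst.end())
          wlst.push_back(cand);             // keep each good candidate once
      };

      // extrachar: try omitting one char of the word at a time
      for (size_t i = 0; i < word.size(); ++i)
        test(word.substr(0, i) + word.substr(i + 1));

      // forgotchar: insert a TRY character before every letter and at the end
      for (char c : tryme)
        for (size_t i = 0; i <= word.size(); ++i)
          test(word.substr(0, i) + c + word.substr(i));

      // badchar: swap each TRY character into every position
      for (char c : tryme)
        for (size_t i = 0; i < word.size(); ++i) {
          std::string cand = word;
          cand[i] = c;
          test(cand);
        }

      for (const auto& s : wlst)
        std::cout << s << '\n';             // prints "hello" then "help"
      return 0;
    }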
+ */ +bool SuggestMgr::twowords(std::vector<std::string>& wlst, + const char* word, + int cpdsuggest, + bool good) { + int c2; + int forbidden = 0; + int cwrd; + + int wl = strlen(word); + if (wl < 3) + return false; + + if (langnum == LANG_hu) + forbidden = check_forbidden(word, wl); + + char* candidate = (char*)malloc(wl + 2); + strcpy(candidate + 1, word); + + // split the string into two pieces after every char + // if both pieces are good words make them a suggestion + for (char* p = candidate + 1; p[1] != '\0'; p++) { + p[-1] = *p; + // go to end of the UTF-8 character + while (utf8 && ((p[1] & 0xc0) == 0x80)) { + *p = p[1]; + p++; + } + if (utf8 && p[1] == '\0') + break; // last UTF-8 character + + // Suggest only word pairs, if they are listed in the dictionary. + // For example, adding "a lot" to the English dic file will + // result only "alot" -> "a lot" suggestion instead of + // "alto, slot, alt, lot, allot, aloft, aloe, clot, plot, blot, a lot". + // Note: using "ph:alot" keeps the other suggestions: + // a lot ph:alot + // alot -> a lot, alto, slot... + *p = ' '; + if (!cpdsuggest && checkword(candidate, cpdsuggest, NULL, NULL)) { + // remove not word pair suggestions + if (!good) { + good = true; + wlst.clear(); + } + wlst.insert(wlst.begin(), candidate); + } + + // word pairs with dash? + if (lang_with_dash_usage) { + *p = '-'; + + if (!cpdsuggest && checkword(candidate, cpdsuggest, NULL, NULL)) { + // remove not word pair suggestions + if (!good) { + good = true; + wlst.clear(); + } + wlst.insert(wlst.begin(), candidate); + } + } + + if (wlst.size() < maxSug && !nosplitsugs && !good) { + *p = '\0'; + int c1 = checkword(candidate, cpdsuggest, NULL, NULL); + if (c1) { + c2 = checkword((p + 1), cpdsuggest, NULL, NULL); + if (c2) { + // spec. Hungarian code (TODO need a better compound word support) + if ((langnum == LANG_hu) && !forbidden && + // if 3 repeating letter, use - instead of space + (((p[-1] == p[1]) && + (((p > candidate + 1) && (p[-1] == p[-2])) || (p[-1] == p[2]))) || + // or multiple compounding, with more, than 6 syllables + ((c1 == 3) && (c2 >= 2)))) + *p = '-'; + else + *p = ' '; + + cwrd = 1; + for (size_t k = 0; k < wlst.size(); ++k) { + if (wlst[k] == candidate) { + cwrd = 0; + break; + } + } + + if (cwrd && (wlst.size() < maxSug)) + wlst.push_back(candidate); + + // add two word suggestion with dash, depending on the language + // Note that cwrd doesn't modified for REP twoword sugg. 
+ if ( !nosplitsugs && lang_with_dash_usage && + mystrlen(p + 1) > 1 && mystrlen(candidate) - mystrlen(p) > 1) { + *p = '-'; + for (size_t k = 0; k < wlst.size(); ++k) { + if (wlst[k] == candidate) { + cwrd = 0; + break; + } + } + + if ((wlst.size() < maxSug) && cwrd) + wlst.push_back(candidate); + } + } + } + } + } + free(candidate); + return good; +} + +// error is adjacent letter were swapped +int SuggestMgr::swapchar(std::vector<std::string>& wlst, + const char* word, + int cpdsuggest) { + std::string candidate(word); + if (candidate.size() < 2) + return wlst.size(); + + // try swapping adjacent chars one by one + for (size_t i = 0; i < candidate.size() - 1; ++i) { + std::swap(candidate[i], candidate[i+1]); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + std::swap(candidate[i], candidate[i+1]); + } + + // try double swaps for short words + // ahev -> have, owudl -> would + if (candidate.size() == 4 || candidate.size() == 5) { + candidate[0] = word[1]; + candidate[1] = word[0]; + candidate[2] = word[2]; + candidate[candidate.size() - 2] = word[candidate.size() - 1]; + candidate[candidate.size() - 1] = word[candidate.size() - 2]; + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + if (candidate.size() == 5) { + candidate[0] = word[0]; + candidate[1] = word[2]; + candidate[2] = word[1]; + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + } + } + + return wlst.size(); +} + +// error is adjacent letter were swapped +int SuggestMgr::swapchar_utf(std::vector<std::string>& wlst, + const w_char* word, + int wl, + int cpdsuggest) { + std::vector<w_char> candidate_utf(word, word + wl); + if (candidate_utf.size() < 2) + return wlst.size(); + + std::string candidate; + // try swapping adjacent chars one by one + for (size_t i = 0; i < candidate_utf.size() - 1; ++i) { + std::swap(candidate_utf[i], candidate_utf[i+1]); + u16_u8(candidate, candidate_utf); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + std::swap(candidate_utf[i], candidate_utf[i+1]); + } + + // try double swaps for short words + // ahev -> have, owudl -> would, suodn -> sound + if (candidate_utf.size() == 4 || candidate_utf.size() == 5) { + candidate_utf[0] = word[1]; + candidate_utf[1] = word[0]; + candidate_utf[2] = word[2]; + candidate_utf[candidate_utf.size() - 2] = word[candidate_utf.size() - 1]; + candidate_utf[candidate_utf.size() - 1] = word[candidate_utf.size() - 2]; + u16_u8(candidate, candidate_utf); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + if (candidate_utf.size() == 5) { + candidate_utf[0] = word[0]; + candidate_utf[1] = word[2]; + candidate_utf[2] = word[1]; + u16_u8(candidate, candidate_utf); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + } + } + return wlst.size(); +} + +// error is not adjacent letter were swapped +int SuggestMgr::longswapchar(std::vector<std::string>& wlst, + const char* word, + int cpdsuggest) { + std::string candidate(word); + // try swapping not adjacent chars one by one + for (std::string::iterator p = candidate.begin(); p < candidate.end(); ++p) { + for (std::string::iterator q = candidate.begin(); q < candidate.end(); ++q) { + size_t distance = std::abs(std::distance(q, p)); + if (distance > 1 && distance <= MAX_CHAR_DISTANCE) { + std::swap(*p, *q); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + std::swap(*p, *q); + } + } + } + return wlst.size(); +} + +// error is adjacent letter were swapped +int SuggestMgr::longswapchar_utf(std::vector<std::string>& wlst, + const w_char* word, + int wl, + int cpdsuggest) { + std::vector<w_char> candidate_utf(word, 
word + wl); + // try swapping not adjacent chars + for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) { + for (std::vector<w_char>::iterator q = candidate_utf.begin(); q < candidate_utf.end(); ++q) { + size_t distance = std::abs(std::distance(q, p)); + if (distance > 1 && distance <= MAX_CHAR_DISTANCE) { + std::swap(*p, *q); + std::string candidate; + u16_u8(candidate, candidate_utf); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + std::swap(*p, *q); + } + } + } + return wlst.size(); +} + +// error is a letter was moved +int SuggestMgr::movechar(std::vector<std::string>& wlst, + const char* word, + int cpdsuggest) { + std::string candidate(word); + if (candidate.size() < 2) + return wlst.size(); + + // try moving a char + for (std::string::iterator p = candidate.begin(); p < candidate.end(); ++p) { + for (std::string::iterator q = p + 1; q < candidate.end() && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) { + std::swap(*q, *(q - 1)); + if (std::distance(p, q) < 2) + continue; // omit swap char + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + } + std::copy(word, word + candidate.size(), candidate.begin()); + } + + for (std::string::reverse_iterator p = candidate.rbegin(), pEnd = candidate.rend() - 1; p != pEnd; ++p) { + for (std::string::reverse_iterator q = p + 1, qEnd = candidate.rend(); q != qEnd && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) { + std::swap(*q, *(q - 1)); + if (std::distance(p, q) < 2) + continue; // omit swap char + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + } + std::copy(word, word + candidate.size(), candidate.begin()); + } + + return wlst.size(); +} + +// error is a letter was moved +int SuggestMgr::movechar_utf(std::vector<std::string>& wlst, + const w_char* word, + int wl, + int cpdsuggest) { + std::vector<w_char> candidate_utf(word, word + wl); + if (candidate_utf.size() < 2) + return wlst.size(); + + // try moving a char + for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) { + for (std::vector<w_char>::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) { + std::swap(*q, *(q - 1)); + if (std::distance(p, q) < 2) + continue; // omit swap char + std::string candidate; + u16_u8(candidate, candidate_utf); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + } + std::copy(word, word + candidate_utf.size(), candidate_utf.begin()); + } + + for (std::vector<w_char>::reverse_iterator p = candidate_utf.rbegin(); p < candidate_utf.rend(); ++p) { + for (std::vector<w_char>::reverse_iterator q = p + 1; q < candidate_utf.rend() && std::distance(p, q) <= MAX_CHAR_DISTANCE; ++q) { + std::swap(*q, *(q - 1)); + if (std::distance(p, q) < 2) + continue; // omit swap char + std::string candidate; + u16_u8(candidate, candidate_utf); + testsug(wlst, candidate, cpdsuggest, NULL, NULL); + } + std::copy(word, word + candidate_utf.size(), candidate_utf.begin()); + } + + return wlst.size(); +} + +// generate a set of suggestions for very poorly spelled words +void SuggestMgr::ngsuggest(std::vector<std::string>& wlst, + const char* w, + const std::vector<HashMgr*>& rHMgr, + int captype) { + int lval; + int sc; + int lp, lpphon; + int nonbmp = 0; + + // exhaustively search through all root words + // keeping track of the MAX_ROOTS most similar root words + struct hentry* roots[MAX_ROOTS]; + char* rootsphon[MAX_ROOTS]; + int scores[MAX_ROOTS]; + int scoresphon[MAX_ROOTS]; + for (int i = 0; i < MAX_ROOTS; i++) { + roots[i] = NULL; + scores[i] = -100 
* i; + rootsphon[i] = NULL; + scoresphon[i] = -100 * i; + } + lp = MAX_ROOTS - 1; + lpphon = MAX_ROOTS - 1; + int low = NGRAM_LOWERING; + + std::string w2; + const char* word = w; + + // word reversing wrapper for complex prefixes + if (complexprefixes) { + w2.assign(w); + if (utf8) + reverseword_utf(w2); + else + reverseword(w2); + word = w2.c_str(); + } + + std::vector<w_char> u8; + int nc = strlen(word); + int n = (utf8) ? u8_u16(u8, word) : nc; + + // set character based ngram suggestion for words with non-BMP Unicode + // characters + if (n == -1) { + utf8 = 0; // XXX not state-free + n = nc; + nonbmp = 1; + low = 0; + } + + struct hentry* hp = NULL; + int col = -1; + phonetable* ph = (pAMgr) ? pAMgr->get_phonetable() : NULL; + std::string target; + std::string candidate; + std::vector<w_char> w_candidate; + if (ph) { + if (utf8) { + u8_u16(w_candidate, word); + mkallcap_utf(w_candidate, langnum); + u16_u8(candidate, w_candidate); + } else { + candidate.assign(word); + if (!nonbmp) + mkallcap(candidate, csconv); + } + target = phonet(candidate, *ph); // XXX phonet() is 8-bit (nc, not n) + } + + FLAG forbiddenword = pAMgr ? pAMgr->get_forbiddenword() : FLAG_NULL; + FLAG nosuggest = pAMgr ? pAMgr->get_nosuggest() : FLAG_NULL; + FLAG nongramsuggest = pAMgr ? pAMgr->get_nongramsuggest() : FLAG_NULL; + FLAG onlyincompound = pAMgr ? pAMgr->get_onlyincompound() : FLAG_NULL; + + std::vector<w_char> w_word, w_target; + if (utf8) { + u8_u16(w_word, word); + u8_u16(w_target, target); + } + + std::string f; + std::vector<w_char> w_f; + + for (size_t i = 0; i < rHMgr.size(); ++i) { + while (0 != (hp = rHMgr[i]->walk_hashtable(col, hp))) { + // skip exceptions + if ( + // skip it, if the word length different by 5 or + // more characters (to avoid strange suggestions) + // (except Unicode characters over BMP) + (((abs(n - hp->clen) > 4) && !nonbmp)) || + // don't suggest capitalized dictionary words for + // lower case misspellings in ngram suggestions, except + // - PHONE usage, or + // - in the case of German, where not only proper + // nouns are capitalized, or + // - the capitalized word has special pronunciation + ((captype == NOCAP) && (hp->var & H_OPT_INITCAP) && + !ph && (langnum != LANG_de) && !(hp->var & H_OPT_PHON)) || + // or it has one of the following special flags + ((hp->astr) && (pAMgr) && + (TESTAFF(hp->astr, forbiddenword, hp->alen) || + TESTAFF(hp->astr, ONLYUPCASEFLAG, hp->alen) || + TESTAFF(hp->astr, nosuggest, hp->alen) || + TESTAFF(hp->astr, nongramsuggest, hp->alen) || + TESTAFF(hp->astr, onlyincompound, hp->alen))) + ) + continue; + + if (utf8) { + u8_u16(w_f, HENTRY_WORD(hp)); + + int leftcommon = leftcommonsubstring(w_word, w_f); + if (low) { + // lowering dictionary word + mkallsmall_utf(w_f, langnum); + } + sc = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon; + } else { + f.assign(HENTRY_WORD(hp)); + + int leftcommon = leftcommonsubstring(word, f.c_str()); + if (low) { + // lowering dictionary word + mkallsmall(f, csconv); + } + sc = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon; + } + + // check special pronunciation + f.clear(); + if ((hp->var & H_OPT_PHON) && + copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) { + int sc2; + if (utf8) { + u8_u16(w_f, f); + + int leftcommon = leftcommonsubstring(w_word, w_f); + if (low) { + // lowering dictionary word + mkallsmall_utf(w_f, langnum); + } + sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon; + } else { + int leftcommon = leftcommonsubstring(word, f.c_str()); + if (low) { + // lowering dictionary word + 
mkallsmall(f, csconv); + } + sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon; + } + if (sc2 > sc) + sc = sc2; + } + + int scphon = -20000; + if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) { + if (utf8) { + u8_u16(w_candidate, HENTRY_WORD(hp)); + mkallcap_utf(w_candidate, langnum); + u16_u8(candidate, w_candidate); + } else { + candidate = HENTRY_WORD(hp); + mkallcap(candidate, csconv); + } + f = phonet(candidate, *ph); + if (utf8) { + u8_u16(w_f, f); + scphon = 2 * ngram(3, w_target, w_f, + NGRAM_LONGER_WORSE); + } else { + scphon = 2 * ngram(3, target, f, + NGRAM_LONGER_WORSE); + } + } + + if (sc > scores[lp]) { + scores[lp] = sc; + roots[lp] = hp; + lval = sc; + for (int j = 0; j < MAX_ROOTS; j++) + if (scores[j] < lval) { + lp = j; + lval = scores[j]; + } + } + + if (scphon > scoresphon[lpphon]) { + scoresphon[lpphon] = scphon; + rootsphon[lpphon] = HENTRY_WORD(hp); + lval = scphon; + for (int j = 0; j < MAX_ROOTS; j++) + if (scoresphon[j] < lval) { + lpphon = j; + lval = scoresphon[j]; + } + } + } + } + + // find minimum threshold for a passable suggestion + // mangle original word three differnt ways + // and score them to generate a minimum acceptable score + std::vector<w_char> w_mw; + int thresh = 0; + for (int sp = 1; sp < 4; sp++) { + if (utf8) { + w_mw = w_word; + for (int k = sp; k < n; k += 4) { + w_mw[k].l = '*'; + w_mw[k].h = 0; + } + + if (low) { + // lowering dictionary word + mkallsmall_utf(w_mw, langnum); + } + + thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH); + } else { + std::string mw = word; + for (int k = sp; k < n; k += 4) + mw[k] = '*'; + + if (low) { + // lowering dictionary word + mkallsmall(mw, csconv); + } + + thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH); + } + } + thresh = thresh / 3; + thresh--; + + // now expand affixes on each of these root words and + // and use length adjusted ngram scores to select + // possible suggestions + char* guess[MAX_GUESS]; + char* guessorig[MAX_GUESS]; + int gscore[MAX_GUESS]; + for (int i = 0; i < MAX_GUESS; i++) { + guess[i] = NULL; + guessorig[i] = NULL; + gscore[i] = -100 * i; + } + + lp = MAX_GUESS - 1; + + struct guessword* glst; + glst = (struct guessword*)calloc(MAX_WORDS, sizeof(struct guessword)); + if (!glst) { + if (nonbmp) + utf8 = 1; + return; + } + + for (int i = 0; i < MAX_ROOTS; i++) { + if (roots[i]) { + struct hentry* rp = roots[i]; + + f.clear(); + const char *field = NULL; + if ((rp->var & H_OPT_PHON) && copy_field(f, HENTRY_DATA(rp), MORPH_PHON)) + field = f.c_str(); + int nw = pAMgr->expand_rootword( + glst, MAX_WORDS, HENTRY_WORD(rp), rp->blen, rp->astr, rp->alen, word, + nc, field); + + for (int k = 0; k < nw; k++) { + if (utf8) { + u8_u16(w_f, glst[k].word); + + int leftcommon = leftcommonsubstring(w_word, w_f); + if (low) { + // lowering dictionary word + mkallsmall_utf(w_f, langnum); + } + + sc = ngram(n, w_word, w_f, NGRAM_ANY_MISMATCH) + leftcommon; + } else { + f = glst[k].word; + + int leftcommon = leftcommonsubstring(word, f.c_str()); + if (low) { + // lowering dictionary word + mkallsmall(f, csconv); + } + + sc = ngram(n, word, f, NGRAM_ANY_MISMATCH) + leftcommon; + } + + if (sc > thresh) { + if (sc > gscore[lp]) { + if (guess[lp]) { + free(guess[lp]); + if (guessorig[lp]) { + free(guessorig[lp]); + guessorig[lp] = NULL; + } + } + gscore[lp] = sc; + guess[lp] = glst[k].word; + guessorig[lp] = glst[k].orig; + lval = sc; + for (int j = 0; j < MAX_GUESS; j++) + if (gscore[j] < lval) { + lp = j; + lval = gscore[j]; + } + } else { + free(glst[k].word); + if (glst[k].orig) + 
free(glst[k].orig); + } + } else { + free(glst[k].word); + if (glst[k].orig) + free(glst[k].orig); + } + } + } + } + free(glst); + + // now we are done generating guesses + // sort in order of decreasing score + + bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS); + if (ph) + bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS); + + // weight suggestions with a similarity index, based on + // the longest common subsequent algorithm and resort + + int is_swap = 0; + int re = 0; + double fact = 1.0; + if (pAMgr) { + int maxd = pAMgr->get_maxdiff(); + if (maxd >= 0) + fact = (10.0 - maxd) / 5.0; + } + + std::vector<w_char> w_gl; + for (int i = 0; i < MAX_GUESS; i++) { + if (guess[i]) { + // lowering guess[i] + std::string gl; + int len; + if (utf8) { + len = u8_u16(w_gl, guess[i]); + mkallsmall_utf(w_gl, langnum); + u16_u8(gl, w_gl); + } else { + gl.assign(guess[i]); + if (!nonbmp) + mkallsmall(gl, csconv); + len = strlen(guess[i]); + } + + int _lcs = lcslen(word, gl.c_str()); + + // same characters with different casing + if ((n == len) && (n == _lcs)) { + gscore[i] += 2000; + break; + } + // using 2-gram instead of 3, and other weightening + + if (utf8) { + u8_u16(w_gl, gl); + //w_gl is lowercase already at this point + re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); + if (low) { + w_f = w_word; + // lowering dictionary word + mkallsmall_utf(w_f, langnum); + re += ngram(2, w_gl, w_f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); + } else { + re += ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); + } + } else { + //gl is lowercase already at this point + re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); + if (low) { + f = word; + // lowering dictionary word + mkallsmall(f, csconv); + re += ngram(2, gl, f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); + } else { + re += ngram(2, gl, word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); + } + } + + int ngram_score, leftcommon_score; + if (utf8) { + //w_gl is lowercase already at this point + ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH); + leftcommon_score = leftcommonsubstring(w_word, w_gl); + } else { + //gl is lowercase already at this point + ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH); + leftcommon_score = leftcommonsubstring(word, gl.c_str()); + } + gscore[i] = + // length of longest common subsequent minus length difference + 2 * _lcs - abs((int)(n - len)) + + // weight length of the left common substring + leftcommon_score + + // weight equal character positions + (!nonbmp && commoncharacterpositions(word, gl.c_str(), &is_swap) + ? 1 + : 0) + + // swap character (not neighboring) + ((is_swap) ? 10 : 0) + + // ngram + ngram_score + + // weighted ngrams + re + + // different limit for dictionaries with PHONE rules + (ph ? (re < len * fact ? -1000 : 0) + : (re < (n + len) * fact ? 
-1000 : 0)); + } + } + + bubblesort(&guess[0], &guessorig[0], &gscore[0], MAX_GUESS); + + // phonetic version + if (ph) + for (int i = 0; i < MAX_ROOTS; i++) { + if (rootsphon[i]) { + // lowering rootphon[i] + std::string gl; + int len; + if (utf8) { + len = u8_u16(w_gl, rootsphon[i]); + mkallsmall_utf(w_gl, langnum); + u16_u8(gl, w_gl); + } else { + gl.assign(rootsphon[i]); + if (!nonbmp) + mkallsmall(gl, csconv); + len = strlen(rootsphon[i]); + } + + // weight length of the left common substring + int leftcommon_score; + if (utf8) + leftcommon_score = leftcommonsubstring(w_word, w_gl); + else + leftcommon_score = leftcommonsubstring(word, gl.c_str()); + // heuristic weigthing of ngram scores + scoresphon[i] += 2 * lcslen(word, gl) - abs((int)(n - len)) + + leftcommon_score; + } + } + + if (ph) + bubblesort(&rootsphon[0], NULL, &scoresphon[0], MAX_ROOTS); + + // copy over + size_t oldns = wlst.size(); + + int same = 0; + for (int i = 0; i < MAX_GUESS; i++) { + if (guess[i]) { + if ((wlst.size() < oldns + maxngramsugs) && (wlst.size() < maxSug) && + (!same || (gscore[i] > 1000))) { + int unique = 1; + // leave only excellent suggestions, if exists + if (gscore[i] > 1000) + same = 1; + else if (gscore[i] < -100) { + same = 1; + // keep the best ngram suggestions, unless in ONLYMAXDIFF mode + if (wlst.size() > oldns || (pAMgr && pAMgr->get_onlymaxdiff())) { + free(guess[i]); + if (guessorig[i]) + free(guessorig[i]); + continue; + } + } + for (size_t j = 0; j < wlst.size(); ++j) { + // don't suggest previous suggestions or a previous suggestion with + // prefixes or affixes + if ((!guessorig[i] && strstr(guess[i], wlst[j].c_str())) || + (guessorig[i] && strstr(guessorig[i], wlst[j].c_str())) || + // check forbidden words + !checkword(guess[i], 0, NULL, NULL)) { + unique = 0; + break; + } + } + if (unique) { + if (guessorig[i]) { + wlst.push_back(guessorig[i]); + } else { + wlst.push_back(guess[i]); + } + } + free(guess[i]); + if (guessorig[i]) + free(guessorig[i]); + } else { + free(guess[i]); + if (guessorig[i]) + free(guessorig[i]); + } + } + } + + oldns = wlst.size(); + if (ph) + for (int i = 0; i < MAX_ROOTS; i++) { + if (rootsphon[i]) { + if ((wlst.size() < oldns + MAXPHONSUGS) && (wlst.size() < maxSug)) { + int unique = 1; + for (size_t j = 0; j < wlst.size(); ++j) { + // don't suggest previous suggestions or a previous suggestion with + // prefixes or affixes + if (strstr(rootsphon[i], wlst[j].c_str()) || + // check forbidden words + !checkword(rootsphon[i], 0, NULL, NULL)) { + unique = 0; + break; + } + } + if (unique) { + wlst.push_back(rootsphon[i]); + } + } + } + } + + if (nonbmp) + utf8 = 1; +} + +// see if a candidate suggestion is spelled correctly +// needs to check both root words and words with affixes + +// obsolote MySpell-HU modifications: +// return value 2 and 3 marks compounding with hyphen (-) +// `3' marks roots without suffix +int SuggestMgr::checkword(const std::string& word, + int cpdsuggest, + int* timer, + clock_t* timelimit) { + // check time limit + if (timer) { + (*timer)--; + if (!(*timer) && timelimit) { + if ((clock() - *timelimit) > TIMELIMIT) + return 0; + *timer = MAXPLUSTIMER; + } + } + + if (pAMgr) { + struct hentry* rv = NULL; + int nosuffix = 0; + + if (cpdsuggest == 1) { + if (pAMgr->get_compound()) { + struct hentry* rv2 = NULL; + struct hentry* rwords[100]; // buffer for COMPOUND pattern checking + rv = pAMgr->compound_check(word, 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, 1, 0); // EXT + if (rv && + (!(rv2 = pAMgr->lookup(word.c_str())) || 
!rv2->astr || + !(TESTAFF(rv2->astr, pAMgr->get_forbiddenword(), rv2->alen) || + TESTAFF(rv2->astr, pAMgr->get_nosuggest(), rv2->alen)))) + return 3; // XXX obsolote categorisation + only ICONV needs affix + // flag check? + } + return 0; + } + + rv = pAMgr->lookup(word.c_str()); + + if (rv) { + if ((rv->astr) && + (TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) || + TESTAFF(rv->astr, pAMgr->get_nosuggest(), rv->alen) || + TESTAFF(rv->astr, pAMgr->get_substandard(), rv->alen))) + return 0; + while (rv) { + if (rv->astr && + (TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || + TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) { + rv = rv->next_homonym; + } else + break; + } + } else + rv = pAMgr->prefix_check(word.c_str(), word.size(), + 0); // only prefix, and prefix + suffix XXX + + if (rv) { + nosuffix = 1; + } else { + rv = pAMgr->suffix_check(word.c_str(), word.size(), 0, NULL, + FLAG_NULL, FLAG_NULL, IN_CPD_NOT); // only suffix + } + + if (!rv && pAMgr->have_contclass()) { + rv = pAMgr->suffix_check_twosfx(word.c_str(), word.size(), 0, NULL, FLAG_NULL); + if (!rv) + rv = pAMgr->prefix_check_twosfx(word.c_str(), word.size(), 0, FLAG_NULL); + } + + // check forbidden words + if ((rv) && (rv->astr) && + (TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) || + TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) || + TESTAFF(rv->astr, pAMgr->get_nosuggest(), rv->alen) || + TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) + return 0; + + if (rv) { // XXX obsolote + if ((pAMgr->get_compoundflag()) && + TESTAFF(rv->astr, pAMgr->get_compoundflag(), rv->alen)) + return 2 + nosuffix; + return 1; + } + } + return 0; +} + +int SuggestMgr::check_forbidden(const char* word, int len) { + if (pAMgr) { + struct hentry* rv = pAMgr->lookup(word); + if (rv && rv->astr && + (TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) || + TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) + rv = NULL; + if (!(pAMgr->prefix_check(word, len, 1))) + rv = pAMgr->suffix_check(word, len, 0, NULL, + FLAG_NULL, FLAG_NULL, IN_CPD_NOT); // prefix+suffix, suffix + // check forbidden words + if ((rv) && (rv->astr) && + TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen)) + return 1; + } + return 0; +} + +std::string SuggestMgr::suggest_morph(const std::string& in_w) { + std::string result; + + struct hentry* rv = NULL; + + if (!pAMgr) + return std::string(); + + std::string w(in_w); + + // word reversing wrapper for complex prefixes + if (complexprefixes) { + if (utf8) + reverseword_utf(w); + else + reverseword(w); + } + + rv = pAMgr->lookup(w.c_str()); + + while (rv) { + if ((!rv->astr) || + !(TESTAFF(rv->astr, pAMgr->get_forbiddenword(), rv->alen) || + TESTAFF(rv->astr, pAMgr->get_needaffix(), rv->alen) || + TESTAFF(rv->astr, pAMgr->get_onlyincompound(), rv->alen))) { + if (!HENTRY_FIND(rv, MORPH_STEM)) { + result.push_back(MSEP_FLD); + result.append(MORPH_STEM); + result.append(w); + } + if (HENTRY_DATA(rv)) { + result.push_back(MSEP_FLD); + result.append(HENTRY_DATA2(rv)); + } + result.push_back(MSEP_REC); + } + rv = rv->next_homonym; + } + + std::string st = pAMgr->affix_check_morph(w.c_str(), w.size()); + if (!st.empty()) { + result.append(st); + } + + if (pAMgr->get_compound() && result.empty()) { + struct hentry* rwords[100]; // buffer for COMPOUND pattern checking + pAMgr->compound_check_morph(w.c_str(), w.size(), 0, 0, 100, 0, NULL, (hentry**)&rwords, 0, result, + NULL); + } + + line_uniq(result, MSEP_REC); + + return result; 
+} + +static int get_sfxcount(const char* morph) { + if (!morph || !*morph) + return 0; + int n = 0; + const char* old = morph; + morph = strstr(morph, MORPH_DERI_SFX); + if (!morph) + morph = strstr(old, MORPH_INFL_SFX); + if (!morph) + morph = strstr(old, MORPH_TERM_SFX); + while (morph) { + n++; + old = morph; + morph = strstr(morph + 1, MORPH_DERI_SFX); + if (!morph) + morph = strstr(old + 1, MORPH_INFL_SFX); + if (!morph) + morph = strstr(old + 1, MORPH_TERM_SFX); + } + return n; +} + +/* affixation */ +std::string SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) { + std::string result; + int sfxcount = get_sfxcount(pattern); + + if (get_sfxcount(HENTRY_DATA(rv)) > sfxcount) + return result; + + if (HENTRY_DATA(rv)) { + std::string aff = pAMgr->morphgen(HENTRY_WORD(rv), rv->blen, rv->astr, rv->alen, + HENTRY_DATA(rv), pattern, 0); + if (!aff.empty()) { + result.append(aff); + result.push_back(MSEP_REC); + } + } + + // check all allomorphs + char* p = NULL; + if (HENTRY_DATA(rv)) + p = (char*)strstr(HENTRY_DATA2(rv), MORPH_ALLOMORPH); + while (p) { + p += MORPH_TAG_LEN; + int plen = fieldlen(p); + std::string allomorph(p, plen); + struct hentry* rv2 = pAMgr->lookup(allomorph.c_str()); + while (rv2) { + // if (HENTRY_DATA(rv2) && get_sfxcount(HENTRY_DATA(rv2)) <= + // sfxcount) { + if (HENTRY_DATA(rv2)) { + char* st = (char*)strstr(HENTRY_DATA2(rv2), MORPH_STEM); + if (st && (strncmp(st + MORPH_TAG_LEN, HENTRY_WORD(rv), + fieldlen(st + MORPH_TAG_LEN)) == 0)) { + std::string aff = pAMgr->morphgen(HENTRY_WORD(rv2), rv2->blen, rv2->astr, + rv2->alen, HENTRY_DATA(rv2), pattern, 0); + if (!aff.empty()) { + result.append(aff); + result.push_back(MSEP_REC); + } + } + } + rv2 = rv2->next_homonym; + } + p = strstr(p + plen, MORPH_ALLOMORPH); + } + + return result; +} + +std::string SuggestMgr::suggest_gen(const std::vector<std::string>& desc, const std::string& in_pattern) { + if (desc.empty() || !pAMgr) + return std::string(); + + const char* pattern = in_pattern.c_str(); + std::string result2; + std::string newpattern; + struct hentry* rv = NULL; + + // search affixed forms with and without derivational suffixes + while (1) { + for (size_t k = 0; k < desc.size(); ++k) { + std::string result; + + // add compound word parts (except the last one) + const char* s = desc[k].c_str(); + const char* part = strstr(s, MORPH_PART); + if (part) { + const char* nextpart = strstr(part + 1, MORPH_PART); + while (nextpart) { + std::string field; + copy_field(field, part, MORPH_PART); + result.append(field); + part = nextpart; + nextpart = strstr(part + 1, MORPH_PART); + } + s = part; + } + + std::string tok(s); + size_t pos = tok.find(" | "); + while (pos != std::string::npos) { + tok[pos + 1] = MSEP_ALT; + pos = tok.find(" | ", pos); + } + std::vector<std::string> pl = line_tok(tok, MSEP_ALT); + for (size_t i = 0; i < pl.size(); ++i) { + // remove inflectional and terminal suffixes + size_t is = pl[i].find(MORPH_INFL_SFX); + if (is != std::string::npos) + pl[i].resize(is); + size_t ts = pl[i].find(MORPH_TERM_SFX); + while (ts != std::string::npos) { + pl[i][ts] = '_'; + ts = pl[i].find(MORPH_TERM_SFX); + } + const char* st = strstr(s, MORPH_STEM); + if (st) { + copy_field(tok, st, MORPH_STEM); + rv = pAMgr->lookup(tok.c_str()); + while (rv) { + std::string newpat(pl[i]); + newpat.append(pattern); + std::string sg = suggest_hentry_gen(rv, newpat.c_str()); + if (sg.empty()) + sg = suggest_hentry_gen(rv, pattern); + if (!sg.empty()) { + std::vector<std::string> gen = line_tok(sg, MSEP_REC); + for 
(size_t j = 0; j < gen.size(); ++j) { + result2.push_back(MSEP_REC); + result2.append(result); + if (pl[i].find(MORPH_SURF_PFX) != std::string::npos) { + std::string field; + copy_field(field, pl[i], MORPH_SURF_PFX); + result2.append(field); + } + result2.append(gen[j]); + } + } + rv = rv->next_homonym; + } + } + } + } + + if (!result2.empty() || !strstr(pattern, MORPH_DERI_SFX)) + break; + + newpattern.assign(pattern); + mystrrep(newpattern, MORPH_DERI_SFX, MORPH_TERM_SFX); + pattern = newpattern.c_str(); + } + return result2; +} + +// generate an n-gram score comparing s1 and s2, UTF16 version +int SuggestMgr::ngram(int n, + const std::vector<w_char>& su1, + const std::vector<w_char>& su2, + int opt) { + int nscore = 0; + int ns; + int l1; + int l2; + int test = 0; + + l1 = su1.size(); + l2 = su2.size(); + if (l2 == 0) + return 0; + for (int j = 1; j <= n; j++) { + ns = 0; + for (int i = 0; i <= (l1 - j); i++) { + int k = 0; + for (int l = 0; l <= (l2 - j); l++) { + for (k = 0; k < j; k++) { + const w_char& c1 = su1[i + k]; + const w_char& c2 = su2[l + k]; + if ((c1.l != c2.l) || (c1.h != c2.h)) + break; + } + if (k == j) { + ns++; + break; + } + } + if (k != j && opt & NGRAM_WEIGHTED) { + ns--; + test++; + if (i == 0 || i == l1 - j) + ns--; // side weight + } + } + nscore = nscore + ns; + if (ns < 2 && !(opt & NGRAM_WEIGHTED)) + break; + } + + ns = 0; + if (opt & NGRAM_LONGER_WORSE) + ns = (l2 - l1) - 2; + if (opt & NGRAM_ANY_MISMATCH) + ns = abs(l2 - l1) - 2; + ns = (nscore - ((ns > 0) ? ns : 0)); + return ns; +} + +// generate an n-gram score comparing s1 and s2, non-UTF16 version +int SuggestMgr::ngram(int n, + const std::string& s1, + const std::string& s2, + int opt) { + int nscore = 0; + int ns; + int l1; + int l2; + int test = 0; + + l2 = s2.size(); + if (l2 == 0) + return 0; + l1 = s1.size(); + for (int j = 1; j <= n; j++) { + ns = 0; + for (int i = 0; i <= (l1 - j); i++) { + //s2 is haystack, s1[i..i+j) is needle + if (s2.find(s1.c_str()+i, 0, j) != std::string::npos) { + ns++; + } else if (opt & NGRAM_WEIGHTED) { + ns--; + test++; + if (i == 0 || i == l1 - j) + ns--; // side weight + } + } + nscore = nscore + ns; + if (ns < 2 && !(opt & NGRAM_WEIGHTED)) + break; + } + + ns = 0; + if (opt & NGRAM_LONGER_WORSE) + ns = (l2 - l1) - 2; + if (opt & NGRAM_ANY_MISMATCH) + ns = abs(l2 - l1) - 2; + ns = (nscore - ((ns > 0) ? ns : 0)); + return ns; +} + +// length of the left common substring of s1 and (decapitalised) s2, UTF version +int SuggestMgr::leftcommonsubstring( + const std::vector<w_char>& su1, + const std::vector<w_char>& su2) { + int l1 = su1.size(); + int l2 = su2.size(); + // decapitalize dictionary word + if (complexprefixes) { + if (l1 && l2 && su1[l1 - 1] == su2[l2 - 1]) + return 1; + } else { + unsigned short idx = su2.empty() ? 0 : (su2[0].h << 8) + su2[0].l; + unsigned short otheridx = su1.empty() ? 
0 : (su1[0].h << 8) + su1[0].l; + if (otheridx != idx && (otheridx != unicodetolower(idx, langnum))) + return 0; + int i; + for (i = 1; (i < l1) && (i < l2) && (su1[i].l == su2[i].l) && + (su1[i].h == su2[i].h); + i++) + ; + return i; + } + return 0; +} + +// length of the left common substring of s1 and (decapitalised) s2, non-UTF +int SuggestMgr::leftcommonsubstring( + const char* s1, + const char* s2) { + if (complexprefixes) { + int l1 = strlen(s1); + int l2 = strlen(s2); + if (l1 <= l2 && s2[l1 - 1] == s2[l2 - 1]) + return 1; + } else if (csconv) { + const char* olds = s1; + // decapitalise dictionary word + if ((*s1 != *s2) && (*s1 != csconv[((unsigned char)*s2)].clower)) + return 0; + do { + s1++; + s2++; + } while ((*s1 == *s2) && (*s1 != '\0')); + return (int)(s1 - olds); + } + return 0; +} + +int SuggestMgr::commoncharacterpositions(const char* s1, + const char* s2, + int* is_swap) { + int num = 0; + int diff = 0; + int diffpos[2]; + *is_swap = 0; + if (utf8) { + std::vector<w_char> su1; + std::vector<w_char> su2; + int l1 = u8_u16(su1, s1); + int l2 = u8_u16(su2, s2); + + if (l1 <= 0 || l2 <= 0) + return 0; + + // decapitalize dictionary word + if (complexprefixes) { + su2[l2 - 1] = lower_utf(su2[l2 - 1], langnum); + } else { + su2[0] = lower_utf(su2[0], langnum); + } + for (int i = 0; (i < l1) && (i < l2); i++) { + if (su1[i] == su2[i]) { + num++; + } else { + if (diff < 2) + diffpos[diff] = i; + diff++; + } + } + if ((diff == 2) && (l1 == l2) && + (su1[diffpos[0]] == su2[diffpos[1]]) && + (su1[diffpos[1]] == su2[diffpos[0]])) + *is_swap = 1; + } else { + size_t i; + std::string t(s2); + // decapitalize dictionary word + if (complexprefixes) { + size_t l2 = t.size(); + t[l2 - 1] = csconv[(unsigned char)t[l2 - 1]].clower; + } else { + mkallsmall(t, csconv); + } + for (i = 0; i < t.size() && (*(s1 + i) != 0); ++i) { + if (*(s1 + i) == t[i]) { + num++; + } else { + if (diff < 2) + diffpos[diff] = i; + diff++; + } + } + if ((diff == 2) && (*(s1 + i) == 0) && i == t.size() && + (*(s1 + diffpos[0]) == t[diffpos[1]]) && + (*(s1 + diffpos[1]) == t[diffpos[0]])) + *is_swap = 1; + } + return num; +} + +int SuggestMgr::mystrlen(const char* word) { + if (utf8) { + std::vector<w_char> w; + return u8_u16(w, word); + } else + return strlen(word); +} + +// sort in decreasing order of score +void SuggestMgr::bubblesort(char** rword, char** rword2, int* rsc, int n) { + int m = 1; + while (m < n) { + int j = m; + while (j > 0) { + if (rsc[j - 1] < rsc[j]) { + int sctmp = rsc[j - 1]; + char* wdtmp = rword[j - 1]; + rsc[j - 1] = rsc[j]; + rword[j - 1] = rword[j]; + rsc[j] = sctmp; + rword[j] = wdtmp; + if (rword2) { + wdtmp = rword2[j - 1]; + rword2[j - 1] = rword2[j]; + rword2[j] = wdtmp; + } + j--; + } else + break; + } + m++; + } + return; +} + +// longest common subsequence +void SuggestMgr::lcs(const char* s, + const char* s2, + int* l1, + int* l2, + char** result) { + int n, m; + std::vector<w_char> su; + std::vector<w_char> su2; + char* b; + char* c; + int i; + int j; + if (utf8) { + m = u8_u16(su, s); + n = u8_u16(su2, s2); + } else { + m = strlen(s); + n = strlen(s2); + } + c = (char*)malloc((m + 1) * (n + 1)); + b = (char*)malloc((m + 1) * (n + 1)); + if (!c || !b) { + if (c) + free(c); + if (b) + free(b); + *result = NULL; + return; + } + for (i = 1; i <= m; i++) + c[i * (n + 1)] = 0; + for (j = 0; j <= n; j++) + c[j] = 0; + for (i = 1; i <= m; i++) { + for (j = 1; j <= n; j++) { + if (((utf8) && (su[i - 1] == su2[j - 1])) || + ((!utf8) && (s[i - 1] == s2[j - 1]))) { + c[i * (n + 1) + j] 
= c[(i - 1) * (n + 1) + j - 1] + 1; + b[i * (n + 1) + j] = LCS_UPLEFT; + } else if (c[(i - 1) * (n + 1) + j] >= c[i * (n + 1) + j - 1]) { + c[i * (n + 1) + j] = c[(i - 1) * (n + 1) + j]; + b[i * (n + 1) + j] = LCS_UP; + } else { + c[i * (n + 1) + j] = c[i * (n + 1) + j - 1]; + b[i * (n + 1) + j] = LCS_LEFT; + } + } + } + *result = b; + free(c); + *l1 = m; + *l2 = n; +} + +int SuggestMgr::lcslen(const char* s, const char* s2) { + int m; + int n; + int i; + int j; + char* result; + int len = 0; + lcs(s, s2, &m, &n, &result); + if (!result) + return 0; + i = m; + j = n; + while ((i != 0) && (j != 0)) { + if (result[i * (n + 1) + j] == LCS_UPLEFT) { + len++; + i--; + j--; + } else if (result[i * (n + 1) + j] == LCS_UP) { + i--; + } else + j--; + } + free(result); + return len; +} + +int SuggestMgr::lcslen(const std::string& s, const std::string& s2) { + return lcslen(s.c_str(), s2.c_str()); +} diff --git a/extensions/spellcheck/hunspell/src/suggestmgr.hxx b/extensions/spellcheck/hunspell/src/suggestmgr.hxx new file mode 100644 index 0000000000..4c2fb69032 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/suggestmgr.hxx @@ -0,0 +1,183 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ +/* + * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada + * And Contributors. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. All modifications to the source code must be clearly marked as + * such. Binary redistributions based on modified source code + * must be clearly marked as modified versions in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef SUGGESTMGR_HXX_ +#define SUGGESTMGR_HXX_ + +#define MAX_ROOTS 100 +#define MAX_WORDS 100 +#define MAX_GUESS 200 +#define MAXNGRAMSUGS 4 +#define MAXPHONSUGS 2 +#define MAXCOMPOUNDSUGS 3 + +#define NGRAM_LONGER_WORSE (1 << 0) +#define NGRAM_ANY_MISMATCH (1 << 1) +#define NGRAM_LOWERING (1 << 2) +#define NGRAM_WEIGHTED (1 << 3) + +#include "atypes.hxx" +#include "affixmgr.hxx" +#include "hashmgr.hxx" +#include "langnum.hxx" + +enum { LCS_UP, LCS_LEFT, LCS_UPLEFT }; + +class SuggestMgr { + private: + SuggestMgr(const SuggestMgr&); + SuggestMgr& operator=(const SuggestMgr&); + + private: + char* ckey; + size_t ckeyl; + std::vector<w_char> ckey_utf; + + char* ctry; + size_t ctryl; + std::vector<w_char> ctry_utf; + bool lang_with_dash_usage; + + AffixMgr* pAMgr; + unsigned int maxSug; + struct cs_info* csconv; + int utf8; + int langnum; + int nosplitsugs; + int maxngramsugs; + int maxcpdsugs; + int complexprefixes; + + public: + SuggestMgr(const char* tryme, unsigned int maxn, AffixMgr* aptr); + ~SuggestMgr(); + + bool suggest(std::vector<std::string>& slst, const char* word, int* onlycmpdsug); + void ngsuggest(std::vector<std::string>& slst, const char* word, const std::vector<HashMgr*>& rHMgr, int captype); + + std::string suggest_morph(const std::string& word); + std::string suggest_gen(const std::vector<std::string>& pl, const std::string& pattern); + + private: + void testsug(std::vector<std::string>& wlst, + const std::string& candidate, + int cpdsuggest, + int* timer, + clock_t* timelimit); + int checkword(const std::string& word, int, int*, clock_t*); + int check_forbidden(const char*, int); + + void capchars(std::vector<std::string>&, const char*, int); + int replchars(std::vector<std::string>&, const char*, int); + int doubletwochars(std::vector<std::string>&, const char*, int); + int forgotchar(std::vector<std::string>&, const char*, int); + int swapchar(std::vector<std::string>&, const char*, int); + int longswapchar(std::vector<std::string>&, const char*, int); + int movechar(std::vector<std::string>&, const char*, int); + int extrachar(std::vector<std::string>&, const char*, int); + int badcharkey(std::vector<std::string>&, const char*, int); + int badchar(std::vector<std::string>&, const char*, int); + bool 
twowords(std::vector<std::string>&, const char*, int, bool); + + void capchars_utf(std::vector<std::string>&, const w_char*, int wl, int); + int doubletwochars_utf(std::vector<std::string>&, const w_char*, int wl, int); + int forgotchar_utf(std::vector<std::string>&, const w_char*, int wl, int); + int extrachar_utf(std::vector<std::string>&, const w_char*, int wl, int); + int badcharkey_utf(std::vector<std::string>&, const w_char*, int wl, int); + int badchar_utf(std::vector<std::string>&, const w_char*, int wl, int); + int swapchar_utf(std::vector<std::string>&, const w_char*, int wl, int); + int longswapchar_utf(std::vector<std::string>&, const w_char*, int, int); + int movechar_utf(std::vector<std::string>&, const w_char*, int, int); + + int mapchars(std::vector<std::string>&, const char*, int); + int map_related(const char*, + std::string&, + int, + std::vector<std::string>& wlst, + int, + const std::vector<mapentry>&, + int*, + clock_t*); + int ngram(int n, const std::vector<w_char>& su1, + const std::vector<w_char>& su2, int opt); + int ngram(int n, const std::string& s1, const std::string& s2, int opt); + int mystrlen(const char* word); + int leftcommonsubstring(const std::vector<w_char>& su1, + const std::vector<w_char>& su2); + int leftcommonsubstring(const char* s1, const char* s2); + int commoncharacterpositions(const char* s1, const char* s2, int* is_swap); + void bubblesort(char** rwd, char** rwd2, int* rsc, int n); + void lcs(const char* s, const char* s2, int* l1, int* l2, char** result); + int lcslen(const char* s, const char* s2); + int lcslen(const std::string& s, const std::string& s2); + std::string suggest_hentry_gen(hentry* rv, const char* pattern); +}; + +#endif diff --git a/extensions/spellcheck/hunspell/src/w_char.hxx b/extensions/spellcheck/hunspell/src/w_char.hxx new file mode 100644 index 0000000000..7e71d04680 --- /dev/null +++ b/extensions/spellcheck/hunspell/src/w_char.hxx @@ -0,0 +1,72 @@ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * Copyright (C) 2002-2022 Németh László + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. + * + * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, + * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, + * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, + * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, + * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. 
If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#ifndef W_CHAR_HXX_ +#define W_CHAR_HXX_ + +#include <string> + +#ifndef GCC +struct w_char { +#else +struct __attribute__((packed)) w_char { +#endif + unsigned char l; + unsigned char h; + + friend bool operator<(const w_char a, const w_char b) { + unsigned short a_idx = (a.h << 8) + a.l; + unsigned short b_idx = (b.h << 8) + b.l; + return a_idx < b_idx; + } + + friend bool operator==(const w_char a, const w_char b) { + return (((a).l == (b).l) && ((a).h == (b).h)); + } + + friend bool operator!=(const w_char a, const w_char b) { + return !(a == b);; + } +}; + +// two character arrays +struct replentry { + std::string pattern; + std::string outstrings[4]; // med, ini, fin, isol +}; + +#endif diff --git a/extensions/spellcheck/hunspell/tests/crashtests/1825445.html b/extensions/spellcheck/hunspell/tests/crashtests/1825445.html new file mode 100644 index 0000000000..203bd2d1c3 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/crashtests/1825445.html @@ -0,0 +1,12 @@ +<html> +<p id="targetParagraph"> + +<script> + document.designMode = 'on'; + + function crash() { + for(var i=0; i<850; i++) { targetParagraph.insertAdjacentText("afterEnd", "S".repeat(8567)); } + } + crash(); +</script> +</html> diff --git a/extensions/spellcheck/hunspell/tests/crashtests/crashtests.list b/extensions/spellcheck/hunspell/tests/crashtests/crashtests.list new file mode 100644 index 0000000000..7da5d38f8e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/crashtests/crashtests.list @@ -0,0 +1 @@ +load 1825445.html
\ No newline at end of file diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.aff b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.aff new file mode 100644 index 0000000000..0a11404fd6 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.aff @@ -0,0 +1,4 @@ +# capitalized ngram suggestion test data (Unicode version) for +# Sf.net Bug ID 1463589, reported by Frederik Fouvry. +SET UTF-8 +MAXNGRAMSUGS 1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.dic b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.dic new file mode 100644 index 0000000000..8cec606034 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.dic @@ -0,0 +1,2 @@ +1 +Kühlschrank diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.sug b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.sug new file mode 100644 index 0000000000..8a72f1e21f --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.sug @@ -0,0 +1,5 @@ +Kühlschrank +Kühlschrank +Kühlschrank +Kühlschrank +Kühlschrank diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.test b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.wrong new file mode 100644 index 0000000000..9de6c63cdf --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589-utf.wrong @@ -0,0 +1,5 @@ +kuhlschrank +kuehlschrank +kühlschrank +Kuhlschrank +Kuehlschrank diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589.aff b/extensions/spellcheck/hunspell/tests/unit/data/1463589.aff new file mode 100644 index 0000000000..8ecf4594e0 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589.aff @@ -0,0 +1,3 @@ +# capitalized ngram suggestion test data for +# Sf.net Bug ID 1463589, reported by Frederik Fouvry. 
+MAXNGRAMSUGS 1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589.dic b/extensions/spellcheck/hunspell/tests/unit/data/1463589.dic new file mode 100644 index 0000000000..a3caab802f --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589.dic @@ -0,0 +1,2 @@ +1 +Khlschrank diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589.sug b/extensions/spellcheck/hunspell/tests/unit/data/1463589.sug new file mode 100644 index 0000000000..2961eddd2b --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589.sug @@ -0,0 +1,5 @@ +Khlschrank +Khlschrank +Khlschrank +Khlschrank +Khlschrank diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589.test b/extensions/spellcheck/hunspell/tests/unit/data/1463589.test new file mode 100644 index 0000000000..dc295077fb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i ISO8859-1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1463589.wrong b/extensions/spellcheck/hunspell/tests/unit/data/1463589.wrong new file mode 100644 index 0000000000..0f3f489698 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1463589.wrong @@ -0,0 +1,5 @@ +kuhlschrank +kuehlschrank +khlschrank +Kuhlschrank +Kuehlschrank diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1592880.aff b/extensions/spellcheck/hunspell/tests/unit/data/1592880.aff new file mode 100644 index 0000000000..0aa064e37e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1592880.aff @@ -0,0 +1,20 @@ +# fix homonym handling for German dictionary project, +# reported by Björn Jacke (sf.net Bug ID 1592880). +SET ISO8859-1 + +SFX N Y 1 +SFX N 0 n . + +SFX S Y 1 +SFX S 0 s . + +SFX P Y 1 +SFX P 0 en . + +SFX Q Y 2 +SFX Q 0 e . +SFX Q 0 en . + +COMPOUNDEND z +COMPOUNDPERMITFLAG c +ONLYINCOMPOUND o diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1592880.dic b/extensions/spellcheck/hunspell/tests/unit/data/1592880.dic new file mode 100644 index 0000000000..8b0fef8141 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1592880.dic @@ -0,0 +1,4 @@ +3 +weg/Qoz +weg/P +wege diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1592880.good b/extensions/spellcheck/hunspell/tests/unit/data/1592880.good new file mode 100644 index 0000000000..aa00a58b12 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1592880.good @@ -0,0 +1,3 @@ +weg +wege +wegen diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1592880.test b/extensions/spellcheck/hunspell/tests/unit/data/1592880.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1592880.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1695964.aff b/extensions/spellcheck/hunspell/tests/unit/data/1695964.aff new file mode 100644 index 0000000000..359a25f3a3 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1695964.aff @@ -0,0 +1,10 @@ +# fix NEEDAFFIX homonym suggestion. +# Sf.net Bug ID 1695964, reported by Björn Jacke. +TRY esianrtolcdugmphbyfvkwESIANRTOLCDUGMPHBYFVKW +MAXNGRAMSUGS 0 +NEEDAFFIX h +SFX S Y 1 +SFX S 0 s . + +SFX e Y 1 +SFX e 0 e . 
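The suggestion fixtures above (1463589, 1463589-utf, 1592880, 1695964) each pair a NAME.aff/NAME.dic dictionary with a NAME.wrong list of misspellings and, where suggestions are checked, a NAME.sug file giving the expected suggester output; the NAME.test scripts drive them through test.sh. As a minimal sketch of what one of these fixtures asserts — assuming the Hunspell 1.7 public C++ API declared in hunspell.hxx; the file paths and the standalone program are illustrative and not part of the imported sources:

// Minimal sketch (not part of the imported sources) of the check encoded by
// the 1463589-utf.{aff,dic,wrong,sug} fixture. Assumes the Hunspell 1.7
// public C++ API from hunspell.hxx; paths are illustrative.
#include <hunspell.hxx>
#include <iostream>
#include <string>
#include <vector>

int main() {
  // 1463589-utf.aff sets MAXNGRAMSUGS 1, so SuggestMgr::ngsuggest() (above)
  // contributes at most one n-gram candidate per misspelling.
  Hunspell dict("1463589-utf.aff", "1463589-utf.dic");

  // "kühlschrank" is one of the misspellings in 1463589-utf.wrong; per
  // 1463589-utf.sug the capitalized dictionary word "Kühlschrank" is expected.
  const std::string wrong = "k\xc3\xbchlschrank";  // UTF-8, like the .wrong file
  if (!dict.spell(wrong)) {
    for (const std::string& s : dict.suggest(wrong))
      std::cout << s << '\n';  // expected to print "Kühlschrank"
  }
  return 0;
}

The remaining fixtures follow the same naming pattern: in essence, test.sh compares the suggester's output for each line of NAME.wrong against NAME.sug, and checks that every word in NAME.good is accepted.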
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1695964.dic b/extensions/spellcheck/hunspell/tests/unit/data/1695964.dic new file mode 100644 index 0000000000..ff6d110cc7 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1695964.dic @@ -0,0 +1,3 @@ +2 +Mull/he +Mull/S diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1695964.sug b/extensions/spellcheck/hunspell/tests/unit/data/1695964.sug new file mode 100644 index 0000000000..35aedff7cc --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1695964.sug @@ -0,0 +1,3 @@ +Mull +Mulle +Mulls diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1695964.test b/extensions/spellcheck/hunspell/tests/unit/data/1695964.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1695964.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1695964.wrong b/extensions/spellcheck/hunspell/tests/unit/data/1695964.wrong new file mode 100644 index 0000000000..fd13dc8cac --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1695964.wrong @@ -0,0 +1,3 @@ +Mall +Malle +Malls diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1706659.aff b/extensions/spellcheck/hunspell/tests/unit/data/1706659.aff new file mode 100644 index 0000000000..66a676efa5 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1706659.aff @@ -0,0 +1,13 @@ +# test COMPOUNDRULE bug reported by Björn Jacke +SET ISO8859-1 +TRY esijanrtolcdugmphbyfvkwqxz + +SFX A Y 5 +SFX A 0 e . +SFX A 0 er . +SFX A 0 en . +SFX A 0 em . +SFX A 0 es . + +COMPOUNDRULE 1 +COMPOUNDRULE vw diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1706659.dic b/extensions/spellcheck/hunspell/tests/unit/data/1706659.dic new file mode 100644 index 0000000000..32d461f7a9 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1706659.dic @@ -0,0 +1,4 @@ +3 +arbeits/v +scheu/Aw +farbig/A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1706659.test b/extensions/spellcheck/hunspell/tests/unit/data/1706659.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1706659.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1706659.wrong b/extensions/spellcheck/hunspell/tests/unit/data/1706659.wrong new file mode 100644 index 0000000000..799dd31117 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1706659.wrong @@ -0,0 +1,3 @@ +arbeitsfarbig +arbeitsfarbige +arbeitsfarbiger diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1975530.aff b/extensions/spellcheck/hunspell/tests/unit/data/1975530.aff new file mode 100644 index 0000000000..0912050d1f --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1975530.aff @@ -0,0 +1,6 @@ +SET UTF-8 +IGNORE ٌٍَُِّْـ + +PFX x N 1 +PFX x أ ت أ[^ي] + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1975530.dic b/extensions/spellcheck/hunspell/tests/unit/data/1975530.dic new file mode 100644 index 0000000000..b1b455df5a --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1975530.dic @@ -0,0 +1,3 @@ +2 +أرى/x +أيار/x diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1975530.good b/extensions/spellcheck/hunspell/tests/unit/data/1975530.good new file mode 100644 index 
0000000000..89212a57ec --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1975530.good @@ -0,0 +1,3 @@ +أرى +أيار +ترى diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1975530.test b/extensions/spellcheck/hunspell/tests/unit/data/1975530.test new file mode 100644 index 0000000000..4d59c42126 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1975530.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i UTF-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/1975530.wrong b/extensions/spellcheck/hunspell/tests/unit/data/1975530.wrong new file mode 100644 index 0000000000..24cb57627a --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/1975530.wrong @@ -0,0 +1 @@ +تيار diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970240.aff b/extensions/spellcheck/hunspell/tests/unit/data/2970240.aff new file mode 100644 index 0000000000..6ef95161d7 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/2970240.aff @@ -0,0 +1,5 @@ +# test words with three parts +CHECKCOMPOUNDPATTERN 1 +CHECKCOMPOUNDPATTERN le fi +COMPOUNDFLAG c + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970240.dic b/extensions/spellcheck/hunspell/tests/unit/data/2970240.dic new file mode 100644 index 0000000000..f0b6305693 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/2970240.dic @@ -0,0 +1,4 @@ +3 +first/c +middle/c +last/c diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970240.good b/extensions/spellcheck/hunspell/tests/unit/data/2970240.good new file mode 100644 index 0000000000..a8d3a593b6 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/2970240.good @@ -0,0 +1 @@ +firstmiddlelast diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970240.test b/extensions/spellcheck/hunspell/tests/unit/data/2970240.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/2970240.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970240.wrong b/extensions/spellcheck/hunspell/tests/unit/data/2970240.wrong new file mode 100644 index 0000000000..32cead611d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/2970240.wrong @@ -0,0 +1 @@ +lastmiddlefirst diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970242.aff b/extensions/spellcheck/hunspell/tests/unit/data/2970242.aff new file mode 100644 index 0000000000..909f0fbc3d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/2970242.aff @@ -0,0 +1,4 @@ +CHECKCOMPOUNDPATTERN 1 +CHECKCOMPOUNDPATTERN /a /b +COMPOUNDFLAG c + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970242.dic b/extensions/spellcheck/hunspell/tests/unit/data/2970242.dic new file mode 100644 index 0000000000..da0d05f92a --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/2970242.dic @@ -0,0 +1,4 @@ +3 +foo/ac +bar/c +baz/bc diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970242.good b/extensions/spellcheck/hunspell/tests/unit/data/2970242.good new file mode 100644 index 0000000000..90ecb182fa --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/2970242.good @@ -0,0 +1,5 @@ +foobar +barfoo +bazfoo +barbaz +bazbar diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970242.test b/extensions/spellcheck/hunspell/tests/unit/data/2970242.test new file 
mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/2970242.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2970242.wrong b/extensions/spellcheck/hunspell/tests/unit/data/2970242.wrong new file mode 100644 index 0000000000..9dabfec919 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/2970242.wrong @@ -0,0 +1 @@ +foobaz diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2999225.aff b/extensions/spellcheck/hunspell/tests/unit/data/2999225.aff new file mode 100644 index 0000000000..ea9d0b07ba --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/2999225.aff @@ -0,0 +1,6 @@ +COMPOUNDRULE 1 +COMPOUNDRULE ab + +COMPOUNDBEGIN A +COMPOUNDEND B + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2999225.dic b/extensions/spellcheck/hunspell/tests/unit/data/2999225.dic new file mode 100644 index 0000000000..249860362e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/2999225.dic @@ -0,0 +1,4 @@ +3 +foo/aA +bar/b +baz/B diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2999225.good b/extensions/spellcheck/hunspell/tests/unit/data/2999225.good new file mode 100644 index 0000000000..865e15452d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/2999225.good @@ -0,0 +1,2 @@ +foobar +foobaz diff --git a/extensions/spellcheck/hunspell/tests/unit/data/2999225.test b/extensions/spellcheck/hunspell/tests/unit/data/2999225.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/2999225.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/IJ.aff b/extensions/spellcheck/hunspell/tests/unit/data/IJ.aff new file mode 100644 index 0000000000..c817c2e913 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/IJ.aff @@ -0,0 +1,8 @@ +# check bad capitalisation of Dutch letter IJ. 
+TRY i +FORBIDDENWORD * +PFX i N 1 +PFX i ij IJ ij + +REP 1 +REP ij IJ diff --git a/extensions/spellcheck/hunspell/tests/unit/data/IJ.dic b/extensions/spellcheck/hunspell/tests/unit/data/IJ.dic new file mode 100644 index 0000000000..ecaf91d212 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/IJ.dic @@ -0,0 +1,3 @@ +1 +ijs/i +Ijs/* diff --git a/extensions/spellcheck/hunspell/tests/unit/data/IJ.good b/extensions/spellcheck/hunspell/tests/unit/data/IJ.good new file mode 100644 index 0000000000..5f888f057d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/IJ.good @@ -0,0 +1,2 @@ +ijs +IJs diff --git a/extensions/spellcheck/hunspell/tests/unit/data/IJ.sug b/extensions/spellcheck/hunspell/tests/unit/data/IJ.sug new file mode 100644 index 0000000000..582b7956b5 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/IJ.sug @@ -0,0 +1 @@ +IJs, ijs diff --git a/extensions/spellcheck/hunspell/tests/unit/data/IJ.test b/extensions/spellcheck/hunspell/tests/unit/data/IJ.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/IJ.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/IJ.wrong b/extensions/spellcheck/hunspell/tests/unit/data/IJ.wrong new file mode 100644 index 0000000000..54bbb475a0 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/IJ.wrong @@ -0,0 +1 @@ +Ijs diff --git a/extensions/spellcheck/hunspell/tests/unit/data/Makefile.am b/extensions/spellcheck/hunspell/tests/unit/data/Makefile.am new file mode 100644 index 0000000000..8018ccf7ba --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/Makefile.am @@ -0,0 +1,693 @@ +## Process this file with automake to create Makefile.in + +SUBDIRS = suggestiontest + +XFAIL_TESTS = @XFAILED@ + +TESTS = \ +affixes.test \ +condition.test \ +condition_utf.test \ +base.test \ +base_utf.test \ +allcaps.test \ +allcaps_utf.test \ +allcaps2.test \ +allcaps3.test \ +keepcase.test \ +i58202.test \ +map.test \ +rep.test \ +sug.test \ +sugutf.test \ +phone.test \ +flag.test \ +flaglong.test \ +flagnum.test \ +flagutf8.test \ +slash.test \ +forbiddenword.test \ +nosuggest.test \ +alias.test \ +alias2.test \ +alias3.test \ +breakdefault.test \ +break.test \ +needaffix.test \ +needaffix2.test \ +needaffix3.test \ +needaffix4.test \ +needaffix5.test \ +circumfix.test \ +fogemorpheme.test \ +onlyincompound.test \ +complexprefixes.test \ +complexprefixes2.test \ +complexprefixesutf.test \ +conditionalprefix.test \ +zeroaffix.test \ +utf8.test \ +utf8_bom.test \ +utf8_bom2.test \ +utf8_nonbmp.test \ +compoundflag.test \ +compoundrule.test \ +compoundrule2.test \ +compoundrule3.test \ +compoundrule4.test \ +compoundrule5.test \ +compoundrule6.test \ +compoundrule7.test \ +compoundrule8.test \ +compoundaffix.test \ +compoundaffix2.test \ +compoundaffix3.test \ +checkcompounddup.test \ +checkcompoundtriple.test \ +simplifiedtriple.test \ +checkcompoundrep.test \ +checkcompoundcase2.test \ +checkcompoundcaseutf.test \ +checkcompoundpattern.test \ +checkcompoundpattern2.test \ +checkcompoundpattern3.test \ +checkcompoundpattern4.test \ +utfcompound.test \ +checksharps.test \ +checksharpsutf.test \ +germancompounding.test \ +germancompoundingold.test \ +i35725.test \ +i53643.test \ +i54633.test \ +i54980.test \ +maputf.test \ +reputf.test \ +ignore.test \ +ignoreutf.test \ +1592880.test \ +1695964.test \ +1463589.test \ 
+1463589_utf.test \ +IJ.test \ +i68568.test \ +i68568utf.test \ +1706659.test \ +digits_in_words.test \ +colons_in_words.test \ +ngram_utf_fix.test \ +morph.test \ +1975530.test \ +fullstrip.test \ +iconv.test \ +oconv.test \ +encoding.test \ +korean.test \ +opentaal_forbiddenword1.test \ +opentaal_forbiddenword2.test \ +opentaal_keepcase.test \ +arabic.test \ +2970240.test \ +2970242.test \ +breakoff.test \ +opentaal_cpdpat.test \ +opentaal_cpdpat2.test \ +2999225.test \ +onlyincompound2.test \ +forceucase.test \ +warn.test + +# infixes.test + +distclean-local: + -rm -rf testSubDir + +EXTRA_DIST = \ +test.sh \ +affixes.aff \ +affixes.dic \ +affixes.good \ +affixes.test \ +condition.aff \ +condition.dic \ +condition.good \ +condition.test \ +condition.wrong \ +condition_utf.aff \ +condition_utf.dic \ +condition_utf.good \ +condition_utf.test \ +condition_utf.wrong \ +base.aff \ +base.dic \ +base.good \ +base.sug \ +base.test \ +base.wrong \ +base_utf.aff \ +base_utf.dic \ +base_utf.good \ +base_utf.sug \ +base_utf.test \ +base_utf.wrong \ +allcaps.aff \ +allcaps.dic \ +allcaps.good \ +allcaps.sug \ +allcaps.test \ +allcaps.wrong \ +allcaps2.aff \ +allcaps2.dic \ +allcaps2.good \ +allcaps2.sug \ +allcaps2.test \ +allcaps2.wrong \ +allcaps3.aff \ +allcaps3.dic \ +allcaps3.good \ +allcaps3.test \ +allcaps3.wrong \ +allcaps_utf.aff \ +allcaps_utf.dic \ +allcaps_utf.good \ +allcaps_utf.sug \ +allcaps_utf.test \ +allcaps_utf.wrong \ +keepcase.aff \ +keepcase.dic \ +keepcase.good \ +keepcase.sug \ +keepcase.test \ +keepcase.wrong \ +map.aff \ +map.dic \ +map.sug \ +map.test \ +map.wrong \ +rep.aff \ +rep.dic \ +rep.sug \ +rep.test \ +rep.wrong \ +sug.aff \ +sug.dic \ +sug.sug \ +sug.test \ +sug.wrong \ +sugutf.aff \ +sugutf.dic \ +sugutf.sug \ +sugutf.test \ +sugutf.wrong \ +phone.aff \ +phone.dic \ +phone.sug \ +phone.test \ +phone.wrong \ +alias.aff \ +alias.dic \ +alias.good \ +alias.test \ +alias2.aff \ +alias2.dic \ +alias2.good \ +alias2.morph \ +alias2.test \ +alias3.aff \ +alias3.dic \ +alias3.good \ +alias3.morph \ +alias3.test \ +break.aff \ +break.dic \ +break.good \ +break.test \ +break.wrong \ +breakdefault.aff \ +breakdefault.dic \ +breakdefault.good \ +breakdefault.sug \ +breakdefault.test \ +breakdefault.wrong \ +circumfix.aff \ +circumfix.dic \ +circumfix.good \ +circumfix.morph \ +circumfix.test \ +circumfix.wrong \ +fogemorpheme.aff \ +fogemorpheme.dic \ +fogemorpheme.good \ +fogemorpheme.test \ +fogemorpheme.wrong \ +onlyincompound.aff \ +onlyincompound.dic \ +onlyincompound.good \ +onlyincompound.sug \ +onlyincompound.test \ +onlyincompound.wrong \ +forbiddenword.aff \ +forbiddenword.dic \ +forbiddenword.good \ +forbiddenword.test \ +forbiddenword.wrong \ +nosuggest.aff \ +nosuggest.dic \ +nosuggest.good \ +nosuggest.sug \ +nosuggest.test \ +nosuggest.wrong \ +germancompounding.aff \ +germancompounding.dic \ +germancompounding.good \ +germancompounding.test \ +germancompounding.wrong \ +germancompoundingold.aff \ +germancompoundingold.dic \ +germancompoundingold.good \ +germancompoundingold.test \ +germancompoundingold.wrong \ +needaffix2.aff \ +needaffix2.dic \ +needaffix2.good \ +needaffix2.morph \ +needaffix2.test \ +needaffix3.aff \ +needaffix3.dic \ +needaffix3.good \ +needaffix3.test \ +needaffix3.wrong \ +needaffix4.aff \ +needaffix4.dic \ +needaffix4.good \ +needaffix4.test \ +needaffix5.aff \ +needaffix5.dic \ +needaffix5.good \ +needaffix5.test \ +needaffix5.wrong \ +needaffix.aff \ +needaffix.dic \ +needaffix.good \ +needaffix.test \ +needaffix.wrong \ 
+zeroaffix.aff \ +zeroaffix.dic \ +zeroaffix.good \ +zeroaffix.morph \ +zeroaffix.test \ +utf8.aff \ +utf8.dic \ +utf8.good \ +utf8.test \ +utf8_bom.aff \ +utf8_bom.dic \ +utf8_bom.good \ +utf8_bom.test \ +utf8_bom2.aff \ +utf8_bom2.dic \ +utf8_bom2.good \ +utf8_bom2.test \ +utf8_nonbmp.aff \ +utf8_nonbmp.dic \ +utf8_nonbmp.good \ +utf8_nonbmp.sug \ +utf8_nonbmp.test \ +utf8_nonbmp.wrong \ +utfcompound.aff \ +utfcompound.dic \ +utfcompound.good \ +utfcompound.test \ +utfcompound.wrong \ +compoundflag.aff \ +compoundflag.dic \ +compoundflag.good \ +compoundflag.test \ +compoundflag.wrong \ +compoundrule.aff \ +compoundrule.dic \ +compoundrule.good \ +compoundrule.test \ +compoundrule.wrong \ +compoundrule2.aff \ +compoundrule2.dic \ +compoundrule2.good \ +compoundrule2.test \ +compoundrule2.wrong \ +compoundrule3.aff \ +compoundrule3.dic \ +compoundrule3.good \ +compoundrule3.test \ +compoundrule3.wrong \ +compoundrule4.aff \ +compoundrule4.dic \ +compoundrule4.good \ +compoundrule4.test \ +compoundrule4.wrong \ +compoundrule5.aff \ +compoundrule5.dic \ +compoundrule5.good \ +compoundrule5.morph \ +compoundrule5.test \ +compoundrule5.wrong \ +compoundrule6.aff \ +compoundrule6.dic \ +compoundrule6.good \ +compoundrule6.test \ +compoundrule6.wrong \ +compoundrule7.aff \ +compoundrule7.dic \ +compoundrule7.good \ +compoundrule7.test \ +compoundrule7.wrong \ +compoundrule8.aff \ +compoundrule8.dic \ +compoundrule8.good \ +compoundrule8.test \ +compoundrule8.wrong \ +compoundaffix.aff \ +compoundaffix.dic \ +compoundaffix.good \ +compoundaffix.test \ +compoundaffix.wrong \ +compoundaffix2.aff \ +compoundaffix2.dic \ +compoundaffix2.good \ +compoundaffix2.test \ +compoundaffix3.aff \ +compoundaffix3.dic \ +compoundaffix3.good \ +compoundaffix3.test \ +compoundaffix3.wrong \ +checkcompounddup.aff \ +checkcompounddup.dic \ +checkcompounddup.good \ +checkcompounddup.test \ +checkcompounddup.wrong \ +checkcompoundcase.aff \ +checkcompoundcase.dic \ +checkcompoundcase.good \ +checkcompoundcase.test \ +checkcompoundcase.wrong \ +checkcompoundcase2.aff \ +checkcompoundcase2.dic \ +checkcompoundcase2.good \ +checkcompoundcase2.test \ +checkcompoundcase2.wrong \ +checkcompoundcaseutf.aff \ +checkcompoundcaseutf.dic \ +checkcompoundcaseutf.good \ +checkcompoundcaseutf.test \ +checkcompoundcaseutf.wrong \ +checkcompoundrep.aff \ +checkcompoundrep.dic \ +checkcompoundrep.good \ +checkcompoundrep.test \ +checkcompoundrep.wrong \ +checkcompoundtriple.aff \ +checkcompoundtriple.dic \ +checkcompoundtriple.good \ +checkcompoundtriple.test \ +checkcompoundtriple.wrong \ +simplifiedtriple.aff \ +simplifiedtriple.dic \ +simplifiedtriple.good \ +simplifiedtriple.test \ +simplifiedtriple.wrong \ +checkcompoundpattern.aff \ +checkcompoundpattern.dic \ +checkcompoundpattern.good \ +checkcompoundpattern.test \ +checkcompoundpattern.wrong \ +checkcompoundpattern2.aff \ +checkcompoundpattern2.dic \ +checkcompoundpattern2.good \ +checkcompoundpattern2.test \ +checkcompoundpattern2.wrong \ +checkcompoundpattern3.aff \ +checkcompoundpattern3.dic \ +checkcompoundpattern3.good \ +checkcompoundpattern3.test \ +checkcompoundpattern3.wrong \ +checkcompoundpattern4.aff \ +checkcompoundpattern4.dic \ +checkcompoundpattern4.good \ +checkcompoundpattern4.test \ +checkcompoundpattern4.wrong \ +checksharps.aff \ +checksharps.dic \ +checksharps.good \ +checksharps.sug \ +checksharps.test \ +checksharps.wrong \ +checksharpsutf.aff \ +checksharpsutf.dic \ +checksharpsutf.good \ +checksharpsutf.sug \ +checksharpsutf.test \ 
+checksharpsutf.wrong \ +conditionalprefix.aff \ +conditionalprefix.dic \ +conditionalprefix.good \ +conditionalprefix.morph \ +conditionalprefix.test \ +conditionalprefix.wrong \ +flaglong.aff \ +flaglong.dic \ +flaglong.good \ +flaglong.test \ +flagnum.aff \ +flagnum.dic \ +flagnum.good \ +flagnum.test \ +flag.aff \ +flag.dic \ +flag.good \ +flag.test \ +flagutf8.aff \ +flagutf8.dic \ +flagutf8.good \ +flagutf8.test \ +complexprefixes.aff \ +complexprefixes.dic \ +complexprefixes.good \ +complexprefixes.wrong \ +complexprefixes.test \ +complexprefixes2.aff \ +complexprefixes2.dic \ +complexprefixes2.good \ +complexprefixes2.test \ +complexprefixesutf.aff \ +complexprefixesutf.dic \ +complexprefixesutf.good \ +complexprefixesutf.wrong \ +complexprefixesutf.test \ +i35725.aff \ +i35725.dic \ +i35725.good \ +i35725.sug \ +i35725.test \ +i35725.wrong \ +i53643.aff \ +i53643.dic \ +i53643.good \ +i53643.test \ +i53643.wrong \ +i54633.aff \ +i54633.dic \ +i54633.good \ +i54633.sug \ +i54633.test \ +i54633.wrong \ +i54980.aff \ +i54980.dic \ +i54980.good \ +i54980.test \ +i58202.aff \ +i58202.dic \ +i58202.good \ +i58202.sug \ +i58202.test \ +i58202.wrong \ +maputf.aff \ +maputf.dic \ +maputf.sug \ +maputf.wrong \ +maputf.test \ +reputf.aff \ +reputf.dic \ +reputf.sug \ +reputf.wrong \ +reputf.test \ +slash.aff \ +slash.dic \ +slash.good \ +slash.test \ +ignore.aff \ +ignore.dic \ +ignore.good \ +ignore.test \ +ignoreutf.aff \ +ignoreutf.dic \ +ignoreutf.good \ +ignoreutf.test \ +1592880.aff \ +1592880.dic \ +1592880.good \ +1592880.test \ +1695964.aff \ +1695964.dic \ +1695964.sug \ +1695964.test \ +1695964.wrong \ +1463589.aff \ +1463589.dic \ +1463589.sug \ +1463589.test \ +1463589.wrong \ +1463589_utf.aff \ +1463589_utf.dic \ +1463589_utf.sug \ +1463589_utf.test \ +1463589_utf.wrong \ +IJ.aff \ +IJ.dic \ +IJ.good \ +IJ.sug \ +IJ.test \ +IJ.wrong \ +i68568.aff \ +i68568.dic \ +i68568.test \ +i68568.wrong \ +i68568utf.aff \ +i68568utf.dic \ +i68568utf.test \ +i68568utf.wrong \ +1706659.aff \ +1706659.dic \ +1706659.test \ +1706659.wrong \ +digits_in_words.aff \ +digits_in_words.dic \ +digits_in_words.test \ +digits_in_words.wrong \ +colons_in_words.aff \ +colons_in_words.dic \ +colons_in_words.test \ +ngram_utf_fix.aff \ +ngram_utf_fix.dic \ +ngram_utf_fix.good \ +ngram_utf_fix.sug \ +ngram_utf_fix.test \ +ngram_utf_fix.wrong \ +morph.aff \ +morph.dic \ +morph.good \ +morph.morph \ +morph.test \ +1975530.aff \ +1975530.dic \ +1975530.good \ +1975530.test \ +1975530.wrong \ +fullstrip.aff \ +fullstrip.dic \ +fullstrip.good \ +fullstrip.test \ +iconv.aff \ +iconv.dic \ +iconv.good \ +iconv.test \ +oconv.aff \ +oconv.dic \ +oconv.good \ +oconv.sug \ +oconv.test \ +oconv.wrong \ +encoding.aff \ +encoding.dic \ +encoding.good \ +encoding.test \ +opentaal_forbiddenword1.aff \ +opentaal_forbiddenword1.dic \ +opentaal_forbiddenword1.good \ +opentaal_forbiddenword1.sug \ +opentaal_forbiddenword1.test \ +opentaal_forbiddenword1.wrong \ +opentaal_forbiddenword2.aff \ +opentaal_forbiddenword2.dic \ +opentaal_forbiddenword2.good \ +opentaal_forbiddenword2.sug \ +opentaal_forbiddenword2.test \ +opentaal_forbiddenword2.wrong \ +opentaal_forbiddenword2.aff \ +opentaal_forbiddenword2.dic \ +opentaal_forbiddenword2.good \ +opentaal_forbiddenword2.sug \ +opentaal_forbiddenword2.test \ +opentaal_forbiddenword2.wrong \ +opentaal_keepcase.aff \ +opentaal_keepcase.dic \ +opentaal_keepcase.good \ +opentaal_keepcase.sug \ +opentaal_keepcase.test \ +opentaal_keepcase.wrong \ +arabic.aff \ +arabic.dic \ +arabic.wrong \ 
+arabic.test \ +2970240.aff \ +2970240.dic \ +2970240.good \ +2970240.wrong \ +2970240.test \ +2970242.aff \ +2970242.dic \ +2970242.good \ +2970242.wrong \ +2970242.test \ +breakoff.aff \ +breakoff.dic \ +breakoff.good \ +breakoff.wrong \ +breakoff.test \ +opentaal_cpdpat.aff \ +opentaal_cpdpat.dic \ +opentaal_cpdpat.good \ +opentaal_cpdpat.wrong \ +opentaal_cpdpat.test \ +opentaal_cpdpat2.aff \ +opentaal_cpdpat2.dic \ +opentaal_cpdpat2.good \ +opentaal_cpdpat2.wrong \ +opentaal_cpdpat2.test \ +2999225.aff \ +2999225.dic \ +2999225.good \ +2999225.test \ +korean.aff \ +korean.dic \ +korean.good \ +korean.wrong \ +korean.test \ +onlyincompound2.aff \ +onlyincompound2.dic \ +onlyincompound2.good \ +onlyincompound2.test \ +onlyincompound2.wrong \ +forceucase.aff \ +forceucase.dic \ +forceucase.good \ +forceucase.sug \ +forceucase.wrong \ +forceucase.test \ +warn.aff \ +warn.dic \ +warn.good \ +warn.test + +# infixes.aff +# infixes.dic +# infixes.good +# infixes.test diff --git a/extensions/spellcheck/hunspell/tests/unit/data/Makefile.in b/extensions/spellcheck/hunspell/tests/unit/data/Makefile.in new file mode 100644 index 0000000000..a27e048758 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/Makefile.in @@ -0,0 +1,1416 @@ +# Makefile.in generated by automake 1.11.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, +# Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = tests +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/codeset.m4 \ + $(top_srcdir)/m4/gettext.m4 $(top_srcdir)/m4/glibc2.m4 \ + $(top_srcdir)/m4/glibc21.m4 $(top_srcdir)/m4/iconv.m4 \ + $(top_srcdir)/m4/intdiv0.m4 $(top_srcdir)/m4/intl.m4 \ + $(top_srcdir)/m4/intlmacosx.m4 $(top_srcdir)/m4/intmax.m4 \ + $(top_srcdir)/m4/inttypes-pri.m4 \ + $(top_srcdir)/m4/inttypes_h.m4 $(top_srcdir)/m4/lcmessage.m4 \ + $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \ + $(top_srcdir)/m4/lib-prefix.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/lock.m4 $(top_srcdir)/m4/longlong.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/po.m4 \ + $(top_srcdir)/m4/printf-posix.m4 $(top_srcdir)/m4/progtest.m4 \ + $(top_srcdir)/m4/size_max.m4 $(top_srcdir)/m4/stdint_h.m4 \ + $(top_srcdir)/m4/uintmax_t.m4 $(top_srcdir)/m4/visibility.m4 \ + $(top_srcdir)/m4/wchar_t.m4 $(top_srcdir)/m4/wint_t.m4 \ + $(top_srcdir)/m4/xsize.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ + html-recursive info-recursive install-data-recursive \ + install-dvi-recursive install-exec-recursive \ + install-html-recursive install-info-recursive \ + install-pdf-recursive install-ps-recursive install-recursive \ + installcheck-recursive installdirs-recursive pdf-recursive \ + ps-recursive uninstall-recursive +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ + $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ + distdir +ETAGS = etags +CTAGS = ctags +am__tty_colors = \ +red=; grn=; lgn=; blu=; std= +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +am__relativize = \ + dir0=`pwd`; \ + sed_first='s,^\([^/]*\)/.*$$,\1,'; \ + sed_rest='s,^[^/]*/*,,'; \ + sed_last='s,^.*/\([^/]*\)$$,\1,'; \ + sed_butlast='s,/*[^/]*$$,,'; \ + while test -n "$$dir1"; do \ + first=`echo "$$dir1" | sed -e "$$sed_first"`; \ + if test "$$first" != "."; then \ + if test "$$first" = ".."; then \ + dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ + dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ + else \ + first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ + if test "$$first2" = "$$first"; then \ + dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ + else \ + dir2="../$$dir2"; \ + fi; \ + dir0="$$dir0"/"$$first"; \ + fi; \ + fi; \ + dir1=`echo 
"$$dir1" | sed -e "$$sed_rest"`; \ + done; \ + reldir="$$dir2" +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BUILD_INCLUDED_LIBINTL = @BUILD_INCLUDED_LIBINTL@ +CATOBJEXT = @CATOBJEXT@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAG_VISIBILITY = @CFLAG_VISIBILITY@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CURSESLIB = @CURSESLIB@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DATADIRNAME = @DATADIRNAME@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GENCAT = @GENCAT@ +GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@ +GLIBC2 = @GLIBC2@ +GLIBC21 = @GLIBC21@ +GMSGFMT = @GMSGFMT@ +GMSGFMT_015 = @GMSGFMT_015@ +GREP = @GREP@ +HAVE_ASPRINTF = @HAVE_ASPRINTF@ +HAVE_POSIX_PRINTF = @HAVE_POSIX_PRINTF@ +HAVE_SNPRINTF = @HAVE_SNPRINTF@ +HAVE_VISIBILITY = @HAVE_VISIBILITY@ +HAVE_WPRINTF = @HAVE_WPRINTF@ +HUNSPELL_VERSION_MAJOR = @HUNSPELL_VERSION_MAJOR@ +HUNSPELL_VERSION_MINOR = @HUNSPELL_VERSION_MINOR@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +INSTOBJEXT = @INSTOBJEXT@ +INTLBISON = @INTLBISON@ +INTLLIBS = @INTLLIBS@ +INTLOBJS = @INTLOBJS@ +INTL_LIBTOOL_SUFFIX_PREFIX = @INTL_LIBTOOL_SUFFIX_PREFIX@ +INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBICONV = @LIBICONV@ +LIBINTL = @LIBINTL@ +LIBMULTITHREAD = @LIBMULTITHREAD@ +LIBOBJS = @LIBOBJS@ +LIBPTH = @LIBPTH@ +LIBPTH_PREFIX = @LIBPTH_PREFIX@ +LIBS = @LIBS@ +LIBTHREAD = @LIBTHREAD@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBC = @LTLIBC@ +LTLIBICONV = @LTLIBICONV@ +LTLIBINTL = @LTLIBINTL@ +LTLIBMULTITHREAD = @LTLIBMULTITHREAD@ +LTLIBOBJS = @LTLIBOBJS@ +LTLIBPTH = @LTLIBPTH@ +LTLIBTHREAD = @LTLIBTHREAD@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MSGFMT = @MSGFMT@ +MSGFMT_015 = @MSGFMT_015@ +MSGMERGE = @MSGMERGE@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POSUB = @POSUB@ +PRI_MACROS_BROKEN = @PRI_MACROS_BROKEN@ +RANLIB = @RANLIB@ +READLINELIB = @READLINELIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +USE_INCLUDED_LIBINTL = @USE_INCLUDED_LIBINTL@ +USE_NLS = @USE_NLS@ +VERSION = @VERSION@ +WINDRES = @WINDRES@ +WOE32 = @WOE32@ +WOE32DLL = @WOE32DLL@ +XFAILED = @XFAILED@ +XGETTEXT = @XGETTEXT@ +XGETTEXT_015 = @XGETTEXT_015@ +XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = 
@datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lt_ECHO = @lt_ECHO@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = suggestiontest +XFAIL_TESTS = @XFAILED@ +TESTS = \ +affixes.test \ +condition.test \ +condition_utf.test \ +base.test \ +base_utf.test \ +allcaps.test \ +allcaps_utf.test \ +allcaps2.test \ +allcaps3.test \ +keepcase.test \ +i58202.test \ +map.test \ +rep.test \ +sug.test \ +sugutf.test \ +phone.test \ +flag.test \ +flaglong.test \ +flagnum.test \ +flagutf8.test \ +slash.test \ +forbiddenword.test \ +nosuggest.test \ +alias.test \ +alias2.test \ +alias3.test \ +breakdefault.test \ +break.test \ +needaffix.test \ +needaffix2.test \ +needaffix3.test \ +needaffix4.test \ +needaffix5.test \ +circumfix.test \ +fogemorpheme.test \ +onlyincompound.test \ +complexprefixes.test \ +complexprefixes2.test \ +complexprefixesutf.test \ +conditionalprefix.test \ +zeroaffix.test \ +utf8.test \ +utf8_bom.test \ +utf8_bom2.test \ +utf8_nonbmp.test \ +compoundflag.test \ +compoundrule.test \ +compoundrule2.test \ +compoundrule3.test \ +compoundrule4.test \ +compoundrule5.test \ +compoundrule6.test \ +compoundrule7.test \ +compoundrule8.test \ +compoundaffix.test \ +compoundaffix2.test \ +compoundaffix3.test \ +checkcompounddup.test \ +checkcompoundtriple.test \ +simplifiedtriple.test \ +checkcompoundrep.test \ +checkcompoundcase2.test \ +checkcompoundcaseutf.test \ +checkcompoundpattern.test \ +checkcompoundpattern2.test \ +checkcompoundpattern3.test \ +checkcompoundpattern4.test \ +utfcompound.test \ +checksharps.test \ +checksharpsutf.test \ +germancompounding.test \ +germancompoundingold.test \ +i35725.test \ +i53643.test \ +i54633.test \ +i54980.test \ +maputf.test \ +reputf.test \ +ignore.test \ +ignoreutf.test \ +1592880.test \ +1695964.test \ +1463589.test \ +1463589_utf.test \ +IJ.test \ +i68568.test \ +i68568utf.test \ +1706659.test \ +digits_in_words.test \ +colons_in_words.test \ +ngram_utf_fix.test \ +morph.test \ +1975530.test \ +fullstrip.test \ +iconv.test \ +oconv.test \ +encoding.test \ +korean.test \ +opentaal_forbiddenword1.test \ +opentaal_forbiddenword2.test \ +opentaal_keepcase.test \ +arabic.test \ +2970240.test \ +2970242.test \ +breakoff.test \ +opentaal_cpdpat.test \ +opentaal_cpdpat2.test \ +2999225.test \ +onlyincompound2.test \ +forceucase.test \ +warn.test + +EXTRA_DIST = \ +test.sh \ +affixes.aff \ +affixes.dic \ +affixes.good \ +affixes.test \ +condition.aff \ +condition.dic \ +condition.good \ +condition.test \ +condition.wrong \ +condition_utf.aff \ +condition_utf.dic \ +condition_utf.good \ +condition_utf.test \ +condition_utf.wrong \ +base.aff \ +base.dic \ +base.good \ +base.sug \ +base.test \ +base.wrong \ +base_utf.aff \ 
+base_utf.dic \ +base_utf.good \ +base_utf.sug \ +base_utf.test \ +base_utf.wrong \ +allcaps.aff \ +allcaps.dic \ +allcaps.good \ +allcaps.sug \ +allcaps.test \ +allcaps.wrong \ +allcaps2.aff \ +allcaps2.dic \ +allcaps2.good \ +allcaps2.sug \ +allcaps2.test \ +allcaps2.wrong \ +allcaps3.aff \ +allcaps3.dic \ +allcaps3.good \ +allcaps3.test \ +allcaps3.wrong \ +allcaps_utf.aff \ +allcaps_utf.dic \ +allcaps_utf.good \ +allcaps_utf.sug \ +allcaps_utf.test \ +allcaps_utf.wrong \ +keepcase.aff \ +keepcase.dic \ +keepcase.good \ +keepcase.sug \ +keepcase.test \ +keepcase.wrong \ +map.aff \ +map.dic \ +map.sug \ +map.test \ +map.wrong \ +rep.aff \ +rep.dic \ +rep.sug \ +rep.test \ +rep.wrong \ +sug.aff \ +sug.dic \ +sug.sug \ +sug.test \ +sug.wrong \ +sugutf.aff \ +sugutf.dic \ +sugutf.sug \ +sugutf.test \ +sugutf.wrong \ +phone.aff \ +phone.dic \ +phone.sug \ +phone.test \ +phone.wrong \ +alias.aff \ +alias.dic \ +alias.good \ +alias.test \ +alias2.aff \ +alias2.dic \ +alias2.good \ +alias2.morph \ +alias2.test \ +alias3.aff \ +alias3.dic \ +alias3.good \ +alias3.morph \ +alias3.test \ +break.aff \ +break.dic \ +break.good \ +break.test \ +break.wrong \ +breakdefault.aff \ +breakdefault.dic \ +breakdefault.good \ +breakdefault.sug \ +breakdefault.test \ +breakdefault.wrong \ +circumfix.aff \ +circumfix.dic \ +circumfix.good \ +circumfix.morph \ +circumfix.test \ +circumfix.wrong \ +fogemorpheme.aff \ +fogemorpheme.dic \ +fogemorpheme.good \ +fogemorpheme.test \ +fogemorpheme.wrong \ +onlyincompound.aff \ +onlyincompound.dic \ +onlyincompound.good \ +onlyincompound.sug \ +onlyincompound.test \ +onlyincompound.wrong \ +forbiddenword.aff \ +forbiddenword.dic \ +forbiddenword.good \ +forbiddenword.test \ +forbiddenword.wrong \ +nosuggest.aff \ +nosuggest.dic \ +nosuggest.good \ +nosuggest.sug \ +nosuggest.test \ +nosuggest.wrong \ +germancompounding.aff \ +germancompounding.dic \ +germancompounding.good \ +germancompounding.test \ +germancompounding.wrong \ +germancompoundingold.aff \ +germancompoundingold.dic \ +germancompoundingold.good \ +germancompoundingold.test \ +germancompoundingold.wrong \ +needaffix2.aff \ +needaffix2.dic \ +needaffix2.good \ +needaffix2.morph \ +needaffix2.test \ +needaffix3.aff \ +needaffix3.dic \ +needaffix3.good \ +needaffix3.test \ +needaffix3.wrong \ +needaffix4.aff \ +needaffix4.dic \ +needaffix4.good \ +needaffix4.test \ +needaffix5.aff \ +needaffix5.dic \ +needaffix5.good \ +needaffix5.test \ +needaffix5.wrong \ +needaffix.aff \ +needaffix.dic \ +needaffix.good \ +needaffix.test \ +needaffix.wrong \ +zeroaffix.aff \ +zeroaffix.dic \ +zeroaffix.good \ +zeroaffix.morph \ +zeroaffix.test \ +utf8.aff \ +utf8.dic \ +utf8.good \ +utf8.test \ +utf8_bom.aff \ +utf8_bom.dic \ +utf8_bom.good \ +utf8_bom.test \ +utf8_bom2.aff \ +utf8_bom2.dic \ +utf8_bom2.good \ +utf8_bom2.test \ +utf8_nonbmp.aff \ +utf8_nonbmp.dic \ +utf8_nonbmp.good \ +utf8_nonbmp.sug \ +utf8_nonbmp.test \ +utf8_nonbmp.wrong \ +utfcompound.aff \ +utfcompound.dic \ +utfcompound.good \ +utfcompound.test \ +utfcompound.wrong \ +compoundflag.aff \ +compoundflag.dic \ +compoundflag.good \ +compoundflag.test \ +compoundflag.wrong \ +compoundrule.aff \ +compoundrule.dic \ +compoundrule.good \ +compoundrule.test \ +compoundrule.wrong \ +compoundrule2.aff \ +compoundrule2.dic \ +compoundrule2.good \ +compoundrule2.test \ +compoundrule2.wrong \ +compoundrule3.aff \ +compoundrule3.dic \ +compoundrule3.good \ +compoundrule3.test \ +compoundrule3.wrong \ +compoundrule4.aff \ +compoundrule4.dic \ +compoundrule4.good \ 
+compoundrule4.test \ +compoundrule4.wrong \ +compoundrule5.aff \ +compoundrule5.dic \ +compoundrule5.good \ +compoundrule5.morph \ +compoundrule5.test \ +compoundrule5.wrong \ +compoundrule6.aff \ +compoundrule6.dic \ +compoundrule6.good \ +compoundrule6.test \ +compoundrule6.wrong \ +compoundrule7.aff \ +compoundrule7.dic \ +compoundrule7.good \ +compoundrule7.test \ +compoundrule7.wrong \ +compoundrule8.aff \ +compoundrule8.dic \ +compoundrule8.good \ +compoundrule8.test \ +compoundrule8.wrong \ +compoundaffix.aff \ +compoundaffix.dic \ +compoundaffix.good \ +compoundaffix.test \ +compoundaffix.wrong \ +compoundaffix2.aff \ +compoundaffix2.dic \ +compoundaffix2.good \ +compoundaffix2.test \ +compoundaffix3.aff \ +compoundaffix3.dic \ +compoundaffix3.good \ +compoundaffix3.test \ +compoundaffix3.wrong \ +checkcompounddup.aff \ +checkcompounddup.dic \ +checkcompounddup.good \ +checkcompounddup.test \ +checkcompounddup.wrong \ +checkcompoundcase.aff \ +checkcompoundcase.dic \ +checkcompoundcase.good \ +checkcompoundcase.test \ +checkcompoundcase.wrong \ +checkcompoundcase2.aff \ +checkcompoundcase2.dic \ +checkcompoundcase2.good \ +checkcompoundcase2.test \ +checkcompoundcase2.wrong \ +checkcompoundcaseutf.aff \ +checkcompoundcaseutf.dic \ +checkcompoundcaseutf.good \ +checkcompoundcaseutf.test \ +checkcompoundcaseutf.wrong \ +checkcompoundrep.aff \ +checkcompoundrep.dic \ +checkcompoundrep.good \ +checkcompoundrep.test \ +checkcompoundrep.wrong \ +checkcompoundtriple.aff \ +checkcompoundtriple.dic \ +checkcompoundtriple.good \ +checkcompoundtriple.test \ +checkcompoundtriple.wrong \ +simplifiedtriple.aff \ +simplifiedtriple.dic \ +simplifiedtriple.good \ +simplifiedtriple.test \ +simplifiedtriple.wrong \ +checkcompoundpattern.aff \ +checkcompoundpattern.dic \ +checkcompoundpattern.good \ +checkcompoundpattern.test \ +checkcompoundpattern.wrong \ +checkcompoundpattern2.aff \ +checkcompoundpattern2.dic \ +checkcompoundpattern2.good \ +checkcompoundpattern2.test \ +checkcompoundpattern2.wrong \ +checkcompoundpattern3.aff \ +checkcompoundpattern3.dic \ +checkcompoundpattern3.good \ +checkcompoundpattern3.test \ +checkcompoundpattern3.wrong \ +checkcompoundpattern4.aff \ +checkcompoundpattern4.dic \ +checkcompoundpattern4.good \ +checkcompoundpattern4.test \ +checkcompoundpattern4.wrong \ +checksharps.aff \ +checksharps.dic \ +checksharps.good \ +checksharps.sug \ +checksharps.test \ +checksharps.wrong \ +checksharpsutf.aff \ +checksharpsutf.dic \ +checksharpsutf.good \ +checksharpsutf.sug \ +checksharpsutf.test \ +checksharpsutf.wrong \ +conditionalprefix.aff \ +conditionalprefix.dic \ +conditionalprefix.good \ +conditionalprefix.morph \ +conditionalprefix.test \ +conditionalprefix.wrong \ +flaglong.aff \ +flaglong.dic \ +flaglong.good \ +flaglong.test \ +flagnum.aff \ +flagnum.dic \ +flagnum.good \ +flagnum.test \ +flag.aff \ +flag.dic \ +flag.good \ +flag.test \ +flagutf8.aff \ +flagutf8.dic \ +flagutf8.good \ +flagutf8.test \ +complexprefixes.aff \ +complexprefixes.dic \ +complexprefixes.good \ +complexprefixes.wrong \ +complexprefixes.test \ +complexprefixes2.aff \ +complexprefixes2.dic \ +complexprefixes2.good \ +complexprefixes2.test \ +complexprefixesutf.aff \ +complexprefixesutf.dic \ +complexprefixesutf.good \ +complexprefixesutf.wrong \ +complexprefixesutf.test \ +i35725.aff \ +i35725.dic \ +i35725.good \ +i35725.sug \ +i35725.test \ +i35725.wrong \ +i53643.aff \ +i53643.dic \ +i53643.good \ +i53643.test \ +i53643.wrong \ +i54633.aff \ +i54633.dic \ +i54633.good \ +i54633.sug \ 
+i54633.test \ +i54633.wrong \ +i54980.aff \ +i54980.dic \ +i54980.good \ +i54980.test \ +i58202.aff \ +i58202.dic \ +i58202.good \ +i58202.sug \ +i58202.test \ +i58202.wrong \ +maputf.aff \ +maputf.dic \ +maputf.sug \ +maputf.wrong \ +maputf.test \ +reputf.aff \ +reputf.dic \ +reputf.sug \ +reputf.wrong \ +reputf.test \ +slash.aff \ +slash.dic \ +slash.good \ +slash.test \ +ignore.aff \ +ignore.dic \ +ignore.good \ +ignore.test \ +ignoreutf.aff \ +ignoreutf.dic \ +ignoreutf.good \ +ignoreutf.test \ +1592880.aff \ +1592880.dic \ +1592880.good \ +1592880.test \ +1695964.aff \ +1695964.dic \ +1695964.sug \ +1695964.test \ +1695964.wrong \ +1463589.aff \ +1463589.dic \ +1463589.sug \ +1463589.test \ +1463589.wrong \ +1463589_utf.aff \ +1463589_utf.dic \ +1463589_utf.sug \ +1463589_utf.test \ +1463589_utf.wrong \ +IJ.aff \ +IJ.dic \ +IJ.good \ +IJ.sug \ +IJ.test \ +IJ.wrong \ +i68568.aff \ +i68568.dic \ +i68568.test \ +i68568.wrong \ +i68568utf.aff \ +i68568utf.dic \ +i68568utf.test \ +i68568utf.wrong \ +1706659.aff \ +1706659.dic \ +1706659.test \ +1706659.wrong \ +digits_in_words.aff \ +digits_in_words.dic \ +digits_in_words.test \ +digits_in_words.wrong \ +colons_in_words.aff \ +colons_in_words.dic \ +colons_in_words.test \ +ngram_utf_fix.aff \ +ngram_utf_fix.dic \ +ngram_utf_fix.good \ +ngram_utf_fix.sug \ +ngram_utf_fix.test \ +ngram_utf_fix.wrong \ +morph.aff \ +morph.dic \ +morph.good \ +morph.morph \ +morph.test \ +1975530.aff \ +1975530.dic \ +1975530.good \ +1975530.test \ +1975530.wrong \ +fullstrip.aff \ +fullstrip.dic \ +fullstrip.good \ +fullstrip.test \ +iconv.aff \ +iconv.dic \ +iconv.good \ +iconv.test \ +oconv.aff \ +oconv.dic \ +oconv.good \ +oconv.sug \ +oconv.test \ +oconv.wrong \ +encoding.aff \ +encoding.dic \ +encoding.good \ +encoding.test \ +opentaal_forbiddenword1.aff \ +opentaal_forbiddenword1.dic \ +opentaal_forbiddenword1.good \ +opentaal_forbiddenword1.sug \ +opentaal_forbiddenword1.test \ +opentaal_forbiddenword1.wrong \ +opentaal_forbiddenword2.aff \ +opentaal_forbiddenword2.dic \ +opentaal_forbiddenword2.good \ +opentaal_forbiddenword2.sug \ +opentaal_forbiddenword2.test \ +opentaal_forbiddenword2.wrong \ +opentaal_forbiddenword2.aff \ +opentaal_forbiddenword2.dic \ +opentaal_forbiddenword2.good \ +opentaal_forbiddenword2.sug \ +opentaal_forbiddenword2.test \ +opentaal_forbiddenword2.wrong \ +opentaal_keepcase.aff \ +opentaal_keepcase.dic \ +opentaal_keepcase.good \ +opentaal_keepcase.sug \ +opentaal_keepcase.test \ +opentaal_keepcase.wrong \ +arabic.aff \ +arabic.dic \ +arabic.wrong \ +arabic.test \ +2970240.aff \ +2970240.dic \ +2970240.good \ +2970240.wrong \ +2970240.test \ +2970242.aff \ +2970242.dic \ +2970242.good \ +2970242.wrong \ +2970242.test \ +breakoff.aff \ +breakoff.dic \ +breakoff.good \ +breakoff.wrong \ +breakoff.test \ +opentaal_cpdpat.aff \ +opentaal_cpdpat.dic \ +opentaal_cpdpat.good \ +opentaal_cpdpat.wrong \ +opentaal_cpdpat.test \ +opentaal_cpdpat2.aff \ +opentaal_cpdpat2.dic \ +opentaal_cpdpat2.good \ +opentaal_cpdpat2.wrong \ +opentaal_cpdpat2.test \ +2999225.aff \ +2999225.dic \ +2999225.good \ +2999225.test \ +korean.aff \ +korean.dic \ +korean.good \ +korean.wrong \ +korean.test \ +onlyincompound2.aff \ +onlyincompound2.dic \ +onlyincompound2.good \ +onlyincompound2.test \ +onlyincompound2.wrong \ +forceucase.aff \ +forceucase.dic \ +forceucase.good \ +forceucase.sug \ +forceucase.wrong \ +forceucase.test \ +warn.aff \ +warn.dic \ +warn.good \ +warn.test + +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: 
$(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu tests/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu tests/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. +$(RECURSIVE_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +$(RECURSIVE_CLEAN_TARGETS): + @fail= failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + rev=''; for subdir in $$list; do \ + if test "$$subdir" = "."; then :; else \ + rev="$$subdir $$rev"; \ + fi; \ + done; \ + rev="$$rev ."; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done +ctags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ + done + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +check-TESTS: $(TESTS) + @failed=0; all=0; xfail=0; xpass=0; skip=0; \ + srcdir=$(srcdir); export srcdir; \ + list=' $(TESTS) '; \ + $(am__tty_colors); \ + if test -n "$$list"; then \ + for tst in $$list; do \ + if test -f ./$$tst; then dir=./; \ + elif test -f $$tst; then dir=; \ + else dir="$(srcdir)/"; fi; \ + if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \ + all=`expr $$all + 1`; \ + case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$tst[\ \ ]*) \ + xpass=`expr $$xpass + 1`; \ + failed=`expr $$failed + 1`; \ + col=$$red; res=XPASS; \ + ;; \ + *) \ + col=$$grn; res=PASS; \ + ;; \ + esac; \ + elif test $$? 
-ne 77; then \ + all=`expr $$all + 1`; \ + case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$tst[\ \ ]*) \ + xfail=`expr $$xfail + 1`; \ + col=$$lgn; res=XFAIL; \ + ;; \ + *) \ + failed=`expr $$failed + 1`; \ + col=$$red; res=FAIL; \ + ;; \ + esac; \ + else \ + skip=`expr $$skip + 1`; \ + col=$$blu; res=SKIP; \ + fi; \ + echo "$${col}$$res$${std}: $$tst"; \ + done; \ + if test "$$all" -eq 1; then \ + tests="test"; \ + All=""; \ + else \ + tests="tests"; \ + All="All "; \ + fi; \ + if test "$$failed" -eq 0; then \ + if test "$$xfail" -eq 0; then \ + banner="$$All$$all $$tests passed"; \ + else \ + if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \ + banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \ + fi; \ + else \ + if test "$$xpass" -eq 0; then \ + banner="$$failed of $$all $$tests failed"; \ + else \ + if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \ + banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \ + fi; \ + fi; \ + dashes="$$banner"; \ + skipped=""; \ + if test "$$skip" -ne 0; then \ + if test "$$skip" -eq 1; then \ + skipped="($$skip test was not run)"; \ + else \ + skipped="($$skip tests were not run)"; \ + fi; \ + test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \ + dashes="$$skipped"; \ + fi; \ + report=""; \ + if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \ + report="Please report to $(PACKAGE_BUGREPORT)"; \ + test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \ + dashes="$$report"; \ + fi; \ + dashes=`echo "$$dashes" | sed s/./=/g`; \ + if test "$$failed" -eq 0; then \ + echo "$$grn$$dashes"; \ + else \ + echo "$$red$$dashes"; \ + fi; \ + echo "$$banner"; \ + test -z "$$skipped" || echo "$$skipped"; \ + test -z "$$report" || echo "$$report"; \ + echo "$$dashes$$std"; \ + test "$$failed" -eq 0; \ + else :; fi + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + fi; \ + done + @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ + $(am__relativize); \ + new_distdir=$$reldir; \ + dir1=$$subdir; dir2="$(top_distdir)"; \ + $(am__relativize); \ + new_top_distdir=$$reldir; \ + echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ + echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ + ($(am__cd) $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$new_top_distdir" \ + distdir="$$new_distdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + am__skip_mode_fix=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-local \ + distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +html-am: + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-dvi-am: + +install-exec-am: + +install-html: install-html-recursive + +install-html-am: + +install-info: install-info-recursive + +install-info-am: + +install-man: + +install-pdf: install-pdf-recursive + +install-pdf-am: + +install-ps: install-ps-recursive + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) check-am \ + ctags-recursive install-am install-strip tags-recursive + +.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ + all all-am check check-TESTS check-am clean clean-generic \ + clean-libtool ctags ctags-recursive distclean \ + distclean-generic distclean-libtool distclean-local \ + distclean-tags distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + installdirs-am maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \ + ps ps-am tags tags-recursive uninstall uninstall-am + + +# infixes.test + +distclean-local: + -rm -rf testSubDir + +# infixes.aff +# infixes.dic +# infixes.good +# infixes.test + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/extensions/spellcheck/hunspell/tests/unit/data/affixes.aff b/extensions/spellcheck/hunspell/tests/unit/data/affixes.aff new file mode 100644 index 0000000000..cf3c500218 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/affixes.aff @@ -0,0 +1,7 @@ +# simple example for affix compression (see Hunspell(4)) +PFX A Y 1 +PFX A 0 re . 
+ +SFX B Y 2 +SFX B 0 ed [^y] +SFX B y ied y diff --git a/extensions/spellcheck/hunspell/tests/unit/data/affixes.dic b/extensions/spellcheck/hunspell/tests/unit/data/affixes.dic new file mode 100644 index 0000000000..e228043ef1 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/affixes.dic @@ -0,0 +1,4 @@ +3 +hello +try/B +work/AB diff --git a/extensions/spellcheck/hunspell/tests/unit/data/affixes.good b/extensions/spellcheck/hunspell/tests/unit/data/affixes.good new file mode 100644 index 0000000000..20097e8e6c --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/affixes.good @@ -0,0 +1,7 @@ +hello +try +tried +work +worked +rework +reworked diff --git a/extensions/spellcheck/hunspell/tests/unit/data/affixes.test b/extensions/spellcheck/hunspell/tests/unit/data/affixes.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/affixes.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias.aff b/extensions/spellcheck/hunspell/tests/unit/data/alias.aff new file mode 100644 index 0000000000..3fbce0ac40 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/alias.aff @@ -0,0 +1,12 @@ +# aliases for flag vectors (AF) +# AB -> 1 +# A -> 2 +AF 2 +AF AB +AF A + +SFX A Y 1 +SFX A 0 x . + +SFX B Y 1 +SFX B 0 y/2 . diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias.dic b/extensions/spellcheck/hunspell/tests/unit/data/alias.dic new file mode 100644 index 0000000000..e0af3c918a --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/alias.dic @@ -0,0 +1,2 @@ +1 +foo/1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias.good b/extensions/spellcheck/hunspell/tests/unit/data/alias.good new file mode 100644 index 0000000000..71702f2317 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/alias.good @@ -0,0 +1,4 @@ +foo +foox +fooy +fooyx diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias.test b/extensions/spellcheck/hunspell/tests/unit/data/alias.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/alias.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias2.aff b/extensions/spellcheck/hunspell/tests/unit/data/alias2.aff new file mode 100644 index 0000000000..66a183833d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/alias2.aff @@ -0,0 +1,17 @@ +# aliases for flag vectors (AF) and morphological descriptions (AM) +# AB -> 1 +# A -> 2 +AF 2 +AF AB +AF A + +AM 3 +AM is:affix_x +AM ds:affix_y +AM po:noun xx:other_data + +SFX A Y 1 +SFX A 0 x . 1 + +SFX B Y 1 +SFX B 0 y/2 . 
2 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias2.dic b/extensions/spellcheck/hunspell/tests/unit/data/alias2.dic new file mode 100644 index 0000000000..60300aceef --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/alias2.dic @@ -0,0 +1,2 @@ +1 +foo/1 3 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias2.good b/extensions/spellcheck/hunspell/tests/unit/data/alias2.good new file mode 100644 index 0000000000..71702f2317 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/alias2.good @@ -0,0 +1,4 @@ +foo +foox +fooy +fooyx diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias2.morph b/extensions/spellcheck/hunspell/tests/unit/data/alias2.morph new file mode 100644 index 0000000000..01f983d57b --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/alias2.morph @@ -0,0 +1,12 @@ +> foo +analyze(foo) = st:foo po:noun xx:other_data +stem(foo) = foo +> foox +analyze(foox) = st:foo po:noun xx:other_data is:affix_x +stem(foox) = foo +> fooy +analyze(fooy) = st:foo po:noun xx:other_data ds:affix_y +stem(fooy) = fooy +> fooyx +analyze(fooyx) = st:foo po:noun xx:other_data ds:affix_y is:affix_x +stem(fooyx) = fooy diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias2.test b/extensions/spellcheck/hunspell/tests/unit/data/alias2.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/alias2.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias3.aff b/extensions/spellcheck/hunspell/tests/unit/data/alias3.aff new file mode 100644 index 0000000000..a328185008 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/alias3.aff @@ -0,0 +1,18 @@ +# morph. aliases with complex prefixes +COMPLEXPREFIXES +WORDCHARS _ + +AM 4 +AM affix_1/ +AM affix_2/ +AM /suffix_1 +AM [stem_1] + +PFX A Y 1 +PFX A 0 tek . 1 + +PFX B Y 1 +PFX B 0 met/A . 2 + +SFX C Y 1 +SFX C 0 _test_ . 
3 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias3.dic b/extensions/spellcheck/hunspell/tests/unit/data/alias3.dic new file mode 100644 index 0000000000..f22567cbe1 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/alias3.dic @@ -0,0 +1,2 @@ +1 +ouro/BC 4 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias3.good b/extensions/spellcheck/hunspell/tests/unit/data/alias3.good new file mode 100644 index 0000000000..6bf822826b --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/alias3.good @@ -0,0 +1,4 @@ +ouro +metouro +tekmetouro +ouro_test_ diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias3.morph b/extensions/spellcheck/hunspell/tests/unit/data/alias3.morph new file mode 100644 index 0000000000..33edf5cee7 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/alias3.morph @@ -0,0 +1,8 @@ +> ouro +analyze(ouro) = [stem_1] ouro:ts +> metouro +analyze(metouro) = affix_2/ ouro:ts [stem_1] +> tekmetouro +analyze(tekmetouro) = affix_1/ affix_2/ ouro:ts [stem_1] +> ouro_test_ +analyze(ouro_test_) = [stem_1] ouro:ts /suffix_1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/alias3.test b/extensions/spellcheck/hunspell/tests/unit/data/alias3.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/alias3.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.aff b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.aff new file mode 100644 index 0000000000..a117625685 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.aff @@ -0,0 +1,6 @@ +SET UTF-8 +WORDCHARS '. + +SFX S N 1 +SFX S 0 's . 
+ diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.dic b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.dic new file mode 100644 index 0000000000..7d3cdcc046 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.dic @@ -0,0 +1,3 @@ +2 +OpenOffice.org +UNICEF/S diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.good b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.good new file mode 100644 index 0000000000..3afd877d9d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.good @@ -0,0 +1,4 @@ +OpenOffice.org +OPENOFFICE.ORG +UNICEF's +UNICEF'S diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.sug b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.sug new file mode 100644 index 0000000000..d372ff23d6 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.sug @@ -0,0 +1,3 @@ +OpenOffice.org +UNICEF +UNICEF's diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.test b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.wrong new file mode 100644 index 0000000000..668194906e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps-utf.wrong @@ -0,0 +1,3 @@ +Openoffice.org +Unicef +Unicef's diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps.aff b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.aff new file mode 100644 index 0000000000..57e916bf53 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.aff @@ -0,0 +1,5 @@ +# check uppercase forms of allcaps word + affix and words with mixed casing +WORDCHARS '. + +SFX S N 1 +SFX S 0 's . 
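The allcaps fixtures (allcaps.aff above plus the allcaps.dic/.good/.sug/.wrong files that follow) check exactly what the comment in allcaps.aff states: an all-uppercase rendering of a mixed-case entry, with or without the 's suffix added by flag S, must be accepted, while partially recased forms such as "Openoffice.org" stay misspelled. Every *.test script just hands its base name to test.sh; a minimal way to exercise one fixture set by hand, assuming the standalone hunspell command-line tool is available (the flags below are the tool's standard ones, not taken from test.sh itself):

  NAME=allcaps
  hunspell -d ./$NAME -l < $NAME.good    # -l lists misspellings; expect no output
  hunspell -d ./$NAME -l < $NAME.wrong   # every line should be reported back
  hunspell -d ./$NAME -a < $NAME.wrong   # pipe mode; suggestions should line up with $NAME.sug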
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps.dic b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.dic new file mode 100644 index 0000000000..7d3cdcc046 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.dic @@ -0,0 +1,3 @@ +2 +OpenOffice.org +UNICEF/S diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps.good b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.good new file mode 100644 index 0000000000..3afd877d9d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.good @@ -0,0 +1,4 @@ +OpenOffice.org +OPENOFFICE.ORG +UNICEF's +UNICEF'S diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps.sug b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.sug new file mode 100644 index 0000000000..d372ff23d6 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.sug @@ -0,0 +1,3 @@ +OpenOffice.org +UNICEF +UNICEF's diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps.test b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps.wrong b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.wrong new file mode 100644 index 0000000000..668194906e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps.wrong @@ -0,0 +1,3 @@ +Openoffice.org +Unicef +Unicef's diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.aff b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.aff new file mode 100644 index 0000000000..67022d6eb0 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.aff @@ -0,0 +1,6 @@ +# forbidden all caps words are case sensitive +# iPod -> ipodos ("iPodic" in Hungarian) +FORBIDDENWORD * +SFX s N 1 +SFX s 0 os . 
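allcaps2.aff is the fixture whose comment spells out the intent most directly: FORBIDDENWORD * marks iPodos/* as a forbidden, case-sensitive form, so the casing test expects ipodos and IPODOS to be accepted while iPodos and ipod are rejected, exactly as split between allcaps2.good and allcaps2.wrong below. A quick spot check with the standalone tool (same assumption as above about invoking hunspell directly instead of test.sh):

  printf 'IPODOS\niPodos\n' | hunspell -d ./allcaps2 -l   # expect only iPodos to be listed as misspelled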
+ diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.dic b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.dic new file mode 100644 index 0000000000..be21bfb40b --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.dic @@ -0,0 +1,4 @@ +3 +iPod/s +iPodos/* +ipodos diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.good b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.good new file mode 100644 index 0000000000..5fd2f82ce4 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.good @@ -0,0 +1,4 @@ +iPod +IPOD +ipodos +IPODOS diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.sug b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.sug new file mode 100644 index 0000000000..5c312d7b56 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.sug @@ -0,0 +1,2 @@ +iPod +ipodos diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.test b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.wrong b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.wrong new file mode 100644 index 0000000000..010967be67 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps2.wrong @@ -0,0 +1,2 @@ +ipod +iPodos diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.aff b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.aff new file mode 100644 index 0000000000..789818e1a5 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.aff @@ -0,0 +1,10 @@ +# homonym support +WORDCHARS ' + +SFX s N 1 +SFX s 0 s . + +SFX S N 1 +SFX S 0 's . 
+ + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.dic b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.dic new file mode 100644 index 0000000000..e903a0fa96 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.dic @@ -0,0 +1,7 @@ +4 +UNESCO/S +Unesco/S +Nasa/S +NASA/S +ACTS +act/s diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.good b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.good new file mode 100644 index 0000000000..b9930a24dd --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.good @@ -0,0 +1,13 @@ +UNESCO +Unesco +UNESCO's +Unesco's +UNESCO'S +NASA +Nasa +NASA's +Nasa's +NASA'S +ACTS +acts +Acts diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.test b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.wrong b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.wrong new file mode 100644 index 0000000000..89172b8245 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/allcaps3.wrong @@ -0,0 +1,4 @@ +unesco +unesco's +nasa +nasa's diff --git a/extensions/spellcheck/hunspell/tests/unit/data/arabic.aff b/extensions/spellcheck/hunspell/tests/unit/data/arabic.aff new file mode 100644 index 0000000000..f8dd5cf244 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/arabic.aff @@ -0,0 +1,6 @@ +SET UTF-8 +TRY أ +IGNORE ٌٍَُِّْ + +PFX Aa Y 1 +PFX Aa 0 0/X0 أ[^ي] diff --git a/extensions/spellcheck/hunspell/tests/unit/data/arabic.dic b/extensions/spellcheck/hunspell/tests/unit/data/arabic.dic new file mode 100644 index 0000000000..9a2035def2 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/arabic.dic @@ -0,0 +1,2 @@ +1 +ب diff --git a/extensions/spellcheck/hunspell/tests/unit/data/arabic.test b/extensions/spellcheck/hunspell/tests/unit/data/arabic.test new file mode 100644 index 0000000000..4d59c42126 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/arabic.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i UTF-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/arabic.wrong b/extensions/spellcheck/hunspell/tests/unit/data/arabic.wrong new file mode 100644 index 0000000000..9b566c3646 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/arabic.wrong @@ -0,0 +1 @@ +ـ diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base-utf.aff b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.aff new file mode 100644 index 0000000000..493157b301 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.aff @@ -0,0 +1,198 @@ +# OpenOffice.org’s en_US.aff file +# with Unicode apostrophe: ’ + +SET UTF-8 +TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ' + +MAXNGRAMSUGS 1 +WORDCHARS .'’ + +PFX A Y 1 +PFX A 0 re . + +PFX I Y 1 +PFX I 0 in . + +PFX U Y 1 +PFX U 0 un . + +PFX C Y 1 +PFX C 0 de . + +PFX E Y 1 +PFX E 0 dis . + +PFX F Y 1 +PFX F 0 con . + +PFX K Y 1 +PFX K 0 pro . 
+ +SFX V N 2 +SFX V e ive e +SFX V 0 ive [^e] + +SFX N Y 3 +SFX N e ion e +SFX N y ication y +SFX N 0 en [^ey] + +SFX X Y 3 +SFX X e ions e +SFX X y ications y +SFX X 0 ens [^ey] + +SFX H N 2 +SFX H y ieth y +SFX H 0 th [^y] + +SFX Y Y 1 +SFX Y 0 ly . + +SFX G Y 2 +SFX G e ing e +SFX G 0 ing [^e] + +SFX J Y 2 +SFX J e ings e +SFX J 0 ings [^e] + +SFX D Y 4 +SFX D 0 d e +SFX D y ied [^aeiou]y +SFX D 0 ed [^ey] +SFX D 0 ed [aeiou]y + +SFX T N 4 +SFX T 0 st e +SFX T y iest [^aeiou]y +SFX T 0 est [aeiou]y +SFX T 0 est [^ey] + +SFX R Y 4 +SFX R 0 r e +SFX R y ier [^aeiou]y +SFX R 0 er [aeiou]y +SFX R 0 er [^ey] + +SFX Z Y 4 +SFX Z 0 rs e +SFX Z y iers [^aeiou]y +SFX Z 0 ers [aeiou]y +SFX Z 0 ers [^ey] + +SFX S Y 4 +SFX S y ies [^aeiou]y +SFX S 0 s [aeiou]y +SFX S 0 es [sxzh] +SFX S 0 s [^sxzhy] + +SFX P Y 3 +SFX P y iness [^aeiou]y +SFX P 0 ness [aeiou]y +SFX P 0 ness [^y] + +SFX M Y 1 +SFX M 0 's . + +SFX B Y 3 +SFX B 0 able [^aeiou] +SFX B 0 able ee +SFX B e able [^aeiou]e + +SFX L Y 1 +SFX L 0 ment . + +REP 88 +REP a ei +REP ei a +REP a ey +REP ey a +REP ai ie +REP ie ai +REP are air +REP are ear +REP are eir +REP air are +REP air ere +REP ere air +REP ere ear +REP ere eir +REP ear are +REP ear air +REP ear ere +REP eir are +REP eir ere +REP ch te +REP te ch +REP ch ti +REP ti ch +REP ch tu +REP tu ch +REP ch s +REP s ch +REP ch k +REP k ch +REP f ph +REP ph f +REP gh f +REP f gh +REP i igh +REP igh i +REP i uy +REP uy i +REP i ee +REP ee i +REP j di +REP di j +REP j gg +REP gg j +REP j ge +REP ge j +REP s ti +REP ti s +REP s ci +REP ci s +REP k cc +REP cc k +REP k qu +REP qu k +REP kw qu +REP o eau +REP eau o +REP o ew +REP ew o +REP oo ew +REP ew oo +REP ew ui +REP ui ew +REP oo ui +REP ui oo +REP ew u +REP u ew +REP oo u +REP u oo +REP u oe +REP oe u +REP u ieu +REP ieu u +REP ue ew +REP ew ue +REP uff ough +REP oo ieu +REP ieu oo +REP ier ear +REP ear ier +REP ear air +REP air ear +REP w qu +REP qu w +REP z ss +REP ss z +REP shun tion +REP shun sion +REP shun cion +McDonalds’sá/w +McDonald’sszá/g3) st:McDonaldâs po:noun_prs is:TRANS +McDonald’sszal/g4) st:McDonaldâs po:noun_prs is:INSTR +McDonald’ssal/w diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base-utf.dic b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.dic new file mode 100644 index 0000000000..b2b536d285 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.dic @@ -0,0 +1,29 @@ +28 +created/U +create/XKVNGADS +imply/GNSDX +natural/PUY +like/USPBY +convey/BDGS +look/GZRDS +text +hello +said +sawyer +NASA +rotten +day +tomorrow +seven +FAQ/SM +can’t +doesn’t +etc +won’t +lip +text +horrifying +speech +suggest +uncreate/V +Hunspell diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base-utf.good b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.good new file mode 100644 index 0000000000..4c73e42b8e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.good @@ -0,0 +1,27 @@ +created +uncreate +uncreated +imply +implied +unnatural +conveyed +sawyer +NASA +FAQs +can’t +doesn’t +won’t +Created +Hello +HELLO +NASA +etc. +etc +HELLO +lip. +text. +NASA. +Text. +TEXT. +Hunspell. +HUNSPELL. 
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base-utf.sug b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.sug new file mode 100644 index 0000000000..990b640cf3 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.sug @@ -0,0 +1,11 @@ +looked, look +text, create +hello +said +rotten day, rotten-day, rotten +tomorrow, rotten +seven +NASA +horrifying +speech, Hunspell +suggest diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base-utf.test b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.test new file mode 100644 index 0000000000..4d59c42126 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i UTF-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base-utf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.wrong new file mode 100644 index 0000000000..88a6e25204 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/base-utf.wrong @@ -0,0 +1,11 @@ +loooked +texxt +hlelo +seid +rottenday +tomorow +seeeven +Nasa +horrorfying +peech +sugesst diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base.aff b/extensions/spellcheck/hunspell/tests/unit/data/base.aff new file mode 100644 index 0000000000..632f04b96c --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/base.aff @@ -0,0 +1,192 @@ +# OpenOffice.org's en_US.aff file + +SET ISO8859-1 +TRY esianrtolcdugmphbyfvkwz' + +WORDCHARS .' + +PFX A Y 1 +PFX A 0 re . + +PFX I Y 1 +PFX I 0 in . + +PFX U Y 1 +PFX U 0 un . + +PFX C Y 1 +PFX C 0 de . + +PFX E Y 1 +PFX E 0 dis . + +PFX F Y 1 +PFX F 0 con . + +PFX K Y 1 +PFX K 0 pro . + +SFX V N 2 +SFX V e ive e +SFX V 0 ive [^e] + +SFX N Y 3 +SFX N e ion e +SFX N y ication y +SFX N 0 en [^ey] + +SFX X Y 3 +SFX X e ions e +SFX X y ications y +SFX X 0 ens [^ey] + +SFX H N 2 +SFX H y ieth y +SFX H 0 th [^y] + +SFX Y Y 1 +SFX Y 0 ly . + +SFX G Y 2 +SFX G e ing e +SFX G 0 ing [^e] + +SFX J Y 2 +SFX J e ings e +SFX J 0 ings [^e] + +SFX D Y 4 +SFX D 0 d e +SFX D y ied [^aeiou]y +SFX D 0 ed [^ey] +SFX D 0 ed [aeiou]y + +SFX T N 4 +SFX T 0 st e +SFX T y iest [^aeiou]y +SFX T 0 est [aeiou]y +SFX T 0 est [^ey] + +SFX R Y 4 +SFX R 0 r e +SFX R y ier [^aeiou]y +SFX R 0 er [aeiou]y +SFX R 0 er [^ey] + +SFX Z Y 4 +SFX Z 0 rs e +SFX Z y iers [^aeiou]y +SFX Z 0 ers [aeiou]y +SFX Z 0 ers [^ey] + +SFX S Y 4 +SFX S y ies [^aeiou]y +SFX S 0 s [aeiou]y +SFX S 0 es [sxzh] +SFX S 0 s [^sxzhy] + +SFX P Y 3 +SFX P y iness [^aeiou]y +SFX P 0 ness [aeiou]y +SFX P 0 ness [^y] + +SFX M Y 1 +SFX M 0 's . + +SFX B Y 3 +SFX B 0 able [^aeiou] +SFX B 0 able ee +SFX B e able [^aeiou]e + +SFX L Y 1 +SFX L 0 ment . 
+ +REP 88 +REP a ei +REP ei a +REP a ey +REP ey a +REP ai ie +REP ie ai +REP are air +REP are ear +REP are eir +REP air are +REP air ere +REP ere air +REP ere ear +REP ere eir +REP ear are +REP ear air +REP ear ere +REP eir are +REP eir ere +REP ch te +REP te ch +REP ch ti +REP ti ch +REP ch tu +REP tu ch +REP ch s +REP s ch +REP ch k +REP k ch +REP f ph +REP ph f +REP gh f +REP f gh +REP i igh +REP igh i +REP i uy +REP uy i +REP i ee +REP ee i +REP j di +REP di j +REP j gg +REP gg j +REP j ge +REP ge j +REP s ti +REP ti s +REP s ci +REP ci s +REP k cc +REP cc k +REP k qu +REP qu k +REP kw qu +REP o eau +REP eau o +REP o ew +REP ew o +REP oo ew +REP ew oo +REP ew ui +REP ui ew +REP oo ui +REP ui oo +REP ew u +REP u ew +REP oo u +REP u oo +REP u oe +REP oe u +REP u ieu +REP ieu u +REP ue ew +REP ew ue +REP uff ough +REP oo ieu +REP ieu oo +REP ier ear +REP ear ier +REP ear air +REP air ear +REP w qu +REP qu w +REP z ss +REP ss z +REP shun tion +REP shun sion +REP shun cion diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base.dic b/extensions/spellcheck/hunspell/tests/unit/data/base.dic new file mode 100644 index 0000000000..5d9b8a28b5 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/base.dic @@ -0,0 +1,29 @@ +28 +created/U +create/XKVNGADS +imply/GNSDX +natural/PUY +like/USPBY +convey/BDGS +look/GZRDS +text +hello +said +sawyer +NASA +rotten +day +tomorrow +seven +FAQ/SM +can't +doesn't +etc +won't +lip +text +horrifying +speech +suggest +uncreate/V +Hunspell diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base.good b/extensions/spellcheck/hunspell/tests/unit/data/base.good new file mode 100644 index 0000000000..8e7f88e2b4 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/base.good @@ -0,0 +1,27 @@ +created +uncreate +uncreated +imply +implied +unnatural +conveyed +sawyer +NASA +FAQs +can't +doesn't +won't +Created +Hello +HELLO +NASA +etc. +etc +HELLO +lip. +text. +NASA. +Text. +TEXT. +Hunspell. +HUNSPELL. 
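Each fixture in this test corpus follows the same naming convention: NAME.aff and NAME.dic define the dictionary, NAME.good lists words the speller must accept, NAME.wrong lists words it must reject, and NAME.sug (where present) gives the suggestions expected for the wrong words, while NAME.test forwards the fixture name (and, for non-ASCII fixtures, an -i encoding option) to a shared test.sh driver. That driver is not part of this diff, so the following is only a hedged sketch of the kind of check it presumably performs, written against the standalone hunspell(1) command-line tool; the fixture name and encoding defaults here are illustrative, not taken from the real harness.

#!/bin/sh
# Sketch only: the real test.sh is not shown in this diff and may use a
# purpose-built checker instead of the hunspell(1) tool assumed here.
NAME=${1:-base}         # fixture to exercise, e.g. base, base-utf, allcaps
ENC=${2:-ISO8859-1}     # input encoding; the *utf fixtures pass UTF-8

# "hunspell -l" prints only misspelled words, so the .good list must yield nothing.
bad=$(hunspell -d "./$NAME" -i "$ENC" -l < "$NAME.good")
[ -z "$bad" ] || { echo "FAIL $NAME: good words rejected: $bad"; exit 1; }

# "hunspell -G" prints only accepted words, so the .wrong list must yield nothing.
if [ -f "$NAME.wrong" ]; then
  ok=$(hunspell -d "./$NAME" -i "$ENC" -G < "$NAME.wrong")
  [ -z "$ok" ] || { echo "FAIL $NAME: wrong words accepted: $ok"; exit 1; }
fi

echo "PASS $NAME"

A real driver would additionally compare suggestion output (for example from hunspell -a) for the .wrong words against NAME.sug, and a morphological analysis run against the .morph files, but those comparisons are left to the actual harness.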
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base.sug b/extensions/spellcheck/hunspell/tests/unit/data/base.sug new file mode 100644 index 0000000000..553280a6aa --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/base.sug @@ -0,0 +1,11 @@ +looked, look +text +hello +said +rotten day, rotten-day, rotten +tomorrow +seven +NASA +horrifying +speech +suggest diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base.test b/extensions/spellcheck/hunspell/tests/unit/data/base.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/base.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/base.wrong b/extensions/spellcheck/hunspell/tests/unit/data/base.wrong new file mode 100644 index 0000000000..88a6e25204 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/base.wrong @@ -0,0 +1,11 @@ +loooked +texxt +hlelo +seid +rottenday +tomorow +seeeven +Nasa +horrorfying +peech +sugesst diff --git a/extensions/spellcheck/hunspell/tests/unit/data/break.aff b/extensions/spellcheck/hunspell/tests/unit/data/break.aff new file mode 100644 index 0000000000..47b8f6b7be --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/break.aff @@ -0,0 +1,8 @@ +# word break points test, recursive break at dash and n-dash +SET UTF-8 + +BREAK 2 +BREAK - +BREAK – + +WORDCHARS -– diff --git a/extensions/spellcheck/hunspell/tests/unit/data/break.dic b/extensions/spellcheck/hunspell/tests/unit/data/break.dic new file mode 100644 index 0000000000..f3d2aa02fd --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/break.dic @@ -0,0 +1,4 @@ +3 +foo +bar +fox-bax diff --git a/extensions/spellcheck/hunspell/tests/unit/data/break.good b/extensions/spellcheck/hunspell/tests/unit/data/break.good new file mode 100644 index 0000000000..5f08bfd2f1 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/break.good @@ -0,0 +1,7 @@ +foo +bar +fox-bax +foo-bar +foo–bar +foo-bar-foo-bar +foo-bar–foo-bar diff --git a/extensions/spellcheck/hunspell/tests/unit/data/break.test b/extensions/spellcheck/hunspell/tests/unit/data/break.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/break.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/break.wrong b/extensions/spellcheck/hunspell/tests/unit/data/break.wrong new file mode 100644 index 0000000000..599ed9f7ff --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/break.wrong @@ -0,0 +1,12 @@ +fox +bax +-foo +bar- +fox-bar +foo-bax +foo–bax +fox–bar +foo-bar-fox-bar +foo-bax-foo-bar +foo-bar–fox-bar +foo-bax–foo-bar diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.aff b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.aff new file mode 100644 index 0000000000..a13f464a60 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.aff @@ -0,0 +1,6 @@ +# default word break at hyphens and n-dashes + +SET UTF-8 +MAXNGRAMSUGS 0 +WORDCHARS - +TRY ot diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.dic b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.dic new file mode 100644 index 0000000000..bf29960357 --- /dev/null +++ 
b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.dic @@ -0,0 +1,6 @@ +3 +foo +bar +free +scott +scot-free diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.good b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.good new file mode 100644 index 0000000000..8d81254571 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.good @@ -0,0 +1,7 @@ +foo +bar +foo- +-foo +scot-free +foo-bar +foo-bar-foo-bar diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.sug b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.sug new file mode 100644 index 0000000000..8bfc69d936 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.sug @@ -0,0 +1,3 @@ +scott +scot-free +foo-bar diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.test b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.wrong b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.wrong new file mode 100644 index 0000000000..c3b203a7f2 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/breakdefault.wrong @@ -0,0 +1,3 @@ +scot +sco-free +fo-bar diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakoff.aff b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.aff new file mode 100644 index 0000000000..2e83d38023 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.aff @@ -0,0 +1,7 @@ +# switch off default word break at hyphens and n-dashes by BREAK 0 +SET UTF-8 +MAXNGRAMSUGS 0 +WORDCHARS - +TRY ot + +BREAK 0 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakoff.dic b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.dic new file mode 100644 index 0000000000..bf29960357 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.dic @@ -0,0 +1,6 @@ +3 +foo +bar +free +scott +scot-free diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakoff.good b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.good new file mode 100644 index 0000000000..854b39efad --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.good @@ -0,0 +1,3 @@ +foo +bar +scot-free diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakoff.test b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/breakoff.wrong b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.wrong new file mode 100644 index 0000000000..a6fcf7f1e2 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/breakoff.wrong @@ -0,0 +1,5 @@ +foo- +-foo +foo-bar +foo-bar-foo-bar +scot diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.aff new file mode 100644 index 0000000000..7ac46eeab7 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.aff @@ -0,0 +1,3 @@ +# forbid 
upper case letters at word bounds in compounding +CHECKCOMPOUNDCASE +COMPOUNDFLAG A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.dic new file mode 100644 index 0000000000..80f65d38f6 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.dic @@ -0,0 +1,5 @@ +4 +foo/A +Bar/A +BAZ/A +-/A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.good new file mode 100644 index 0000000000..9cbd79064d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.good @@ -0,0 +1,5 @@ +Barfoo +foo-Bar +foo-BAZ +BAZ-foo +BAZ-Bar diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.test new file mode 100644 index 0000000000..dc295077fb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i ISO8859-1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.wrong new file mode 100644 index 0000000000..0714c22e5d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase.wrong @@ -0,0 +1,3 @@ +fooBar +BAZBar +BAZfoo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.aff new file mode 100644 index 0000000000..fea046b195 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.aff @@ -0,0 +1,3 @@ +# check extended ascii +CHECKCOMPOUNDCASE +COMPOUNDFLAG A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.dic new file mode 100644 index 0000000000..086de0aed7 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.dic @@ -0,0 +1,3 @@ +2 +o/A +o/A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.good new file mode 100644 index 0000000000..b38fd0c6c0 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.good @@ -0,0 +1,2 @@ +oo +oo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.test new file mode 100644 index 0000000000..dc295077fb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i ISO8859-1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.wrong new file mode 100644 index 0000000000..94786e95b3 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcase2.wrong @@ -0,0 +1 @@ +oo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.aff new file mode 100644 index 0000000000..546f478a52 --- /dev/null +++ 
b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.aff @@ -0,0 +1,3 @@ +SET UTF-8 +CHECKCOMPOUNDCASE +COMPOUNDFLAG A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.dic new file mode 100644 index 0000000000..0b7fbc9acf --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.dic @@ -0,0 +1,3 @@ +2 +áoó/A +Óoá/A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.good new file mode 100644 index 0000000000..32ae1353c9 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.good @@ -0,0 +1,2 @@ +áoóáoó +Óoááoó diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.wrong new file mode 100644 index 0000000000..07434ccae5 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundcaseutf.wrong @@ -0,0 +1 @@ +áoóÓoá diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.aff new file mode 100644 index 0000000000..5cd357a5a5 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.aff @@ -0,0 +1,3 @@ +# Forbid compound word with triple letters +CHECKCOMPOUNDDUP +COMPOUNDFLAG A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.dic new file mode 100644 index 0000000000..8ac75f4fc5 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.dic @@ -0,0 +1,3 @@ +2 +foo/A +bar/A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.good new file mode 100644 index 0000000000..3866f24cae --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.good @@ -0,0 +1,5 @@ +barfoo +foobar +foofoobar +foobarfoo +barfoobarfoo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.wrong new file mode 100644 index 0000000000..5e809b3d8c --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompounddup.wrong @@ -0,0 +1,3 @@ +foofoo +foofoofoo +foobarbar diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.aff new file mode 100644 index 0000000000..dfda51af27 --- 
/dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.aff @@ -0,0 +1,5 @@ +# forbid compounds with spec. pattern at word bounds +COMPOUNDFLAG A +CHECKCOMPOUNDPATTERN 2 +CHECKCOMPOUNDPATTERN nny ny +CHECKCOMPOUNDPATTERN ssz sz diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.dic new file mode 100644 index 0000000000..09300f0bcd --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.dic @@ -0,0 +1,5 @@ +4 +knny/A +nyels/A +hossz/A +szmts/A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.good new file mode 100644 index 0000000000..0f99c52d24 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.good @@ -0,0 +1,2 @@ +knnyszmts +hossznyels diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.test new file mode 100644 index 0000000000..dc295077fb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i ISO8859-1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.wrong new file mode 100644 index 0000000000..5edd115342 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern.wrong @@ -0,0 +1,4 @@ +knnynyels +hosszszmts +hosszknnynyels +knnynyelshossz diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.aff new file mode 100644 index 0000000000..fdf6560b4f --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.aff @@ -0,0 +1,7 @@ +# forbid compounds with spec. 
pattern at word bound and allow modificated form +# (for German and Indian languages) +COMPOUNDFLAG A +CHECKCOMPOUNDPATTERN 2 +CHECKCOMPOUNDPATTERN o b z +CHECKCOMPOUNDPATTERN oo ba u +COMPOUNDMIN 1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.dic new file mode 100644 index 0000000000..8ac75f4fc5 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.dic @@ -0,0 +1,3 @@ +2 +foo/A +bar/A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.good new file mode 100644 index 0000000000..eaad4f902b --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.good @@ -0,0 +1,3 @@ +barfoo +fozar +fur diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.test new file mode 100644 index 0000000000..dc295077fb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i ISO8859-1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.wrong new file mode 100644 index 0000000000..323fae03f4 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern2.wrong @@ -0,0 +1 @@ +foobar diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.aff new file mode 100644 index 0000000000..6c2cfa4aa9 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.aff @@ -0,0 +1,6 @@ +# forbid compounds with spec. 
pattern at word bound and allow modificated form +# (for Indian languages) +COMPOUNDFLAG A +CHECKCOMPOUNDPATTERN 1 +CHECKCOMPOUNDPATTERN o/X b/Y z +COMPOUNDMIN 1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.dic new file mode 100644 index 0000000000..6bd1b7fc9e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.dic @@ -0,0 +1,5 @@ +4 +foo/A +boo/AX +bar/A +ban/AY diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.good new file mode 100644 index 0000000000..6070eff5c5 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.good @@ -0,0 +1,9 @@ +bozan +barfoo +banfoo +banbar +foobar +fooban +foobanbar +boobar +boobarfoo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.test new file mode 100644 index 0000000000..dc295077fb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i ISO8859-1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.wrong new file mode 100644 index 0000000000..41d8d37471 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern3.wrong @@ -0,0 +1,8 @@ +booban +boobanfoo +fozar +fozarfoo +fozan +fozanfoo +bozar +bozarfoo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.aff new file mode 100644 index 0000000000..ef25663080 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.aff @@ -0,0 +1,8 @@ +# sandhi in Telugu writing system, based on the Kiran Chittella's example + +COMPOUNDFLAG x +COMPOUNDMIN 1 +CHECKCOMPOUNDPATTERN 2 +CHECKCOMPOUNDPATTERN a/A u/A O +CHECKCOMPOUNDPATTERN u/B u/B u + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.dic new file mode 100644 index 0000000000..d245ef0196 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.dic @@ -0,0 +1,6 @@ +4 +sUrya/Ax +udayaM/Ax +pEru/Bx +unna/Bx + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.good new file mode 100644 index 0000000000..48761b6ee0 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.good @@ -0,0 +1,2 @@ +sUryOdayaM +pErunna diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.test new file mode 100644 index 0000000000..dc295077fb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i ISO8859-1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.wrong new file mode 100644 
index 0000000000..a357fec522 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundpattern4.wrong @@ -0,0 +1,2 @@ +sUryaudayaM +pEruunna diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.aff new file mode 100644 index 0000000000..4fb7ff55e5 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.aff @@ -0,0 +1,8 @@ +// forbid compound word, if it is also a non compound word with a REP fault +// In example: Hungarian `szervz' (szer+vz) compound word is forbidden, because +// this word is also a dictionary word (szerviz) with typical fault (i->) +CHECKCOMPOUNDREP +COMPOUNDFLAG A + +REP 1 +REP i diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.dic new file mode 100644 index 0000000000..030bda916c --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.dic @@ -0,0 +1,5 @@ +3 +szer/A +vz/A +szerviz +kocsi/A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.good new file mode 100644 index 0000000000..c95c03c875 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.good @@ -0,0 +1,2 @@ +vzszer +szerkocsi
\ No newline at end of file diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.test new file mode 100644 index 0000000000..dc295077fb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i ISO8859-1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.wrong new file mode 100644 index 0000000000..8c8701d472 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundrep.wrong @@ -0,0 +1,3 @@ +szervz +szervzkocsi +kocsiszervz diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.aff b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.aff new file mode 100644 index 0000000000..7159cf55dd --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.aff @@ -0,0 +1,3 @@ +# Forbid compound word with triple letters +CHECKCOMPOUNDTRIPLE +COMPOUNDFLAG A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.dic b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.dic new file mode 100644 index 0000000000..607c489e8b --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.dic @@ -0,0 +1,5 @@ +4 +foo/A +opera/A +eel/A +bare/A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.good b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.good new file mode 100644 index 0000000000..1293f749ad --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.good @@ -0,0 +1,6 @@ +operafoo +operaeel +operabare +eelbare +eelfoo +eelopera diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.test b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.wrong new file mode 100644 index 0000000000..ae2d02b20d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checkcompoundtriple.wrong @@ -0,0 +1,2 @@ +fooopera +bareeel diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharps.aff b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.aff new file mode 100644 index 0000000000..6b22c73906 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.aff @@ -0,0 +1,4 @@ +# test - SS special capitalizing +CHECKSHARPS +WORDCHARS ß. +KEEPCASE k diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharps.dic b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.dic new file mode 100644 index 0000000000..91d14ab9ef --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.dic @@ -0,0 +1,7 @@ +6 +müßig/k +Ausstoß +Abstoß. 
+Außenabmessung +Prozessionsstraße +Außenmaße diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharps.good b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.good new file mode 100644 index 0000000000..e9be8c5c79 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.good @@ -0,0 +1,13 @@ +müßig +Müßig +MÜSSIG +Ausstoß +Abstoß. +Außenabmessung +Prozessionsstraße +Außenmaße +AUSSTOSS +ABSTOSS. +AUSSENABMESSUNG +PROZESSIONSSTRASSE +AUSSENMASSE diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharps.sug b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.sug new file mode 100644 index 0000000000..52c6a943be --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.sug @@ -0,0 +1 @@ +MÜSSIG, müßig diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharps.test b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.test new file mode 100644 index 0000000000..dc295077fb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i ISO8859-1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharps.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.wrong new file mode 100644 index 0000000000..96eb8aea7c --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharps.wrong @@ -0,0 +1 @@ +MÜßIG diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.aff b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.aff new file mode 100644 index 0000000000..86c0fc426f --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.aff @@ -0,0 +1,5 @@ +# test - SS special capitalizing in UTF-8 +SET UTF-8 +CHECKSHARPS +WORDCHARS ß. +KEEPCASE k diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.dic b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.dic new file mode 100644 index 0000000000..9cc364eec1 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.dic @@ -0,0 +1,7 @@ +6 +müßig/k +Ausstoß +Abstoß. +Außenabmessung +Prozessionsstraße +Außenmaße diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.good b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.good new file mode 100644 index 0000000000..a61c243193 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.good @@ -0,0 +1,13 @@ +müßig +Müßig +MÜSSIG +Ausstoß +Abstoß. +Außenabmessung +Prozessionsstraße +Außenmaße +AUSSTOSS +ABSTOSS. 
+AUSSENABMESSUNG +PROZESSIONSSTRASSE +AUSSENMASSE diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.sug b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.sug new file mode 100644 index 0000000000..ab68568e52 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.sug @@ -0,0 +1 @@ +MÜSSIG, müßig diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.test b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.wrong new file mode 100644 index 0000000000..25eb03dcec --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/checksharpsutf.wrong @@ -0,0 +1 @@ +MÜßIG diff --git a/extensions/spellcheck/hunspell/tests/unit/data/circumfix.aff b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.aff new file mode 100644 index 0000000000..1eecc644b0 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.aff @@ -0,0 +1,16 @@ +# circumfixes: ~ obligate prefix/suffix combinations +# superlative in Hungarian: leg- (prefix) AND -bb (suffix) + +CIRCUMFIX X + +PFX A Y 1 +PFX A 0 leg/X . + +PFX B Y 1 +PFX B 0 legesleg/X . + +SFX C Y 3 +SFX C 0 obb . is:COMPARATIVE +SFX C 0 obb/AX . is:SUPERLATIVE +SFX C 0 obb/BX . is:SUPERSUPERLATIVE + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/circumfix.dic b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.dic new file mode 100644 index 0000000000..ba96f046dd --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.dic @@ -0,0 +1,2 @@ +1 +nagy/C po:adj diff --git a/extensions/spellcheck/hunspell/tests/unit/data/circumfix.good b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.good new file mode 100644 index 0000000000..65049d9f01 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.good @@ -0,0 +1,4 @@ +nagy +nagyobb +legnagyobb +legeslegnagyobb diff --git a/extensions/spellcheck/hunspell/tests/unit/data/circumfix.morph b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.morph new file mode 100644 index 0000000000..62e6c5371b --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.morph @@ -0,0 +1,12 @@ +> nagy +analyze(nagy) = st:nagy po:adj +stem(nagy) = nagy +> nagyobb +analyze(nagyobb) = st:nagy po:adj is:COMPARATIVE +stem(nagyobb) = nagy +> legnagyobb +analyze(legnagyobb) = fl:A st:nagy po:adj is:SUPERLATIVE +stem(legnagyobb) = nagy +> legeslegnagyobb +analyze(legeslegnagyobb) = fl:B st:nagy po:adj is:SUPERSUPERLATIVE +stem(legeslegnagyobb) = nagy diff --git a/extensions/spellcheck/hunspell/tests/unit/data/circumfix.test b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/circumfix.wrong b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.wrong new file mode 100644 index 0000000000..bab8084ee1 --- /dev/null +++ 
b/extensions/spellcheck/hunspell/tests/unit/data/circumfix.wrong @@ -0,0 +1,2 @@ +legnagy +legeslegnagy diff --git a/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.aff b/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.aff new file mode 100644 index 0000000000..d08022694e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.aff @@ -0,0 +1,3 @@ +# Colons in Finnish and Swedish words. Problem reported by Lars Aronsson. +# Parsing test (src/parsers) +WORDCHARS : diff --git a/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.dic b/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.dic new file mode 100644 index 0000000000..bfea1ccc7e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.dic @@ -0,0 +1,4 @@ +2 +c:a +S:t +foo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.test b/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/colons-in-words.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.aff b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.aff new file mode 100644 index 0000000000..7ddb497a62 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.aff @@ -0,0 +1,9 @@ +# set twofold prefix stripping +# Coptic example by Moheb Mekhaiel +COMPLEXPREFIXES + +PFX A Y 1 +PFX A 0 tek . + +PFX B Y 1 +PFX B 0 met/A . diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.dic b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.dic new file mode 100644 index 0000000000..2618c7cf43 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.dic @@ -0,0 +1,3 @@ +1 +ouro/B + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.good b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.good new file mode 100644 index 0000000000..eed87a7746 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.good @@ -0,0 +1,3 @@ +ouro +metouro +tekmetouro diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.test b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.wrong b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.wrong new file mode 100644 index 0000000000..fb1c8b4835 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes.wrong @@ -0,0 +1,2 @@ +tekouro +mettekouro diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.aff b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.aff new file mode 100644 index 0000000000..b4fe1dca61 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.aff @@ -0,0 +1,12 @@ +# complex prefixes with morphological analysis +COMPLEXPREFIXES +WORDCHARS _ + +PFX A Y 1 +PFX A 0 tek . affix_1/ + +PFX B Y 1 +PFX B 0 met/A . affix_2/ + +SFX C Y 1 +SFX C 0 _test_ . 
/suffix_1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.dic b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.dic new file mode 100644 index 0000000000..7e4baf06c1 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.dic @@ -0,0 +1,3 @@ +1 +ouro/BC [stem_1] + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.good b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.good new file mode 100644 index 0000000000..6bf822826b --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.good @@ -0,0 +1,4 @@ +ouro +metouro +tekmetouro +ouro_test_ diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.test b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixes2.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.aff b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.aff new file mode 100644 index 0000000000..3991e9f5ca --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.aff @@ -0,0 +1,12 @@ +# Coptic example by Moheb Mekhaiel +# Encoded with the new Coptic character encoding of Unicode 4.1 +SET UTF-8 + +# set twofold prefix stripping +COMPLEXPREFIXES + +PFX A Y 1 +PFX A 0 ⲧⲉⲕ . + +PFX B Y 1 +PFX B 0 ⲙⲉⲧ/A . diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.dic b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.dic new file mode 100644 index 0000000000..bd0eb6df0d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.dic @@ -0,0 +1,2 @@ +1 +ⲟⲩⲣⲟ/B diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.good b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.good new file mode 100644 index 0000000000..7eb9566199 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.good @@ -0,0 +1,3 @@ +ⲟⲩⲣⲟ +ⲙⲉⲧⲟⲩⲣⲟ +ⲧⲉⲕⲙⲉⲧⲟⲩⲣⲟ diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.test b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.wrong new file mode 100644 index 0000000000..d8021fc444 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/complexprefixesutf.wrong @@ -0,0 +1,2 @@ +ⲧⲉⲕⲟⲩⲣⲟ +ⲙⲉⲧⲧⲉⲕⲟⲩⲣⲟ diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.aff new file mode 100644 index 0000000000..cae5669c21 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.aff @@ -0,0 +1,7 @@ +COMPOUNDFLAG X + +PFX P Y 1 +PFX P 0 pre . + +SFX S Y 1 +SFX S 0 suf . 
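The compoundaffix fixture above combines COMPOUNDFLAG X with an ordinary prefix (pre-) and suffix (-suf). By default Hunspell only allows affixed forms at the outer edges of a compound, which is why prefoosuf and prefoobarsuf appear in the .good list below while fooprebarsuf, with an affix at an inner word boundary, is in the .wrong list. A quick manual spot check, assuming the standalone hunspell(1) tool is run from this data directory (hunspell -G echoes only the words it accepts):

echo prefoobarsuf | hunspell -d ./compoundaffix -G   # expected: echoed back (accepted)
echo fooprebarsuf | hunspell -d ./compoundaffix -G   # expected: no output (rejected)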
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.dic new file mode 100644 index 0000000000..eba6b83fb4 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.dic @@ -0,0 +1,3 @@ +2 +foo/XPS +bar/XPS diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.good new file mode 100644 index 0000000000..af1f0019ad --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.good @@ -0,0 +1,6 @@ +foo +foofoo +prefoo +foosuf +prefoosuf +prefoobarsuf diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.wrong new file mode 100644 index 0000000000..b7e4067bca --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix.wrong @@ -0,0 +1,3 @@ +foosufbar +fooprebarsuf +prefooprebarsuf diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.aff new file mode 100644 index 0000000000..1cac16e117 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.aff @@ -0,0 +1,8 @@ +COMPOUNDFLAG X +COMPOUNDPERMITFLAG Y + +PFX P Y 1 +PFX P 0 pre/Y . + +SFX S Y 1 +SFX S 0 suf/Y . diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.dic new file mode 100644 index 0000000000..eba6b83fb4 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.dic @@ -0,0 +1,3 @@ +2 +foo/XPS +bar/XPS diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.good new file mode 100644 index 0000000000..9f3020da07 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.good @@ -0,0 +1,8 @@ +foo +prefoo +foosuf +prefoosuf +prefoobarsuf +foosufbar +fooprebarsuf +prefooprebarsuf diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix2.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.aff new file mode 100644 index 0000000000..98a12b56c7 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.aff @@ -0,0 +1,8 @@ +COMPOUNDFLAG X +COMPOUNDFORBIDFLAG Z + +PFX P Y 1 +PFX P 0 pre/Z . + +SFX S Y 1 +SFX S 0 suf/Z . 
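compoundaffix2 and compoundaffix3 above differ from compoundaffix only in the flags attached to the affixes: COMPOUNDPERMITFLAG Y (compoundaffix2) explicitly permits the marked prefix and suffix at inner compound boundaries, so forms such as fooprebarsuf and prefooprebarsuf move into the .good list, whereas COMPOUNDFORBIDFLAG Z (compoundaffix3) keeps the marked affixes out of compounding altogether, so even the edge form prefoobarsuf lands in the .wrong list while prefoo and prefoosuf on a single stem remain good. The same kind of hypothetical spot check as above applies, again assuming the hunspell(1) tool:

echo fooprebarsuf | hunspell -d ./compoundaffix2 -G   # expected: accepted (permit flag)
echo prefoobarsuf | hunspell -d ./compoundaffix3 -G   # expected: no output (forbid flag)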
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.dic new file mode 100644 index 0000000000..eba6b83fb4 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.dic @@ -0,0 +1,3 @@ +2 +foo/XPS +bar/XPS diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.good new file mode 100644 index 0000000000..76cc08eae2 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.good @@ -0,0 +1,5 @@ +foo +foofoo +prefoo +foosuf +prefoosuf diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.wrong new file mode 100644 index 0000000000..d92b90b282 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundaffix3.wrong @@ -0,0 +1,6 @@ +prefoobarsuf +foosufbar +fooprebar +foosufprebar +fooprebarsuf +prefooprebarsuf diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.aff new file mode 100644 index 0000000000..bc8369ceba --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.aff @@ -0,0 +1,3 @@ +COMPOUNDMIN 3 +COMPOUNDFLAG A + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.dic new file mode 100644 index 0000000000..d1ea8e96e7 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.dic @@ -0,0 +1,5 @@ +4 +foo/A +bar/A +xy/A +yz/A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.good new file mode 100644 index 0000000000..21cc29f2f0 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.good @@ -0,0 +1,3 @@ +foobar +barfoo +foobarfoo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.wrong new file mode 100644 index 0000000000..c185bf150e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundflag.wrong @@ -0,0 +1,4 @@ +xyyz +fooxy +xyfoo +fooxybar diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.aff new file mode 100644 index 0000000000..09309e0aab --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.aff @@ -0,0 +1,3 @@ +COMPOUNDMIN 1 +COMPOUNDRULE 1 +COMPOUNDRULE ABC diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.dic 
b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.dic new file mode 100644 index 0000000000..b11e8291e6 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.dic @@ -0,0 +1,5 @@ +3 +a/A +b/B +c/BC + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.good new file mode 100644 index 0000000000..c7a0763bb1 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.good @@ -0,0 +1,2 @@ +abc +acc diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.wrong new file mode 100644 index 0000000000..bc151ea029 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule.wrong @@ -0,0 +1,39 @@ +ba +aaabaaa +bbaaa +aaaaba +bbbbbaa +aa +aaa +aaaa +ab +aab +aaab +aaaab +abb +aabb +aaabbb +bb +bbb +bbbb +aaab +abcc +abbc +abbcc +aabc +aabcc +aabbc +aabbcc +aaabbbccc +ac +aac +aacc +aaaccc +bc +bcc +bbc +bbcc +bbbccc +cc +ccc +cccccc diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.aff new file mode 100644 index 0000000000..e4b86a53b4 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.aff @@ -0,0 +1,3 @@ +COMPOUNDMIN 1 +COMPOUNDRULE 1 +COMPOUNDRULE A*B*C* diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.dic new file mode 100644 index 0000000000..7d07bbc89a --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.dic @@ -0,0 +1,5 @@ +3 +a/A +b/B +c/C + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.good new file mode 100644 index 0000000000..de743bb067 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.good @@ -0,0 +1,37 @@ +aa +aaa +aaaa +ab +aab +aaab +aaaab +abb +aabb +aaabbb +bb +bbb +bbbb +aaab +abc +abcc +abbc +abbcc +aabc +aabcc +aabbc +aabbcc +aaabbbccc +ac +acc +aac +aacc +aaaccc +bc +bcc +bbc +bbcc +bbbccc +cc +ccc +cccccc +abcc diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.wrong new file mode 100644 index 0000000000..9e5d38d350 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule2.wrong @@ -0,0 +1,8 @@ +ba +aaabaaa +bbaaa +aaaaba +bbbbbaa +cba +cab +acb diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.aff new file mode 100644 index 
0000000000..005314586c --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.aff @@ -0,0 +1,3 @@ +COMPOUNDMIN 1 +COMPOUNDRULE 1 +COMPOUNDRULE A?B?C? diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.dic new file mode 100644 index 0000000000..7d07bbc89a --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.dic @@ -0,0 +1,5 @@ +3 +a/A +b/B +c/C + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.good new file mode 100644 index 0000000000..7f518893e9 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.good @@ -0,0 +1,7 @@ +a +b +c +ab +abc +ac +bc diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.wrong new file mode 100644 index 0000000000..6bd1d8004a --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule3.wrong @@ -0,0 +1,41 @@ +aa +aaa +aaaa +aab +aaab +aaaab +abb +aabb +aaabbb +bb +bbb +bbbb +aaab +abcc +abbc +abbcc +aabc +aabcc +aabbc +aabbcc +aaabbbccc +acc +aac +aacc +aaaccc +bcc +bbc +bbcc +bbbccc +cc +ccc +cccccc +abcc +ba +aaabaaa +bbaaa +aaaaba +bbbbbaa +cba +cab +acb diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.aff new file mode 100644 index 0000000000..8a9996cb3e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.aff @@ -0,0 +1,7 @@ +# English ordinal numbers +WORDCHARS 0123456789 +COMPOUNDMIN 1 +ONLYINCOMPOUND c +COMPOUNDRULE 2 +COMPOUNDRULE n*1t +COMPOUNDRULE n*mp diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.dic new file mode 100644 index 0000000000..ced0735ec1 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.dic @@ -0,0 +1,24 @@ +22 +0/nm +1/n1 +2/nm +3/nm +4/nm +5/nm +6/nm +7/nm +8/nm +9/nm +0th/pt +1st/p +1th/tc +2nd/p +2th/tc +3rd/p +3th/tc +4th/pt +5th/pt +6th/pt +7th/pt +8th/pt +9th/pt diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.good new file mode 100644 index 0000000000..fafe64a5ca --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.good @@ -0,0 +1,29 @@ +1st +2nd +3rd +4th +5th +6th +7th +8th +9th +10th +11th +12th +13th +14th +15th +16th +17th +18th +19th +20th +21st +22nd +23rd +24th +25th +100th +1000th +10001st +10011th diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.test new file mode 100644 index 0000000000..52e144cb80 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.test @@ -0,0 +1,6 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME + + diff --git 
a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.wrong new file mode 100644 index 0000000000..99f28e7cc3 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule4.wrong @@ -0,0 +1,5 @@ +1th +2th +3th +10001th +10011st diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.aff new file mode 100644 index 0000000000..46502460bc --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.aff @@ -0,0 +1,7 @@ +# number + percent +SET UTF-8 +COMPOUNDMIN 1 +COMPOUNDRULE 2 +COMPOUNDRULE N*%? +COMPOUNDRULE NN*.NN*%? +WORDCHARS 0123456789‰. diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.dic new file mode 100644 index 0000000000..eeeffdac50 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.dic @@ -0,0 +1,14 @@ +13 +0/N po:num +1/N po:num +2/N po:num +3/N po:num +4/N po:num +5/N po:num +6/N po:num +7/N po:num +8/N po:num +9/N po:num +./. po:sign_dot +%/% po:sign_percent +‰/% po:sign_per_mille diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.good new file mode 100644 index 0000000000..691fca1fb9 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.good @@ -0,0 +1,7 @@ +10% +0.2% +0.20% +123.4561‰ +10 +0000 +10.25 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.morph b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.morph new file mode 100644 index 0000000000..107a80859f --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.morph @@ -0,0 +1,21 @@ +> 10% +analyze(10%) = pa:1 st:1 po:num pa:0 st:0 po:num pa:% st:% po:sign_percent +stem(10%) = 10% +> 0.2% +analyze(0.2%) = pa:0 st:0 po:num pa:. st:. po:sign_dot pa:2 st:2 po:num pa:% st:% po:sign_percent +stem(0.2%) = 0.2% +> 0.20% +analyze(0.20%) = pa:0 st:0 po:num pa:. st:. po:sign_dot pa:2 st:2 po:num pa:0 st:0 po:num pa:% st:% po:sign_percent +stem(0.20%) = 0.20% +> 123.4561‰ +analyze(123.4561‰) = pa:1 st:1 po:num pa:2 st:2 po:num pa:3 st:3 po:num pa:. st:. po:sign_dot pa:4 st:4 po:num pa:5 st:5 po:num pa:6 st:6 po:num pa:1 st:1 po:num pa:‰ st:‰ po:sign_per_mille +stem(123.4561‰) = 123.4561‰ +> 10 +analyze(10) = pa:1 st:1 po:num pa:0 st:0 po:num +stem(10) = 10 +> 0000 +analyze(0000) = pa:0 st:0 po:num pa:0 st:0 po:num pa:0 st:0 po:num pa:0 st:0 po:num +stem(0000) = 0000 +> 10.25 +analyze(10.25) = pa:1 st:1 po:num pa:0 st:0 po:num pa:. st:. 
po:sign_dot pa:2 st:2 po:num pa:5 st:5 po:num +stem(10.25) = 10.25 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.wrong new file mode 100644 index 0000000000..ba1fe3290f --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule5.wrong @@ -0,0 +1 @@ +.25 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.aff new file mode 100644 index 0000000000..e8a088d5a7 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.aff @@ -0,0 +1,4 @@ +COMPOUNDMIN 1 +COMPOUNDRULE 2 +COMPOUNDRULE A*A +COMPOUNDRULE A*AAB*BBBC*C diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.dic new file mode 100644 index 0000000000..7d07bbc89a --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.dic @@ -0,0 +1,5 @@ +3 +a/A +b/B +c/C + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.good new file mode 100644 index 0000000000..55a8f8bc5f --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.good @@ -0,0 +1,4 @@ +aa +aaaaaa +aabbbc +aaaaabbbbbbcccccc diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.wrong new file mode 100644 index 0000000000..48b376dac5 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule6.wrong @@ -0,0 +1,4 @@ +abc +abbbbbccccccc +aabbccccccc +aabbbbbbb diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.aff new file mode 100644 index 0000000000..3ae1fc7847 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.aff @@ -0,0 +1,8 @@ +# English ordinal numbers (parenthesized long flags) +FLAG long +WORDCHARS 0123456789 +COMPOUNDMIN 1 +ONLYINCOMPOUND cc +COMPOUNDRULE 2 +COMPOUNDRULE (nn)*(11)(tt) +COMPOUNDRULE (nn)*(mm)(pp) diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.dic new file mode 100644 index 0000000000..ad4bb4d284 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.dic @@ -0,0 +1,24 @@ +22 +0/nnmm +1/nn11 +2/nnmm +3/nnmm +4/nnmm +5/nnmm +6/nnmm +7/nnmm +8/nnmm +9/nnmm +0th/pptt +1st/pp +1th/ttcc +2nd/pp +2th/ttcc +3rd/pp +3th/ttcc +4th/pptt +5th/pptt +6th/pptt +7th/pptt +8th/pptt +9th/pptt diff --git 
a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.good new file mode 100644 index 0000000000..fafe64a5ca --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.good @@ -0,0 +1,29 @@ +1st +2nd +3rd +4th +5th +6th +7th +8th +9th +10th +11th +12th +13th +14th +15th +16th +17th +18th +19th +20th +21st +22nd +23rd +24th +25th +100th +1000th +10001st +10011th diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.test new file mode 100644 index 0000000000..52e144cb80 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.test @@ -0,0 +1,6 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME + + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.wrong new file mode 100644 index 0000000000..99f28e7cc3 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule7.wrong @@ -0,0 +1,5 @@ +1th +2th +3th +10001th +10011st diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.aff b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.aff new file mode 100644 index 0000000000..03a423d486 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.aff @@ -0,0 +1,8 @@ +# English ordinal numbers (parenthesized numerical flags) +FLAG num +WORDCHARS 0123456789 +COMPOUNDMIN 1 +ONLYINCOMPOUND 1000 +COMPOUNDRULE 2 +COMPOUNDRULE (1001)*(1002)(2001) +COMPOUNDRULE (1001)*(2002)(2000) diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.dic b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.dic new file mode 100644 index 0000000000..e156e95fe0 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.dic @@ -0,0 +1,24 @@ +22 +0/1001,2002 +1/1001,1002 +2/1001,2002 +3/1001,2002 +4/1001,2002 +5/1001,2002 +6/1001,2002 +7/1001,2002 +8/1001,2002 +9/1001,2002 +0th/2000,2001 +1st/2000 +1th/2001,1000 +2nd/2000 +2th/2001,1000 +3rd/2000 +3th/2001,1000 +4th/2000,2001 +5th/2000,2001 +6th/2000,2001 +7th/2000,2001 +8th/2000,2001 +9th/2000,2001 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.good b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.good new file mode 100644 index 0000000000..fafe64a5ca --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.good @@ -0,0 +1,29 @@ +1st +2nd +3rd +4th +5th +6th +7th +8th +9th +10th +11th +12th +13th +14th +15th +16th +17th +18th +19th +20th +21st +22nd +23rd +24th +25th +100th +1000th +10001st +10011th diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.test b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.test new file mode 100644 index 0000000000..52e144cb80 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.test @@ -0,0 +1,6 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME + + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.wrong b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.wrong new file mode 100644 index 0000000000..99f28e7cc3 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/compoundrule8.wrong @@ -0,0 +1,5 @@ +1th +2th +3th +10001th +10011st diff --git 
a/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.aff b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.aff new file mode 100644 index 0000000000..62a1ce5e52 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.aff @@ -0,0 +1,42 @@ +SET UTF-8 +WORDCHARS 0123456789 + +SFX S N 18 +SFX S 0 suf1 . +SFX S 0 suf2 ó +SFX S 0 suf3 [áéóú] +SFX S 0 suf4 [^ó] +SFX S 0 suf5 [^áéóú] +SFX S 0 suf6 őó +SFX S 0 suf7 ő[áéóú] +SFX S 0 suf8 ő[^ó] +SFX S 0 suf9 ő[^áéóú] +SFX S 0 suf10 [áéóőú]ó +SFX S 0 suf11 [^ő]ó +SFX S 0 suf12 [^áéóőú]ó +SFX S 0 suf13 [áéőú][^ú] +SFX S 0 suf14 [^ú][áéóú] +SFX S 0 suf15 [áéóú][^áéőú] +SFX S 0 suf16 [^áéóú][^áéőú] +SFX S 0 suf17 [áéóú][bcdfgkmnóprstvz] +SFX S 0 suf18 [áéóú]ó + +PFX P N 18 +PFX P 0 pre1 . +PFX P 0 pre2 ó +PFX P 0 pre3 [áéóú] +PFX P 0 pre4 [^ó] +PFX P 0 pre5 [^áéóú] +PFX P 0 pre6 óő +PFX P 0 pre7 ó[áéőú] +PFX P 0 pre8 ó[^ő] +PFX P 0 pre9 ó[^áéóőú] +PFX P 0 pre10 [áéóőú]ő +PFX P 0 pre11 [^ó]ő +PFX P 0 pre12 [^áéóőú]ő +PFX P 0 pre13 [áéóú][áéőú] +PFX P 0 pre14 [áéóú][^áéóú] +PFX P 0 pre15 [áéóú][^áéőú] +PFX P 0 pre16 [^áéőú][^áéóú] +PFX P 0 pre17 [bcdfgkmnóprstvz][áéóú] +PFX P 0 pre18 ó[áéóú] diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.dic b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.dic new file mode 100644 index 0000000000..f03ce4ea2c --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.dic @@ -0,0 +1,2 @@ +1 +óőó/SP diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.good b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.good new file mode 100644 index 0000000000..6c6203737a --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.good @@ -0,0 +1,19 @@ +óőó +óőósuf1 +pre1óőó +óőósuf2 +pre2óőó +óőósuf3 +pre3óőó +óőósuf6 +pre6óőó +óőósuf7 +pre7óőó +óőósuf10 +pre10óőó +óőósuf13 +pre13óőó +óőósuf14 +pre14óőó +óőósuf16 +pre16óőó diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.test b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.wrong new file mode 100644 index 0000000000..f1022132ce --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/condition-utf.wrong @@ -0,0 +1,18 @@ +óőósuf4 +pre4óőó +óőósuf5 +pre5óőó +óőósuf8 +pre8óőó +óőósuf9 +pre9óőó +óőósuf11 +pre11óőó +óőósuf12 +pre12óőó +óőósuf15 +pre15óőó +óőósuf17 +óőósuf18 +pre17óőó +pre18óőó diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition.aff b/extensions/spellcheck/hunspell/tests/unit/data/condition.aff new file mode 100644 index 0000000000..62157421ab --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/condition.aff @@ -0,0 +1,62 @@ +SET ISO8859-2 +WORDCHARS 0123456789 + +SFX S N 18 +SFX S 0 suf1 . 
+SFX S 0 suf2 o +SFX S 0 suf3 [aeou] +SFX S 0 suf4 [^o] +SFX S 0 suf5 [^aeou] +SFX S 0 suf6 fo +SFX S 0 suf7 f[aeou] +SFX S 0 suf8 f[^o] +SFX S 0 suf9 f[^aeou] +SFX S 0 suf10 [aefu]o +SFX S 0 suf11 [^f]o +SFX S 0 suf12 [^aefu]o +SFX S 0 suf13 [aefu][^aefu] +SFX S 0 suf14 [^aeou][aeou] +SFX S 0 suf15 [aeou][^aefu] +SFX S 0 suf16 [^aeou][^aefu] +SFX S 0 suf17 [aeou][bcdfgkmnoprstvz] +SFX S 0 suf18 [aeou]o + +SFX Q N 2 +SFX Q 0 ning [^aeio][aeiou]n +SFX Q 0 ing [aeio][aeiou][bcdfgkmnprstvz] + +SFX T N 1 +SFX T y ies .[^aeiou]y + +PFX U N 1 +PFX U 0 un wr. + +SFX Z Y 3 +SFX Z 0 ch [].a +SFX Z 0 m [].a +SFX Z a 0 [].a + +PFX P N 18 +PFX P 0 pre1 . +PFX P 0 pre2 o +PFX P 0 pre3 [aeou] +PFX P 0 pre4 [^o] +PFX P 0 pre5 [^aeou] +PFX P 0 pre6 of +PFX P 0 pre7 o[aefou] +PFX P 0 pre8 o[^f] +PFX P 0 pre9 o[^aefu] +PFX P 0 pre10 [aefu]o +PFX P 0 pre11 [^f]o +PFX P 0 pre12 [^aefou]o +PFX P 0 pre13 [aeou][aefu] +PFX P 0 pre14 [aeou][^aeou] +PFX P 0 pre15 [aeou][^aefu] +PFX P 0 pre16 [^aefu][^aeou] +PFX P 0 pre17 [bcdfgkmnoprstvz][aeou] +PFX P 0 pre18 o[aeou] + + +PFX R N 2 +PFX R 0 gnin n[aeiou][^aeio] +PFX R 0 gni [bcdfgkmnprstvz][aeiou][aeio] diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition.dic b/extensions/spellcheck/hunspell/tests/unit/data/condition.dic new file mode 100644 index 0000000000..40ebd55880 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/condition.dic @@ -0,0 +1,6 @@ +5 +ofo/SP +entertain/Q +nianretne/R +ra/Z +wry/TU diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition.good b/extensions/spellcheck/hunspell/tests/unit/data/condition.good new file mode 100644 index 0000000000..8fef4a7477 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/condition.good @@ -0,0 +1,26 @@ +ofo +ofosuf1 +pre1ofo +ofosuf2 +pre2ofo +ofosuf3 +pre3ofo +ofosuf6 +pre6ofo +ofosuf7 +pre7ofo +ofosuf10 +ofosuf13 +pre13ofo +ofosuf14 +pre14ofo +ofosuf16 +pre16ofo +entertain +entertaining +gninianretne +r +ram +rach +wries +unwry diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition.test b/extensions/spellcheck/hunspell/tests/unit/data/condition.test new file mode 100644 index 0000000000..c95329532a --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/condition.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i ISO8859-2 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/condition.wrong b/extensions/spellcheck/hunspell/tests/unit/data/condition.wrong new file mode 100644 index 0000000000..7b83d828d0 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/condition.wrong @@ -0,0 +1,21 @@ +ofosuf4 +pre4ofo +ofosuf5 +pre5ofo +ofosuf8 +pre8ofo +ofosuf9 +pre9ofo +ofosuf11 +pre10ofo +pre11ofo +ofosuf12 +pre12ofo +ofosuf15 +pre15ofo +ofosuf17 +pre17ofo +ofosuf18 +pre18ofo +entertainning +gninnianretne diff --git a/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.aff b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.aff new file mode 100644 index 0000000000..e7a9bf749e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.aff @@ -0,0 +1,11 @@ +PFX P Y 1 +PFX P 0 un . ip:un + +SFX S Y 1 +SFX S 0 s . is:PL + +SFX Q Y 1 +SFX Q 0 s . is:3SGV + +SFX R Y 1 +SFX R 0 able/PS . 
ds:DER_V_ADJ_ABLE diff --git a/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.dic b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.dic new file mode 100644 index 0000000000..2f6d456152 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.dic @@ -0,0 +1,3 @@ +2 +drink/RQ po:verb +drink/S po:noun diff --git a/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.good b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.good new file mode 100644 index 0000000000..01438d0ebf --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.good @@ -0,0 +1,6 @@ +drink +drinks +drinkable +drinkables +undrinkable +undrinkables diff --git a/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.morph b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.morph new file mode 100644 index 0000000000..95d5443894 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.morph @@ -0,0 +1,20 @@ +> drink +analyze(drink) = st:drink po:verb +analyze(drink) = st:drink po:noun +stem(drink) = drink +> drinks +analyze(drinks) = st:drink po:verb is:3SGV +analyze(drinks) = st:drink po:noun is:PL +stem(drinks) = drink +> drinkable +analyze(drinkable) = st:drink po:verb ds:DER_V_ADJ_ABLE +stem(drinkable) = drinkable +> drinkables +analyze(drinkables) = st:drink po:verb ds:DER_V_ADJ_ABLE is:PL +stem(drinkables) = drinkable +> undrinkable +analyze(undrinkable) = ip:un st:drink po:verb ds:DER_V_ADJ_ABLE +stem(undrinkable) = drinkable +> undrinkables +analyze(undrinkables) = ip:un st:drink po:verb ds:DER_V_ADJ_ABLE is:PL +stem(undrinkables) = drinkable diff --git a/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.test b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.wrong b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.wrong new file mode 100644 index 0000000000..70262d9400 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/conditionalprefix.wrong @@ -0,0 +1,2 @@ +undrink +undrinks diff --git a/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.aff b/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.aff new file mode 100644 index 0000000000..18a42f6fd9 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.aff @@ -0,0 +1,9 @@ +# Digits in words, handled by COMPOUNDRULE. +# 1-jährig, 2-jährig, 100-jährig etc. +SET UTF-8 +COMPOUNDMIN 1 +# recognize ab, aab, aaab etc. 
compounds (a=digits, b=-jährig, see dic file) +COMPOUNDRULE 1 +COMPOUNDRULE a*b +ONLYINCOMPOUND c +WORDCHARS 0123456789- diff --git a/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.dic b/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.dic new file mode 100644 index 0000000000..deeaece05d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.dic @@ -0,0 +1,12 @@ +11 +0/a +1/a +2/a +3/a +4/a +5/a +6/a +7/a +8/a +9/a +-jährig/bc diff --git a/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.test b/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.wrong b/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.wrong new file mode 100644 index 0000000000..aeaf6ce344 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/digits-in-words.wrong @@ -0,0 +1 @@ +-jährig diff --git a/extensions/spellcheck/hunspell/tests/unit/data/encoding.aff b/extensions/spellcheck/hunspell/tests/unit/data/encoding.aff new file mode 100644 index 0000000000..1f560d262e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/encoding.aff @@ -0,0 +1 @@ +SET ISO-8859-15 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/encoding.dic b/extensions/spellcheck/hunspell/tests/unit/data/encoding.dic new file mode 100644 index 0000000000..414f9b8d3e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/encoding.dic @@ -0,0 +1,3 @@ +2 +cur +uvre diff --git a/extensions/spellcheck/hunspell/tests/unit/data/encoding.good b/extensions/spellcheck/hunspell/tests/unit/data/encoding.good new file mode 100644 index 0000000000..fc41c90aac --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/encoding.good @@ -0,0 +1,4 @@ +cur +uvre +CUR +UVRE diff --git a/extensions/spellcheck/hunspell/tests/unit/data/encoding.test b/extensions/spellcheck/hunspell/tests/unit/data/encoding.test new file mode 100644 index 0000000000..09619572e9 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/encoding.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i ISO8859-15 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flag.aff b/extensions/spellcheck/hunspell/tests/unit/data/flag.aff new file mode 100644 index 0000000000..ac105c11fb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/flag.aff @@ -0,0 +1,13 @@ +# base 1-character flags + +SFX A Y 1 +SFX A 0 s/123 . + +SFX 1 Y 1 +SFX 1 0 bar . + +SFX 2 Y 1 +SFX 2 0 baz . + +PFX 3 Y 1 +PFX 3 0 un . 
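The compoundrule* fixtures and the digits-in-words fixture a little further above all rest on the same idea: a COMPOUNDRULE such as a*b or n*1t is in effect a regular expression over the flags of the words that make up a compound. A rough Python sketch of that idea for the digits-in-words data (dictionary, rule and helper names are simplified assumptions; Hunspell's real matcher also handles multiple flags per entry, parenthesised long/numeric flags and the full ONLYINCOMPOUND interaction):

import re

DIC  = {**{d: "a" for d in "0123456789"}, "-jährig": "b"}  # digits-in-words.dic, one flag per entry
RULE = re.compile(r"a*b$")                                  # COMPOUNDRULE a*b

def splits(word):
    """Yield every way to cut `word` into known dictionary entries."""
    if not word:
        yield []
        return
    for entry in DIC:
        if word.startswith(entry):
            for rest in splits(word[len(entry):]):
                yield [entry, *rest]

def compound_ok(word):
    # Accept if some segmentation into at least two entries has a flag string
    # matching the rule; "-jährig" alone stays rejected (it is ONLYINCOMPOUND).
    return any(len(p) >= 2 and RULE.match("".join(DIC[e] for e in p)) for p in splits(word))

print(compound_ok("100-jährig"))  # True:  flags "aaab" match a*b
print(compound_ok("-jährig"))     # False: no two-part segmentation, as in digits-in-words.wrong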
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flag.dic b/extensions/spellcheck/hunspell/tests/unit/data/flag.dic new file mode 100644 index 0000000000..b1b237106f --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/flag.dic @@ -0,0 +1,2 @@ +1 +foo/A3 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flag.good b/extensions/spellcheck/hunspell/tests/unit/data/flag.good new file mode 100644 index 0000000000..d5c27b1a67 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/flag.good @@ -0,0 +1,8 @@ +foo +foos +foosbar +foosbaz +unfoo +unfoos +unfoosbar +unfoosbaz diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flag.test b/extensions/spellcheck/hunspell/tests/unit/data/flag.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/flag.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flaglong.aff b/extensions/spellcheck/hunspell/tests/unit/data/flaglong.aff new file mode 100644 index 0000000000..437f13b3ac --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/flaglong.aff @@ -0,0 +1,14 @@ +# 2-character flags +FLAG long + +SFX zx Y 1 +SFX zx 0 s/g?1G09 . + +SFX g? Y 1 +SFX g? 0 bar . + +SFX 1G Y 1 +SFX 1G 0 baz . + +PFX 09 Y 1 +PFX 09 0 un . diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flaglong.dic b/extensions/spellcheck/hunspell/tests/unit/data/flaglong.dic new file mode 100644 index 0000000000..46c6012860 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/flaglong.dic @@ -0,0 +1,2 @@ +1 +foo/zx09 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flaglong.good b/extensions/spellcheck/hunspell/tests/unit/data/flaglong.good new file mode 100644 index 0000000000..d5c27b1a67 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/flaglong.good @@ -0,0 +1,8 @@ +foo +foos +foosbar +foosbaz +unfoo +unfoos +unfoosbar +unfoosbaz diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flaglong.test b/extensions/spellcheck/hunspell/tests/unit/data/flaglong.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/flaglong.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flagnum.aff b/extensions/spellcheck/hunspell/tests/unit/data/flagnum.aff new file mode 100644 index 0000000000..823cee4cda --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/flagnum.aff @@ -0,0 +1,14 @@ +# numerical flags +FLAG num + +SFX 999 Y 1 +SFX 999 0 s/214,216,54321 . + +SFX 214 Y 1 +SFX 214 0 bar . + +SFX 216 Y 1 +SFX 216 0 baz . + +PFX 54321 Y 1 +PFX 54321 0 un . 
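flag.aff, flaglong.aff and flagnum.aff above exercise the three plain flag encodings: the default one character per flag, FLAG long (two characters per flag) and FLAG num (comma-separated decimal flags); flagutf8.aff, which follows, adds FLAG UTF-8. A minimal sketch of how the flag field of a .dic entry splits under each mode (the function name and the simplifications are mine, not Hunspell's API, and flag aliases are ignored):

def parse_flags(field, mode="char"):
    """Split the part after '/' in a .dic entry into individual flags."""
    if mode == "num":                                   # FLAG num
        return field.split(",")
    if mode == "long":                                  # FLAG long
        return [field[i:i + 2] for i in range(0, len(field), 2)]
    return list(field)                                  # default 1-character flags

print(parse_flags("A3"))                     # ['A', '3']       from flag.dic:     foo/A3
print(parse_flags("zx09", mode="long"))      # ['zx', '09']     from flaglong.dic: foo/zx09
print(parse_flags("999,54321", mode="num"))  # ['999', '54321'] from flagnum.dic:  foo/999,54321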
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flagnum.dic b/extensions/spellcheck/hunspell/tests/unit/data/flagnum.dic new file mode 100644 index 0000000000..927c45f2fd --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/flagnum.dic @@ -0,0 +1,2 @@ +1 +foo/999,54321 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flagnum.good b/extensions/spellcheck/hunspell/tests/unit/data/flagnum.good new file mode 100644 index 0000000000..d5c27b1a67 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/flagnum.good @@ -0,0 +1,8 @@ +foo +foos +foosbar +foosbaz +unfoo +unfoos +unfoosbar +unfoosbaz diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flagnum.test b/extensions/spellcheck/hunspell/tests/unit/data/flagnum.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/flagnum.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.aff b/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.aff new file mode 100644 index 0000000000..d0f75c1858 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.aff @@ -0,0 +1,15 @@ +# UTF-8 flags +FLAG UTF-8 + +SFX A Y 1 +SFX A 0 s/ÖüÜ . +#SFX A 0 s/ÖüÖÜ . + +SFX Ö Y 1 +SFX Ö 0 bar . + +SFX ü Y 1 +SFX ü 0 baz . + +PFX Ü Y 1 +PFX Ü 0 un . diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.dic b/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.dic new file mode 100644 index 0000000000..2944490c90 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.dic @@ -0,0 +1,2 @@ +1 +foo/AÜ diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.good b/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.good new file mode 100644 index 0000000000..d5c27b1a67 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.good @@ -0,0 +1,8 @@ +foo +foos +foosbar +foosbaz +unfoo +unfoos +unfoosbar +unfoosbaz diff --git a/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.test b/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/flagutf8.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.aff b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.aff new file mode 100644 index 0000000000..56cdabe5a3 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.aff @@ -0,0 +1,12 @@ +# fogemorphemes: special morphemes in compounds +# +# Swedish example: +# gata + kontoret = gatukontoret + +COMPOUNDFLAG X +COMPOUNDBEGIN Y +ONLYINCOMPOUND Z +COMPOUNDPERMITFLAG P + +SFX A Y 1 +SFX A a u/YPZ . 
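The fogemorpheme fixture above encodes the Swedish linking morpheme from its comment (gata + kontoret = gatukontoret): the rule SFX A a u/YPZ creates a compound-only variant gatu that may begin a compound but never stands alone. A deliberately simplified sketch of that outcome (the sets and the check are illustrative assumptions, not the Y/P/Z flag machinery itself):

WORDS          = {"gata", "kontoret"}   # ordinary entries from fogemorpheme.dic
COMPOUND_HEADS = {"gatu"}               # produced by SFX A a u/YPZ: only-in-compound, compound-begin
COMPOUND_TAILS = {"kontoret"}           # carries the compound flag X

def check(word):
    if word in WORDS:                   # standalone words are fine as-is
        return True
    for head in COMPOUND_HEADS:         # compounds must start with the linking form
        if word.startswith(head) and word[len(head):] in COMPOUND_TAILS:
            return True
    return False

for w in ("gata", "kontoret", "gatukontoret", "gatu", "gatakontoret"):
    print(w, check(w))   # True, True, True, False, False — matching .good / .wrong above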
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.dic b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.dic new file mode 100644 index 0000000000..1b76380d1b --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.dic @@ -0,0 +1,3 @@ +2 +gata/A +kontoret/X diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.good b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.good new file mode 100644 index 0000000000..01e77d561d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.good @@ -0,0 +1,3 @@ +gata +kontoret +gatukontoret diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.test b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.wrong b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.wrong new file mode 100644 index 0000000000..f920745c79 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/fogemorpheme.wrong @@ -0,0 +1,3 @@ +gatu +gatakontoret +kontoretgatu diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.aff b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.aff new file mode 100644 index 0000000000..de7f8ad9a4 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.aff @@ -0,0 +1,11 @@ +# FORBIDDENWORD flag +# The signed word, and its suffixed forms are all forbidden, +# excepts with root homonyms. +# Useful for forbidding bad suffixed forms or compounds. + + +FORBIDDENWORD X +COMPOUNDFLAG Y + +SFX A Y 1 +SFX A 0 s . 
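forbiddenword.aff above combines a suffix rule and compounding with FORBIDDENWORD X, so that forms the affix rule would otherwise generate (foos, bars) are vetoed by the explicitly forbidden entries foos/X and bars/X in the .dic. A small sketch of that veto step only (the names are mine; the fixture's compound cases foobar and barfoo involve further flag interactions that this deliberately leaves out):

DICTIONARY = {"foo", "bar"}             # accepted roots
FORBIDDEN  = {"foos", "bars"}           # entries carrying the FORBIDDENWORD flag

def accepts(word):
    if word in FORBIDDEN:               # forbidden entries win over anything generated
        return False
    if word in DICTIONARY:
        return True
    return word in {w + "s" for w in DICTIONARY}   # what the SFX rule alone would allow

for w in ("foo", "bar", "foos", "bars"):
    print(w, accepts(w))  # foo True, bar True, foos False, bars False — as in .good / .wrong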
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.dic b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.dic new file mode 100644 index 0000000000..78f2ee3d15 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.dic @@ -0,0 +1,8 @@ +5 +foo/S [1] +foo/YX [2] +foo/Y [3] +foo/S [4] +bar/YS [5] +bars/X +foos/X diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.good b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.good new file mode 100644 index 0000000000..7bd112e9ea --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.good @@ -0,0 +1,3 @@ +foo +bar + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.test b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.wrong b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.wrong new file mode 100644 index 0000000000..5752c1e446 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/forbiddenword.wrong @@ -0,0 +1,4 @@ +bars +foos +foobar +barfoo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forceucase.aff b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.aff new file mode 100644 index 0000000000..5eebcbdab7 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.aff @@ -0,0 +1,4 @@ +# force capitalized compound +TRY F +FORCEUCASE A +COMPOUNDFLAG C diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forceucase.dic b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.dic new file mode 100644 index 0000000000..82fd93b309 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.dic @@ -0,0 +1,4 @@ +3 +foo/C +bar/C +baz/CA diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forceucase.good b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.good new file mode 100644 index 0000000000..37ecf49573 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.good @@ -0,0 +1,7 @@ +foo +bar +baz +foobar +Foobaz +foobazbar +Foobarbaz diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forceucase.sug b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.sug new file mode 100644 index 0000000000..6a77cbd06d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.sug @@ -0,0 +1,2 @@ +Foobaz +Foobarbaz diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forceucase.test b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/forceucase.wrong b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.wrong new file mode 100644 index 0000000000..1503e42ddc --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/forceucase.wrong @@ -0,0 +1,2 @@ +foobaz +foobarbaz diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.aff b/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.aff new file mode 100644 index 
0000000000..d60cb74d7c --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.aff @@ -0,0 +1,15 @@ +# FULLSTRIP option: Hunspell can strip full words by affix rules +# see OpenOffice.org Issue #80145 +# test data from Davide Prina + +FULLSTRIP + +SET ISO8859-15 +TRY aioertnsclmdpgubzfvhàq'ACMSkBGPLxEyRTVòIODNwFéùèìjUZKHWJYQX + +SFX A Y 3 # verbo andare (verb to go) +SFX A andare vado andare # io vado (I go) +SFX A andare va andare # tu vai (you go) +SFX A are iamo andare # noi andiamo (we go) + + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.dic b/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.dic new file mode 100644 index 0000000000..553113d442 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.dic @@ -0,0 +1,4 @@ +2 +andare/A +riandare/A + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.good b/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.good new file mode 100644 index 0000000000..1240e71f53 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.good @@ -0,0 +1,9 @@ +andare +vado +va +andiamo +riandare +rivado +riva +riandiamo + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.test b/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.test new file mode 100644 index 0000000000..4d59c42126 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/fullstrip.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i UTF-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.aff b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.aff new file mode 100644 index 0000000000..5ff25872ce --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.aff @@ -0,0 +1,91 @@ +# German compounding + +# handle special casing of German sharp s + +CHECKSHARPS + +# compound flags + +COMPOUNDBEGIN U +COMPOUNDMIDDLE V +COMPOUNDEND W + +# Prefixes are allowed at the beginning of compounds, +# suffixes are allowed at the end of compounds by default: +# (prefix)?(root)+(affix)? +# Affixes with COMPOUNDPERMITFLAG may be inside of compounds. +COMPOUNDPERMITFLAG P + +# for German fogemorphemes (Fuge-element) +# Hint: ONLYINCOMPOUND is not required everywhere, but the +# checking will be a little faster with it. + +ONLYINCOMPOUND X + +# forbid uppercase characters at compound word bounds +CHECKCOMPOUNDCASE + +# for handling Fuge-elements with dashes (Arbeits-) +# dash will be a special word + +COMPOUNDMIN 1 +WORDCHARS - + +# compound settings and fogemorpheme for `Arbeit' + +SFX A Y 3 +SFX A 0 s/UPX . +SFX A 0 s/VPDX . +SFX A 0 0/WXD . + +SFX B Y 2 +SFX B 0 0/UPX . +SFX B 0 0/VWXDP . + +# a suffix for `Computer' + +SFX C Y 1 +SFX C 0 n/WD . + +# for forbid exceptions (*Arbeitsnehmer) + +FORBIDDENWORD Z + +# dash prefix for compounds with dash (Arbeits-Computer) + +PFX - Y 1 +PFX - 0 -/P . 
+ +# decapitalizing prefix +# circumfix for positioning in compounds + +PFX D Y 29 +PFX D A a/PX A +PFX D /PX +PFX D B b/PX B +PFX D C c/PX C +PFX D D d/PX D +PFX D E e/PX E +PFX D F f/PX F +PFX D G g/PX G +PFX D H h/PX H +PFX D I i/PX I +PFX D J j/PX J +PFX D K k/PX K +PFX D L l/PX L +PFX D M m/PX M +PFX D N n/PX N +PFX D O o/PX O +PFX D /PX +PFX D P p/PX P +PFX D Q q/PX Q +PFX D R r/PX R +PFX D S s/PX S +PFX D T t/PX T +PFX D U u/PX U +PFX D /PX +PFX D V v/PX V +PFX D W w/PX W +PFX D X x/PX X +PFX D Y y/PX Y +PFX D Z z/PX Z diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.dic b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.dic new file mode 100644 index 0000000000..5db6783a4d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.dic @@ -0,0 +1,5 @@ +4 +Arbeit/A- +Computer/BC- +-/W +Arbeitsnehmer/Z diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.good b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.good new file mode 100644 index 0000000000..e4945553c5 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.good @@ -0,0 +1,20 @@ +Computer +Computern +Arbeit +Arbeits- +Computerarbeit +Computerarbeits- +Arbeitscomputer +Computercomputer +Computercomputern +Arbeitscomputern +Computerarbeitscomputer +Computerarbeitscomputern +Arbeitscomputercomputer +Computercomputerarbeit +Arbeitscomputerarbeit +Arbeitsarbeitsarbeit +Computerarbeitsarbeit +Computerarbeits-Computer +Computerarbeits-Computern +Computer-Arbeit diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.test b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.test new file mode 100644 index 0000000000..dc295077fb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i ISO8859-1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.wrong b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.wrong new file mode 100644 index 0000000000..c5f2ba1151 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompounding.wrong @@ -0,0 +1,50 @@ +computer +computern +arbeit +Arbeits +arbeits +ComputerArbeit +ComputernArbeit +Computernarbeit +ComputerArbeits +Arbeitcomputer +Arbeitcomputern +ArbeitsComputer +ArbeitsComputern +Computerarbeitcomputer +ComputerArbeitcomputer +ComputerArbeitscomputer +Computerarbeitcomputern +ComputerArbeitcomputern +ComputerArbeitscomputern +Arbeitscomputerarbeits +Arbeitscomputernarbeits +Computerarbeits-computer +Arbeitsnehmer +computers +computern +computernarbeit +computernArbeit +computerArbeit +computerArbeits +arbeitcomputer +arbeitsComputer +computerarbeitcomputer +computerArbeitcomputer +computerArbeitscomputer +arbeitscomputerarbeits +computerarbeits-computer +arbeitsnehmer +computernarbeit +computernArbeit +arbeits- +computerarbeit +computerarbeits- +arbeitscomputer +arbeitscomputern +computerarbeitscomputer +computerarbeitscomputern +computerarbeitscomputers +arbeitscomputerarbeit +computerarbeits-Computer +computerarbeits-Computern diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.aff b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.aff new file mode 100644 index 0000000000..3e06f0647e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.aff @@ 
-0,0 +1,96 @@ +# German compounding + +# handle special casing of German sharp s + +CHECKSHARPS + +# compound flags + +COMPOUNDBEGIN U +COMPOUNDMIDDLE V +COMPOUNDEND W + +# Prefixes are allowed at the beginning of compounds, +# suffixes are allowed at the end of compounds by default: +# (prefix)?(root)+(affix)? +# Affixes with COMPOUNDPERMITFLAG may be inside of compounds. +COMPOUNDPERMITFLAG P + +# for German fogemorphemes (Fuge-element) +# Hint: ONLYINCOMPOUND is not required everywhere, but the +# checking will be a little faster with it. + +ONLYINCOMPOUND X + +# for decapitalizing nouns with fogemorphemes + +CIRCUMFIX Y + +# for handling Fuge-elements with dashes (Arbeits-) +# dash will be a special word + +COMPOUNDMIN 1 +WORDCHARS - + +# compound settings and fogemorpheme for `Arbeit' + +SFX A Y 3 +SFX A 0 s/UPX . +SFX A 0 s/VPXDY . +SFX A 0 0/WXDY . + +# compound settings for `Computer' + +SFX B Y 2 +SFX B 0 0/UPX . +SFX B 0 0/VWPXDY . + +# a suffix for `Computer' + +SFX C Y 2 +SFX C 0 n . +SFX C 0 n/WXDY . + +# for forbid exceptions (*Arbeitsnehmer) + +FORBIDDENWORD Z + +# dash prefix for compounds with dash (Arbeits-Computer) + +PFX - Y 2 +PFX - 0 -/PUVW . +PFX - 0 -/PY . + +# decapitalizing prefix +# circumfix for positioning in compounds + +PFX D Y 29 +PFX D A a/PXY A +PFX D /PXY +PFX D B b/PXY B +PFX D C c/PXY C +PFX D D d/PXY D +PFX D E e/PXY E +PFX D F f/PXY F +PFX D G g/PXY G +PFX D H h/PXY H +PFX D I i/PXY I +PFX D J j/PXY J +PFX D K k/PXY K +PFX D L l/PXY L +PFX D M m/PXY M +PFX D N n/PXY N +PFX D O o/PXY O +PFX D /PXY +PFX D P p/PXY P +PFX D Q q/PXY Q +PFX D R r/PXY R +PFX D S s/PXY S +PFX D T t/PXY T +PFX D U u/PXY U +PFX D /PXY +PFX D V v/PXY V +PFX D W w/PXY W +PFX D X x/PXY X +PFX D Y y/PXY Y +PFX D Z z/PXY Z diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.dic b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.dic new file mode 100644 index 0000000000..5db6783a4d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.dic @@ -0,0 +1,5 @@ +4 +Arbeit/A- +Computer/BC- +-/W +Arbeitsnehmer/Z diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.good b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.good new file mode 100644 index 0000000000..5357bff165 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.good @@ -0,0 +1,14 @@ +Computer +Computern +Arbeit +Arbeits- +Computerarbeit +Computerarbeits- +Arbeitscomputer +Arbeitscomputern +Computerarbeitscomputer +Computerarbeitscomputern +Arbeitscomputerarbeit +Computerarbeits-Computer +Computerarbeits-Computern +Computer-Arbeit diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.test b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.wrong b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.wrong new file mode 100644 index 0000000000..c5f2ba1151 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/germancompoundingold.wrong @@ -0,0 +1,50 @@ +computer +computern +arbeit +Arbeits +arbeits +ComputerArbeit +ComputernArbeit +Computernarbeit +ComputerArbeits +Arbeitcomputer 
+Arbeitcomputern +ArbeitsComputer +ArbeitsComputern +Computerarbeitcomputer +ComputerArbeitcomputer +ComputerArbeitscomputer +Computerarbeitcomputern +ComputerArbeitcomputern +ComputerArbeitscomputern +Arbeitscomputerarbeits +Arbeitscomputernarbeits +Computerarbeits-computer +Arbeitsnehmer +computers +computern +computernarbeit +computernArbeit +computerArbeit +computerArbeits +arbeitcomputer +arbeitsComputer +computerarbeitcomputer +computerArbeitcomputer +computerArbeitscomputer +arbeitscomputerarbeits +computerarbeits-computer +arbeitsnehmer +computernarbeit +computernArbeit +arbeits- +computerarbeit +computerarbeits- +arbeitscomputer +arbeitscomputern +computerarbeitscomputer +computerarbeitscomputern +computerarbeitscomputers +arbeitscomputerarbeit +computerarbeits-Computer +computerarbeits-Computern diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i35725.aff b/extensions/spellcheck/hunspell/tests/unit/data/i35725.aff new file mode 100644 index 0000000000..96755c7ecd --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i35725.aff @@ -0,0 +1,203 @@ +# Ngram suggestions +# - fix case problem +# - detect character swapping (keep only these suggestions) +# - lesser suggestions +# - weight with common subsequence algorithm +# - suggest uppercased words + +# 2007-02-05: +# now not neighbour character replacements and character movings are +# detected by not ngram suggestions, too. + +# OpenOffice.org's en_US.aff file + +SET ISO8859-1 +TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ' + +WORDCHARS ' + +PFX A Y 1 +PFX A 0 re . + +PFX I Y 1 +PFX I 0 in . + +PFX U Y 1 +PFX U 0 un . + +PFX C Y 1 +PFX C 0 de . + +PFX E Y 1 +PFX E 0 dis . + +PFX F Y 1 +PFX F 0 con . + +PFX K Y 1 +PFX K 0 pro . + +SFX V N 2 +SFX V e ive e +SFX V 0 ive [^e] + +SFX N Y 3 +SFX N e ion e +SFX N y ication y +SFX N 0 en [^ey] + +SFX X Y 3 +SFX X e ions e +SFX X y ications y +SFX X 0 ens [^ey] + +SFX H N 2 +SFX H y ieth y +SFX H 0 th [^y] + +SFX Y Y 1 +SFX Y 0 ly . + +SFX G Y 2 +SFX G e ing e +SFX G 0 ing [^e] + +SFX J Y 2 +SFX J e ings e +SFX J 0 ings [^e] + +SFX D Y 4 +SFX D 0 d e +SFX D y ied [^aeiou]y +SFX D 0 ed [^ey] +SFX D 0 ed [aeiou]y + +SFX T N 4 +SFX T 0 st e +SFX T y iest [^aeiou]y +SFX T 0 est [aeiou]y +SFX T 0 est [^ey] + +SFX R Y 4 +SFX R 0 r e +SFX R y ier [^aeiou]y +SFX R 0 er [aeiou]y +SFX R 0 er [^ey] + +SFX Z Y 4 +SFX Z 0 rs e +SFX Z y iers [^aeiou]y +SFX Z 0 ers [aeiou]y +SFX Z 0 ers [^ey] + +SFX S Y 4 +SFX S y ies [^aeiou]y +SFX S 0 s [aeiou]y +SFX S 0 es [sxzh] +SFX S 0 s [^sxzhy] + +SFX P Y 3 +SFX P y iness [^aeiou]y +SFX P 0 ness [aeiou]y +SFX P 0 ness [^y] + +SFX M Y 1 +SFX M 0 's . + +SFX B Y 3 +SFX B 0 able [^aeiou] +SFX B 0 able ee +SFX B e able [^aeiou]e + +SFX L Y 1 +SFX L 0 ment . 
+ +REP 88 +REP a ei +REP ei a +REP a ey +REP ey a +REP ai ie +REP ie ai +REP are air +REP are ear +REP are eir +REP air are +REP air ere +REP ere air +REP ere ear +REP ere eir +REP ear are +REP ear air +REP ear ere +REP eir are +REP eir ere +REP ch te +REP te ch +REP ch ti +REP ti ch +REP ch tu +REP tu ch +REP ch s +REP s ch +REP ch k +REP k ch +REP f ph +REP ph f +REP gh f +REP f gh +REP i igh +REP igh i +REP i uy +REP uy i +REP i ee +REP ee i +REP j di +REP di j +REP j gg +REP gg j +REP j ge +REP ge j +REP s ti +REP ti s +REP s ci +REP ci s +REP k cc +REP cc k +REP k qu +REP qu k +REP kw qu +REP o eau +REP eau o +REP o ew +REP ew o +REP oo ew +REP ew oo +REP ew ui +REP ui ew +REP oo ui +REP ui oo +REP ew u +REP u ew +REP oo u +REP u oo +REP u oe +REP oe u +REP u ieu +REP ieu u +REP ue ew +REP ew ue +REP uff ough +REP oo ieu +REP ieu oo +REP ier ear +REP ear ier +REP ear air +REP air ear +REP w qu +REP qu w +REP z ss +REP ss z +REP shun tion +REP shun sion +REP shun cion diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i35725.dic b/extensions/spellcheck/hunspell/tests/unit/data/i35725.dic new file mode 100644 index 0000000000..0c61f0031e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i35725.dic @@ -0,0 +1,15 @@ +15 +endangerment/SM +ferment/FSCM +preferment/SM +impermanent/Y +permanent/YSP +semipermanent/Y +empowerment/MS +supermen +tournament/MS +ornamental/SY +ornament/GSDM +supernatant +pimpernel +UNESCO/M diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i35725.good b/extensions/spellcheck/hunspell/tests/unit/data/i35725.good new file mode 100644 index 0000000000..052ba8418a --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i35725.good @@ -0,0 +1 @@ +permanent diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i35725.sug b/extensions/spellcheck/hunspell/tests/unit/data/i35725.sug new file mode 100644 index 0000000000..a8bf1d9808 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i35725.sug @@ -0,0 +1,10 @@ +permanent, preferment +permanent, ornament +permanent +Permanent, Preferment +Permanent, Ornament +Permanent +UNESCO +UNESCO +UNESCO's +UNESCO's diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i35725.test b/extensions/spellcheck/hunspell/tests/unit/data/i35725.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i35725.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i35725.wrong b/extensions/spellcheck/hunspell/tests/unit/data/i35725.wrong new file mode 100644 index 0000000000..573e195d8f --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i35725.wrong @@ -0,0 +1,10 @@ +permenant +pernament +pernemant +Permenant +Pernament +Pernemant +unesco +Unesco +unesco's +Unesco's diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i53643.aff b/extensions/spellcheck/hunspell/tests/unit/data/i53643.aff new file mode 100644 index 0000000000..9fac6d84ca --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i53643.aff @@ -0,0 +1,2 @@ +# check numbers with separators +WORDCHARS 0123456789.-, diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i53643.dic b/extensions/spellcheck/hunspell/tests/unit/data/i53643.dic new file mode 100644 index 0000000000..aec5d506bd --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i53643.dic @@ -0,0 +1,2 @@ +1 +foo diff --git 
a/extensions/spellcheck/hunspell/tests/unit/data/i53643.good b/extensions/spellcheck/hunspell/tests/unit/data/i53643.good new file mode 100644 index 0000000000..116333452f --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i53643.good @@ -0,0 +1,19 @@ +1 +12 +123 +1234 +12345 +123456 +1234567 +1.1 +1.12 +1.123 +1.1234 +1.12345 +1.123456 +12.1 +123.12 +1234.123 +12345.1234 +123456.12345 +1234567.123456 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i53643.test b/extensions/spellcheck/hunspell/tests/unit/data/i53643.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i53643.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i53643.wrong b/extensions/spellcheck/hunspell/tests/unit/data/i53643.wrong new file mode 100644 index 0000000000..45c61d2985 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i53643.wrong @@ -0,0 +1,4 @@ +1..2 +1,,2 +1.,2 +1,.2 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54633.aff b/extensions/spellcheck/hunspell/tests/unit/data/i54633.aff new file mode 100644 index 0000000000..46281e1c5b --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i54633.aff @@ -0,0 +1,2 @@ +# Missing capitalized suggestion for capitalized bad words +SET ISO8859-1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54633.dic b/extensions/spellcheck/hunspell/tests/unit/data/i54633.dic new file mode 100644 index 0000000000..e26d6f9c89 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i54633.dic @@ -0,0 +1,2 @@ +1 +diter diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54633.good b/extensions/spellcheck/hunspell/tests/unit/data/i54633.good new file mode 100644 index 0000000000..a115f67ed4 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i54633.good @@ -0,0 +1,2 @@ +diter +diter diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54633.sug b/extensions/spellcheck/hunspell/tests/unit/data/i54633.sug new file mode 100644 index 0000000000..a115f67ed4 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i54633.sug @@ -0,0 +1,2 @@ +diter +diter diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54633.test b/extensions/spellcheck/hunspell/tests/unit/data/i54633.test new file mode 100644 index 0000000000..dc295077fb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i54633.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i ISO8859-1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54633.wrong b/extensions/spellcheck/hunspell/tests/unit/data/i54633.wrong new file mode 100644 index 0000000000..579a45dab1 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i54633.wrong @@ -0,0 +1,2 @@ +editer +Editer diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54980.aff b/extensions/spellcheck/hunspell/tests/unit/data/i54980.aff new file mode 100644 index 0000000000..37cc5c53d9 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i54980.aff @@ -0,0 +1,2 @@ +# ISO-8859-15 (extended latin-1) support for French, Finnish and EURO symbol +SET ISO8859-15 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54980.dic b/extensions/spellcheck/hunspell/tests/unit/data/i54980.dic new file mode 100644 index 0000000000..414f9b8d3e --- /dev/null +++ 
b/extensions/spellcheck/hunspell/tests/unit/data/i54980.dic @@ -0,0 +1,3 @@ +2 +cur +uvre diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54980.good b/extensions/spellcheck/hunspell/tests/unit/data/i54980.good new file mode 100644 index 0000000000..fc41c90aac --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i54980.good @@ -0,0 +1,4 @@ +cur +uvre +CUR +UVRE diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i54980.test b/extensions/spellcheck/hunspell/tests/unit/data/i54980.test new file mode 100644 index 0000000000..09619572e9 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i54980.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i ISO8859-15 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i58202.aff b/extensions/spellcheck/hunspell/tests/unit/data/i58202.aff new file mode 100644 index 0000000000..11249d4f28 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i58202.aff @@ -0,0 +1,4 @@ +# case suggestions +MAXNGRAMSUGS 0 +# capitalise baz->Baz +TRY B diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i58202.dic b/extensions/spellcheck/hunspell/tests/unit/data/i58202.dic new file mode 100644 index 0000000000..19e1980ba2 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i58202.dic @@ -0,0 +1,5 @@ +4 +foo +bar +Baz +Boo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i58202.good b/extensions/spellcheck/hunspell/tests/unit/data/i58202.good new file mode 100644 index 0000000000..88a079a55d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i58202.good @@ -0,0 +1,10 @@ +foo +bar +Foo +Bar +Baz +Boo +FOO +BAR +BAZ +BOO diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i58202.sug b/extensions/spellcheck/hunspell/tests/unit/data/i58202.sug new file mode 100644 index 0000000000..bc784acef9 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i58202.sug @@ -0,0 +1,13 @@ +foo, Boo +Bar +Baz +Boo +foo bar +foo Bar +Foo bar +Foo Bar +foo Baz +Foo Baz +Baz foo +Baz Foo +Baz Boo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i58202.test b/extensions/spellcheck/hunspell/tests/unit/data/i58202.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i58202.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i58202.wrong b/extensions/spellcheck/hunspell/tests/unit/data/i58202.wrong new file mode 100644 index 0000000000..886584d809 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i58202.wrong @@ -0,0 +1,13 @@ +fOO +BAr +baz +BOo +foobar +fooBar +Foobar +FooBar +fooBaz +FooBaz +Bazfoo +BazFoo +BazBoo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i68568.aff b/extensions/spellcheck/hunspell/tests/unit/data/i68568.aff new file mode 100644 index 0000000000..f0c639e8dd --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i68568.aff @@ -0,0 +1,7 @@ +# Sant'Elia -> SANT'ELIA (Italian) +# OpenOffice.org Issue 68658 + +PFX a Y 1 +PFX a 0 Sant' E + +WORDCHARS ' diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i68568.dic b/extensions/spellcheck/hunspell/tests/unit/data/i68568.dic new file mode 100644 index 0000000000..966010835b --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i68568.dic @@ -0,0 +1,2 @@ +1 +Elia/a diff --git 
a/extensions/spellcheck/hunspell/tests/unit/data/i68568.test b/extensions/spellcheck/hunspell/tests/unit/data/i68568.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i68568.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i68568.wrong b/extensions/spellcheck/hunspell/tests/unit/data/i68568.wrong new file mode 100644 index 0000000000..998e9f4e46 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i68568.wrong @@ -0,0 +1,5 @@ +sant'elia +sant'Elia +Sant'elia +Sant' +SANT' diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.aff b/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.aff new file mode 100644 index 0000000000..7076ee938a --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.aff @@ -0,0 +1,8 @@ +# Sant'Elia -> SANT'ELIA (Italian) +# OpenOffice.org Issue 68658 +SET UTF-8 + +PFX a Y 1 +PFX a 0 Foó' B + +WORDCHARS ' diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.dic b/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.dic new file mode 100644 index 0000000000..bc38229faa --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.dic @@ -0,0 +1,2 @@ +1 +Bár/a diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.test b/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.test new file mode 100644 index 0000000000..4d59c42126 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i UTF-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.wrong new file mode 100644 index 0000000000..0713c13690 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/i68568utf.wrong @@ -0,0 +1,5 @@ +foó'bár +foó'Bár +Foó'bár +foó' +FOÓ' diff --git a/extensions/spellcheck/hunspell/tests/unit/data/iconv.aff b/extensions/spellcheck/hunspell/tests/unit/data/iconv.aff new file mode 100644 index 0000000000..36cf7a2234 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/iconv.aff @@ -0,0 +1,10 @@ +# input conversion (accept comma acuted letters also with cedilla, +# as de facto replacement of the Romanian standard) +SET UTF-8 + +ICONV 4 +ICONV ş ș +ICONV ţ ț +ICONV Ş Ș +ICONV Ţ Ț + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/iconv.dic b/extensions/spellcheck/hunspell/tests/unit/data/iconv.dic new file mode 100644 index 0000000000..8326eee2d6 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/iconv.dic @@ -0,0 +1,5 @@ +4 +Chișinău +Țepes +ț +Ș diff --git a/extensions/spellcheck/hunspell/tests/unit/data/iconv.good b/extensions/spellcheck/hunspell/tests/unit/data/iconv.good new file mode 100644 index 0000000000..746cf1e539 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/iconv.good @@ -0,0 +1,6 @@ +Chișinău +Chişinău +Țepes +Ţepes +Ş +ţ diff --git a/extensions/spellcheck/hunspell/tests/unit/data/iconv.test b/extensions/spellcheck/hunspell/tests/unit/data/iconv.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/iconv.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git 
a/extensions/spellcheck/hunspell/tests/unit/data/ignore.aff b/extensions/spellcheck/hunspell/tests/unit/data/ignore.aff new file mode 100644 index 0000000000..238dc15e55 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/ignore.aff @@ -0,0 +1,5 @@ +# ignore characters in words (for Arabic Harakat or Hebrew niqqud) +IGNORE aeiou + +PFX A Y 1 +PFX A 0 re . diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ignore.dic b/extensions/spellcheck/hunspell/tests/unit/data/ignore.dic new file mode 100644 index 0000000000..846983b7d8 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/ignore.dic @@ -0,0 +1,3 @@ +2 +xmpl +expression/A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ignore.good b/extensions/spellcheck/hunspell/tests/unit/data/ignore.good new file mode 100644 index 0000000000..d7dd645c29 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/ignore.good @@ -0,0 +1,6 @@ +example +expression +xmpl +xprssn +reexpression +rxprssn diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ignore.test b/extensions/spellcheck/hunspell/tests/unit/data/ignore.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/ignore.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.aff b/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.aff new file mode 100644 index 0000000000..8646676d09 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.aff @@ -0,0 +1,6 @@ +# Arabic test for feature ignoring diacritics +SET UTF-8 +# Arabic diacritics (harakat): +# sukun, shadda, kasra, damma, fatha, kasratan, dammantan, fathatan (left to right) +IGNORE ًٌٍَُِّْ +WORDCHARS ًٌٍَُِّْ diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.dic b/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.dic new file mode 100644 index 0000000000..d4a2a81e7d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.dic @@ -0,0 +1,10 @@ +9 +طِير +فَتحة +ضُمة +كِسرة +فتحًتان +ضمتانٌ +كسرتاٍن +شدّة +سكوْن diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.good b/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.good new file mode 100644 index 0000000000..d463cd59fc --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.good @@ -0,0 +1,9 @@ +طير +فتحة +ضمة +كسرة +فتحتان +ضمتان +كسرتان +شدة +سكون diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.test b/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/ignoreutf.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/keepcase.aff b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.aff new file mode 100644 index 0000000000..b08006bf78 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.aff @@ -0,0 +1,3 @@ +# keep case in signed words +KEEPCASE A +WORDCHARS . 
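Note on the two fixtures just shown: IGNORE strips the listed characters before lookup (the use case named in the comment is Arabic harakat or Hebrew niqqud), and KEEPCASE makes a flagged entry valid only in exactly the case it is listed in. The sketch below is a minimal manual check with the standalone hunspell command-line tool, not necessarily what the bundled test.sh harness (whose contents are not part of this diff) does; it assumes hunspell is on PATH, that -d accepts a path prefix resolving to the .aff/.dic pair in this directory, and uses only the documented -l (print misspelled words) and -G (print only accepted lines) options.

#!/bin/sh
# Hand-run the IGNORE fixture: with IGNORE aeiou, "example" and "xmpl" reduce
# to the same key, so -l should report neither of them as misspelled.
cd extensions/spellcheck/hunspell/tests/unit/data
printf 'example\nxmpl\n' | hunspell -d ./ignore -l
# Hand-run the KEEPCASE fixture: "Bar" is listed with the KEEPCASE flag, so only
# that exact capitalization should survive -G; "BAR" and "bar" should not.
printf 'Bar\nBAR\nbar\n' | hunspell -d ./keepcase -G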
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/keepcase.dic b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.dic new file mode 100644 index 0000000000..bf9992acf2 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.dic @@ -0,0 +1,5 @@ +4 +foo/A +Bar/A +baz./A +Quux./A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/keepcase.good b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.good new file mode 100644 index 0000000000..e6ff1817d3 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.good @@ -0,0 +1,4 @@ +foo +Bar +baz. +Quux. diff --git a/extensions/spellcheck/hunspell/tests/unit/data/keepcase.sug b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.sug new file mode 100644 index 0000000000..551dd8bb36 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.sug @@ -0,0 +1,8 @@ +foo +foo +Bar +Bar +baz. +baz. +Quux. +Quux. diff --git a/extensions/spellcheck/hunspell/tests/unit/data/keepcase.test b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/keepcase.wrong b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.wrong new file mode 100644 index 0000000000..3b79142915 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/keepcase.wrong @@ -0,0 +1,8 @@ +Foo +FOO +BAR +bar +Baz. +BAZ. +quux. +QUUX. diff --git a/extensions/spellcheck/hunspell/tests/unit/data/korean.aff b/extensions/spellcheck/hunspell/tests/unit/data/korean.aff new file mode 100644 index 0000000000..979e3c2284 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/korean.aff @@ -0,0 +1 @@ +SET UTF-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/korean.dic b/extensions/spellcheck/hunspell/tests/unit/data/korean.dic new file mode 100644 index 0000000000..95cb4508e3 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/korean.dic @@ -0,0 +1,3 @@ +2 +들어오세요 +안녕하세요 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/korean.good b/extensions/spellcheck/hunspell/tests/unit/data/korean.good new file mode 100644 index 0000000000..660d506bb6 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/korean.good @@ -0,0 +1,2 @@ +들어오세요 +안녕하세요 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/korean.test b/extensions/spellcheck/hunspell/tests/unit/data/korean.test new file mode 100644 index 0000000000..4d59c42126 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/korean.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i UTF-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/korean.wrong b/extensions/spellcheck/hunspell/tests/unit/data/korean.wrong new file mode 100644 index 0000000000..5ea85cead3 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/korean.wrong @@ -0,0 +1 @@ +들어오세 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/map.aff b/extensions/spellcheck/hunspell/tests/unit/data/map.aff new file mode 100644 index 0000000000..3e78baba6b --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/map.aff @@ -0,0 +1,9 @@ +# With MAP suggestion, Hunspell can add missing accents to a word. 
+ +# switch off ngram suggestion for testing +MAXNGRAMSUGS 0 + +MAP 3 +MAP uúü +MAP öóo +MAP ß(ss) diff --git a/extensions/spellcheck/hunspell/tests/unit/data/map.dic b/extensions/spellcheck/hunspell/tests/unit/data/map.dic new file mode 100644 index 0000000000..744394f0c8 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/map.dic @@ -0,0 +1,4 @@ +3 +Frühstück +tükörfúró +groß diff --git a/extensions/spellcheck/hunspell/tests/unit/data/map.sug b/extensions/spellcheck/hunspell/tests/unit/data/map.sug new file mode 100644 index 0000000000..cadb754228 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/map.sug @@ -0,0 +1,3 @@ +Frühstück +tükörfúró +groß diff --git a/extensions/spellcheck/hunspell/tests/unit/data/map.test b/extensions/spellcheck/hunspell/tests/unit/data/map.test new file mode 100644 index 0000000000..dc295077fb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/map.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i ISO8859-1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/map.wrong b/extensions/spellcheck/hunspell/tests/unit/data/map.wrong new file mode 100644 index 0000000000..251c8a1e94 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/map.wrong @@ -0,0 +1,3 @@ +Fruhstuck +tukorfuro +gross diff --git a/extensions/spellcheck/hunspell/tests/unit/data/maputf.aff b/extensions/spellcheck/hunspell/tests/unit/data/maputf.aff new file mode 100644 index 0000000000..30edb2a785 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/maputf.aff @@ -0,0 +1,11 @@ +# With MAP suggestion, Hunspell can add missing accents to a word. + +SET UTF-8 + +# switch off ngram suggestion for testing +MAXNGRAMSUGS 0 + +MAP 3 +MAP uúü +MAP öóo +MAP ß(ss) diff --git a/extensions/spellcheck/hunspell/tests/unit/data/maputf.dic b/extensions/spellcheck/hunspell/tests/unit/data/maputf.dic new file mode 100644 index 0000000000..1c6fa8d058 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/maputf.dic @@ -0,0 +1,4 @@ +3 +Frühstück +tükörfúró +groß diff --git a/extensions/spellcheck/hunspell/tests/unit/data/maputf.sug b/extensions/spellcheck/hunspell/tests/unit/data/maputf.sug new file mode 100644 index 0000000000..81d09e0214 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/maputf.sug @@ -0,0 +1,3 @@ +Frühstück +tükörfúró +groß diff --git a/extensions/spellcheck/hunspell/tests/unit/data/maputf.test b/extensions/spellcheck/hunspell/tests/unit/data/maputf.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/maputf.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/maputf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/maputf.wrong new file mode 100644 index 0000000000..251c8a1e94 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/maputf.wrong @@ -0,0 +1,3 @@ +Fruhstuck +tukorfuro +gross diff --git a/extensions/spellcheck/hunspell/tests/unit/data/morph.aff b/extensions/spellcheck/hunspell/tests/unit/data/morph.aff new file mode 100644 index 0000000000..6080858608 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/morph.aff @@ -0,0 +1,12 @@ +# example for morphological analysis, stemming and generation +PFX P Y 1 +PFX P 0 un . dp:pfx_un sp:un + +SFX S Y 1 +SFX S 0 s . is:plur + +SFX Q Y 1 +SFX Q 0 s . is:sg_3 + +SFX R Y 1 +SFX R 0 able/PS . 
ds:der_able diff --git a/extensions/spellcheck/hunspell/tests/unit/data/morph.dic b/extensions/spellcheck/hunspell/tests/unit/data/morph.dic new file mode 100644 index 0000000000..f8d58a6d4d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/morph.dic @@ -0,0 +1,10 @@ +9 +drink/S po:noun +drink/RQ po:verb al:drank al:drunk ts:present +drank po:verb st:drink is:past_1 +drunk po:verb st:drink is:past_2 +eat/RQ po:verb al:ate al:eaten ts:present +ate po:verb st:eat is:past_1 +eaten po:verb st:eat is:past_2 +phenomenon po:noun al:phenomena +phenomena po:noun st:phenomenon is:plur diff --git a/extensions/spellcheck/hunspell/tests/unit/data/morph.good b/extensions/spellcheck/hunspell/tests/unit/data/morph.good new file mode 100644 index 0000000000..9f0d24768f --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/morph.good @@ -0,0 +1,26 @@ +drink +drinks +drinkable +drinkables +undrinkable +undrinkables +drank +drunk +phenomenon +phenomena +drink eat +drink eats +drink ate +drink eaten +drink eatable +drink eatables +drink phenomena +drinks eat +drinks eats +drinks ate +drinks eaten +drinks eatable +drinks eatables +drinks phenomena +undrinkable phenomena +phenomenon drinks diff --git a/extensions/spellcheck/hunspell/tests/unit/data/morph.morph b/extensions/spellcheck/hunspell/tests/unit/data/morph.morph new file mode 100644 index 0000000000..9965d7ea83 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/morph.morph @@ -0,0 +1,48 @@ +> drink +analyze(drink) = st:drink po:noun +analyze(drink) = st:drink po:verb al:drank al:drunk ts:present +stem(drink) = drink +> drinks +analyze(drinks) = st:drink po:verb al:drank al:drunk ts:present is:sg_3 +analyze(drinks) = st:drink po:noun is:plur +stem(drinks) = drink +> drinkable +analyze(drinkable) = st:drink po:verb al:drank al:drunk ts:present ds:der_able +stem(drinkable) = drinkable +> drinkables +analyze(drinkables) = st:drink po:verb al:drank al:drunk ts:present ds:der_able is:plur +stem(drinkables) = drinkable +> undrinkable +analyze(undrinkable) = dp:pfx_un sp:un st:drink po:verb al:drank al:drunk ts:present ds:der_able +stem(undrinkable) = undrinkable +> undrinkables +analyze(undrinkables) = dp:pfx_un sp:un st:drink po:verb al:drank al:drunk ts:present ds:der_able is:plur +stem(undrinkables) = undrinkable +> drank +analyze(drank) = po:verb st:drink is:past_1 +stem(drank) = drink +> drunk +analyze(drunk) = po:verb st:drink is:past_2 +stem(drunk) = drink +> phenomenon +analyze(phenomenon) = st:phenomenon po:noun al:phenomena +stem(phenomenon) = phenomenon +> phenomena +analyze(phenomena) = po:noun st:phenomenon is:plur +stem(phenomena) = phenomenon +generate(drink, eat) = drink +generate(drink, eats) = drinks +generate(drink, ate) = drank +generate(drink, eaten) = drunk +generate(drink, eatable) = drinkable +generate(drink, eatables) = drinkables +generate(drink, phenomena) = drinks +generate(drinks, eat) = drink +generate(drinks, eats) = drinks +generate(drinks, ate) = drank +generate(drinks, eaten) = drunk +generate(drinks, eatable) = drinkable +generate(drinks, eatables) = drinkables +generate(drinks, phenomena) = drinks +generate(undrinkable, phenomena) = undrinkables +generate(phenomenon, drinks) = phenomena diff --git a/extensions/spellcheck/hunspell/tests/unit/data/morph.test b/extensions/spellcheck/hunspell/tests/unit/data/morph.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/morph.test @@ -0,0 +1,4 @@ +#!/bin/sh 
+DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix.aff b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.aff new file mode 100644 index 0000000000..a5981ef69f --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.aff @@ -0,0 +1,5 @@ +NEEDAFFIX X +COMPOUNDFLAG Y + +SFX A Y 1 +SFX A 0 s/Y . diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix.dic b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.dic new file mode 100644 index 0000000000..b5792765e3 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.dic @@ -0,0 +1,3 @@ +2 +foo/YXA +bar/Y diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix.good b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.good new file mode 100644 index 0000000000..f9e0663f3d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.good @@ -0,0 +1,3 @@ +bar +foos +barfoos diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix.test b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix.wrong b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.wrong new file mode 100644 index 0000000000..257cc5642c --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix.wrong @@ -0,0 +1 @@ +foo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.aff b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.aff new file mode 100644 index 0000000000..c434dac664 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.aff @@ -0,0 +1,2 @@ +NEEDAFFIX X +COMPOUNDFLAG Y diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.dic b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.dic new file mode 100644 index 0000000000..ff32e878bd --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.dic @@ -0,0 +1,5 @@ +4 +foo st:foo id:1 +foo/YX st:foo id:2 +foo/Y st:foo id:3 +bar/Y diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.good b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.good new file mode 100644 index 0000000000..7e4b098ef9 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.good @@ -0,0 +1,5 @@ +foo +bar +foobar +barfoo + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.morph b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.morph new file mode 100644 index 0000000000..0f3e474312 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.morph @@ -0,0 +1,13 @@ +> foo +analyze(foo) = st:foo id:1 +analyze(foo) = st:foo id:3 +stem(foo) = foo +> bar +analyze(bar) = st:bar +stem(bar) = bar +> foobar +analyze(foobar) = pa:foo st:foo id:3 pa:bar +stem(foobar) = foo +> barfoo +analyze(barfoo) = pa:bar st:bar pa:foo st:foo id:3 +stem(barfoo) = barfoo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.test b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix2.test @@ -0,0 +1,4 @@ +#!/bin/sh 
+DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.aff b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.aff new file mode 100644 index 0000000000..5d55d38e9e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.aff @@ -0,0 +1,8 @@ +# neeadaffix on affixes +NEEDAFFIX X + +SFX A Y 1 +SFX A 0 s/XB . + +SFX B Y 1 +SFX B 0 baz . diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.dic b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.dic new file mode 100644 index 0000000000..001d95e776 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.dic @@ -0,0 +1,2 @@ +2 +foo/A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.good b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.good new file mode 100644 index 0000000000..dc9a6a97d8 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.good @@ -0,0 +1,2 @@ +foo +foosbaz diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.test b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.wrong b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.wrong new file mode 100644 index 0000000000..c09c408f24 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix3.wrong @@ -0,0 +1 @@ +foos diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.aff b/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.aff new file mode 100644 index 0000000000..c434dac664 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.aff @@ -0,0 +1,2 @@ +NEEDAFFIX X +COMPOUNDFLAG Y diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.dic b/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.dic new file mode 100644 index 0000000000..96f80c12b0 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.dic @@ -0,0 +1,5 @@ +4 +foo/X [1] +foo/Y [2] +foo/YX [3] +bar/Y [4] diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.good b/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.good new file mode 100644 index 0000000000..7e4b098ef9 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.good @@ -0,0 +1,5 @@ +foo +bar +foobar +barfoo + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.test b/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix4.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.aff b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.aff new file mode 100644 index 0000000000..6399a3e98f --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.aff @@ -0,0 +1,13 @@ +# on affixes +NEEDAFFIX X + +SFX A Y 2 +SFX A 0 suf/B . +SFX A 0 pseudosuf/XB . + +SFX B Y 1 +SFX B 0 bar . + +PFX C Y 2 +PFX C 0 pre . +PFX C 0 pseudopre/X . 
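Note on the needaffix fixtures just shown: they all revolve around the NEEDAFFIX flag, meaning a stem or affix-generated form carrying X is not a word on its own and only becomes valid once a further affix (or, with COMPOUNDFLAG, a compound partner) is attached; needaffix5.aff uses this to model its pseudo prefix and suffix, which must co-occur with another affix. Under the same assumptions as the earlier sketch (standalone hunspell CLI, -d given a path prefix, not the bundled test.sh), needaffix3 can be checked by hand:

#!/bin/sh
# needaffix3: SFX A produces "foos" but tags it with NEEDAFFIX (X), so the bare
# form should be reported by -l; once SFX B appends "baz", "foosbaz" should pass.
cd extensions/spellcheck/hunspell/tests/unit/data
printf 'foos\nfoosbaz\n' | hunspell -d ./needaffix3 -l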
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.dic b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.dic new file mode 100644 index 0000000000..83131e27a5 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.dic @@ -0,0 +1,2 @@ +1 +foo/AC diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.good b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.good new file mode 100644 index 0000000000..d1b86bf831 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.good @@ -0,0 +1,11 @@ +foo +prefoo +foosuf +prefoosuf +foosufbar +prefoosufbar +pseudoprefoosuf +pseudoprefoosufbar +pseudoprefoopseudosufbar +prefoopseudosuf +prefoopseudosufbar diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.test b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.wrong b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.wrong new file mode 100644 index 0000000000..fdd1797fdf --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/needaffix5.wrong @@ -0,0 +1,3 @@ +pseudoprefoo +foopseudosuf +pseudoprefoopseudosuf diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.aff b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.aff new file mode 100644 index 0000000000..19e6981215 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.aff @@ -0,0 +1,21 @@ +# Test fix of suffixed ngram suggestions with UTF-8 encoding and long flags. +# Based on Vitaly Piryatinsky's bug report and example. +SET UTF-8 +FLAG num + +PFX 101 Y 1 +PFX 101 0 пред . + +SFX 1381 Y 1 +SFX 1381 0 о . + +SFX 2000 Y 3 +SFX 2000 0 ам . +SFX 2000 0 ами . +SFX 2000 0 ах . + +SFX 2022 Y 4 +SFX 2022 0 а . +SFX 2022 0 у . +SFX 2022 0 ом . +SFX 2022 0 е . 
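Note on the ngram-utf-fix fixture just shown: it uses FLAG num, so affix classes are referenced by multi-digit numbers (101, 2000, 2022, ...) instead of single characters, and the point of the test is the suggestion list produced for near-miss UTF-8 input. Under the same assumptions as the sketches above, the suggestions can be inspected in ispell-style pipe mode rather than through test.sh:

#!/bin/sh
# -a enables pipe mode, where each misspelled input word comes back as a
# "& <word> <count> <offset>: suggestion, ..." record; -i sets the input encoding.
cd extensions/spellcheck/hunspell/tests/unit/data
hunspell -d ./ngram-utf-fix -a -i utf-8 < ngram-utf-fix.wrong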
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.dic b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.dic new file mode 100644 index 0000000000..27ce413aeb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.dic @@ -0,0 +1,2 @@ +1 +человек/2022,2000,101 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.good b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.good new file mode 100644 index 0000000000..366d92a9bc --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.good @@ -0,0 +1 @@ +человек diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.sug b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.sug new file mode 100644 index 0000000000..58ab09b534 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.sug @@ -0,0 +1,2 @@ +человек +человек diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.test b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.wrong b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.wrong new file mode 100644 index 0000000000..97de996e08 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/ngram-utf-fix.wrong @@ -0,0 +1,2 @@ +времячко +человеко diff --git a/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.aff b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.aff new file mode 100644 index 0000000000..c9361da4c6 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.aff @@ -0,0 +1,5 @@ +# don't suggest word with NOSUGGEST flag (for example vulgar or obscene words) +# See OpenOffice.org Issue #55498 +# (nosuggest.sug is an empty file) +NOSUGGEST A +COMPOUNDFLAG B diff --git a/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.dic b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.dic new file mode 100644 index 0000000000..dc80c916d4 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.dic @@ -0,0 +1,3 @@ +1 +foo/AB +bar/B diff --git a/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.good b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.good new file mode 100644 index 0000000000..ad91a5e313 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.good @@ -0,0 +1,3 @@ +foo +foobar +barfoo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.sug b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.sug new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.sug diff --git a/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.test b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.wrong b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.wrong new file mode 100644 index 0000000000..89c7a1a9c0 --- 
/dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/nosuggest.wrong @@ -0,0 +1,3 @@ +foox +foobarx +barfoox diff --git a/extensions/spellcheck/hunspell/tests/unit/data/oconv.aff b/extensions/spellcheck/hunspell/tests/unit/data/oconv.aff new file mode 100644 index 0000000000..13a3d9b207 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/oconv.aff @@ -0,0 +1,12 @@ +# output conversion +SET UTF-8 + +OCONV 7 +OCONV a A +OCONV á Á +OCONV b B +OCONV c C +OCONV d D +OCONV e E +OCONV é É + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/oconv.dic b/extensions/spellcheck/hunspell/tests/unit/data/oconv.dic new file mode 100644 index 0000000000..359186cac1 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/oconv.dic @@ -0,0 +1,4 @@ +3 +bébé +dádá +aábcdeé diff --git a/extensions/spellcheck/hunspell/tests/unit/data/oconv.good b/extensions/spellcheck/hunspell/tests/unit/data/oconv.good new file mode 100644 index 0000000000..6cdaab16e3 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/oconv.good @@ -0,0 +1,2 @@ +bébé +dádá diff --git a/extensions/spellcheck/hunspell/tests/unit/data/oconv.sug b/extensions/spellcheck/hunspell/tests/unit/data/oconv.sug new file mode 100644 index 0000000000..a191c629dd --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/oconv.sug @@ -0,0 +1,3 @@ +BÉBÉ +DÁDÁ +AÁBCDEÉ diff --git a/extensions/spellcheck/hunspell/tests/unit/data/oconv.test b/extensions/spellcheck/hunspell/tests/unit/data/oconv.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/oconv.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/oconv.wrong b/extensions/spellcheck/hunspell/tests/unit/data/oconv.wrong new file mode 100644 index 0000000000..73dcc895a9 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/oconv.wrong @@ -0,0 +1,3 @@ +béb +dád +aábcde diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.aff b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.aff new file mode 100644 index 0000000000..e700b0e54a --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.aff @@ -0,0 +1,5 @@ +# words only in compounds (see also fogemorpheme example) +ONLYINCOMPOUND O +COMPOUNDFLAG A +SFX B Y 1 +SFX B 0 s . 
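Note on onlyincompound.aff just shown: it pairs ONLYINCOMPOUND with COMPOUNDFLAG, so an entry flagged O (see onlyincompound.dic below) may appear only as a constituent of a compound, never as a free-standing word. A quick manual check under the same assumptions as the sketches above:

#!/bin/sh
# "pseudo" alone carries the ONLYINCOMPOUND flag, so -l should report it, while
# the compound "foopseudo" (both parts carry COMPOUNDFLAG A) should be accepted.
cd extensions/spellcheck/hunspell/tests/unit/data
printf 'pseudo\nfoopseudo\n' | hunspell -d ./onlyincompound -l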
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.dic b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.dic new file mode 100644 index 0000000000..dc742f7abe --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.dic @@ -0,0 +1,3 @@ +2 +foo/A +pseudo/OAB diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.good b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.good new file mode 100644 index 0000000000..151d597342 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.good @@ -0,0 +1,4 @@ +foo +pseudofoo +foopseudo +foopseudos diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.sug b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.sug new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.sug diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.test b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.wrong b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.wrong new file mode 100644 index 0000000000..115d0c6174 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound.wrong @@ -0,0 +1,2 @@ +pseudo +pseudos diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.aff b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.aff new file mode 100644 index 0000000000..5d0ac5e69b --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.aff @@ -0,0 +1,12 @@ +# affixes only in compounds (see also fogemorpheme example) +ONLYINCOMPOUND O +COMPOUNDFLAG A +COMPOUNDPERMITFLAG P + +SFX B Y 1 +SFX B 0 s/OP . 
+ +# obligate fogemorpheme by forbidding the stem (0) in compounds + +CHECKCOMPOUNDPATTERN 1 +CHECKCOMPOUNDPATTERN 0/B /A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.dic b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.dic new file mode 100644 index 0000000000..1adab653bf --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.dic @@ -0,0 +1,3 @@ +2 +foo/A +pseudo/AB diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.good b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.good new file mode 100644 index 0000000000..a31ce34aca --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.good @@ -0,0 +1,3 @@ +foo +foopseudo +pseudosfoo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.test b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.wrong b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.wrong new file mode 100644 index 0000000000..29a71a3c3b --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/onlyincompound2.wrong @@ -0,0 +1,3 @@ +pseudos +foopseudos +pseudofoo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.aff b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.aff new file mode 100644 index 0000000000..413aca4049 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.aff @@ -0,0 +1,13 @@ +FLAG long +COMPOUNDBEGIN Ca +COMPOUNDMIDDLE Cb +COMPOUNDEND Cc +COMPOUNDPERMITFLAG Cp +ONLYINCOMPOUND Cx + +CHECKCOMPOUNDPATTERN 1 +CHECKCOMPOUNDPATTERN /Ch /Xs + +SFX Ch Y 2 +SFX Ch 0 s/CaCbCxCp . +SFX Ch 0 s-/CaCbCcCp . 
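Note on the two fixtures just shown: both onlyincompound2.aff and opentaal-cpdpat.aff use CHECKCOMPOUNDPATTERN with flag conditions, so a compound is rejected when the adjacent parts match the listed end/begin character and flag pattern. In opentaal-cpdpat the pattern /Ch /Xs blocks a join onto the Xs-flagged "port", so only the compound built with the Cc-only "sport" survives, as the fixture's .good and .wrong lists below spell out. Under the same assumptions as the sketches above:

#!/bin/sh
# "schoonheidssport" (schoonheid + linking s from SFX Ch + sport) should pass;
# "schoonheidsport" ends in the blocked schoonheids + port join and is reported.
cd extensions/spellcheck/hunspell/tests/unit/data
printf 'schoonheidssport\nschoonheidsport\n' | hunspell -d ./opentaal-cpdpat -l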
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.dic b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.dic new file mode 100644 index 0000000000..e7831b7045 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.dic @@ -0,0 +1,4 @@ +3 +schoonheid/Ch +port/CcXs +sport/Cc diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.good b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.good new file mode 100644 index 0000000000..fbaf830beb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.good @@ -0,0 +1 @@ +schoonheidssport diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.test b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.wrong b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.wrong new file mode 100644 index 0000000000..3f9e8949b1 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat.wrong @@ -0,0 +1 @@ +schoonheidsport diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.aff b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.aff new file mode 100644 index 0000000000..22dfe69d86 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.aff @@ -0,0 +1,27 @@ +# Test file based on OpenTaal's Dutch dictionary, coded by Ruud Baars + +WORDCHARS - +NOSPLITSUGS +FLAG long + +COMPOUNDBEGIN Ca +COMPOUNDMIDDLE Cb +COMPOUNDEND Cc +COMPOUNDPERMITFLAG Cp +ONLYINCOMPOUND Cx + +CHECKCOMPOUNDPATTERN 2 +CHECKCOMPOUNDPATTERN 0/Ch /Xs +CHECKCOMPOUNDPATTERN 0/Xm /Xm + +SFX CA Y 2 +SFX CA 0 /CaCp . +SFX CA 0 -/CaCp . + +SFX CB Y 2 +SFX CB 0 /CbCp . +SFX CB 0 -/CbCp . + +SFX Ch Y 2 +SFX Ch 0 s/CaCbCxCp . +SFX Ch 0 s-/CaCbCcCp . diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.dic b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.dic new file mode 100644 index 0000000000..52581e942e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.dic @@ -0,0 +1,4 @@ +100 +test/CACBCc +zout/CACBXm +suiker/CACBXm
\ No newline at end of file diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.good b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.good new file mode 100644 index 0000000000..e604d6e2f7 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.good @@ -0,0 +1 @@ +zout-suikertest diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.test b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.wrong b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.wrong new file mode 100644 index 0000000000..d8ddb16a57 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-cpdpat2.wrong @@ -0,0 +1 @@ +zoutsuikertest diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.aff b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.aff new file mode 100644 index 0000000000..fa073432f5 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.aff @@ -0,0 +1,9 @@ +TRY r + +FORBIDDENWORD F +COMPOUNDRULE 2 +COMPOUNDRULE WW +COMPOUNDRULE WWW + +SFX S Y 1 +SFX S 0 s . diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.dic b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.dic new file mode 100644 index 0000000000..44375948ff --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.dic @@ -0,0 +1,5 @@ +4 +foo/W +word/W +bar/WS +foowordbar/FS diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.good b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.good new file mode 100644 index 0000000000..73a96a7845 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.good @@ -0,0 +1,3 @@ +fooword +wordbar +barwordfoo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.sug b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.sug new file mode 100644 index 0000000000..60111a417f --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.sug @@ -0,0 +1 @@ +barwordfoo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.test b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.wrong b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.wrong new file mode 100644 index 0000000000..59dfddfb24 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword1.wrong @@ -0,0 +1,5 @@ +foowordbar +foowordbars +foowordba +foowordbas +barwodfoo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.aff b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.aff new file mode 100644 index 
0000000000..441354d6b0 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.aff @@ -0,0 +1,7 @@ +TRY r + +FORBIDDENWORD F +COMPOUNDFLAG W + +SFX S Y 1 +SFX S 0 s . diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.dic b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.dic new file mode 100644 index 0000000000..895dd62305 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.dic @@ -0,0 +1,5 @@ +3 +foo/WS +word/W +bar/WS +foowordbar/FS
\ No newline at end of file diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.good b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.good new file mode 100644 index 0000000000..17cf47de3d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.good @@ -0,0 +1,4 @@ +fooword +wordbar +barwordfoo +barwordfoos diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.sug b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.sug new file mode 100644 index 0000000000..60111a417f --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.sug @@ -0,0 +1 @@ +barwordfoo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.test b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.wrong b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.wrong new file mode 100644 index 0000000000..59dfddfb24 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-forbiddenword2.wrong @@ -0,0 +1,5 @@ +foowordbar +foowordbars +foowordba +foowordbas +barwodfoo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.aff b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.aff new file mode 100644 index 0000000000..15c914bec3 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.aff @@ -0,0 +1,8 @@ +KEEPCASE K +COMPOUNDBEGIN B +COMPOUNDEND E +COMPOUNDFLAG C +COMPOUNDMIN 1 +WORDCHARS - +BREAK 1 +BREAK # diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.dic b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.dic new file mode 100644 index 0000000000..b05ec131a2 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.dic @@ -0,0 +1,7 @@ +5 +tv-/KB +-tv/KE +word/C +NATO-/B +-NATO/E + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.good b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.good new file mode 100644 index 0000000000..e1c1129102 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.good @@ -0,0 +1,4 @@ +tv-word +word-tv +NATO-word +word-NATO diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.sug b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.sug new file mode 100644 index 0000000000..07dde3fe68 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.sug @@ -0,0 +1,8 @@ +Tv-word, Tv- word, Word +Tv- word, Word +word -tv, word-tv, word +word -tv, word-tv, word +wordword-tv, word +Tv-word-tv +NATO- +-NATO diff --git a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.test b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git 
a/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.wrong b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.wrong new file mode 100644 index 0000000000..b15752ed20 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/opentaal-keepcase.wrong @@ -0,0 +1,8 @@ +TV-word +Tv-word +word-TV +word-Tv +wordword-TV +TV-word-TV +Nato-word +word-nato diff --git a/extensions/spellcheck/hunspell/tests/unit/data/phone.aff b/extensions/spellcheck/hunspell/tests/unit/data/phone.aff new file mode 100644 index 0000000000..5a27c14d7e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/phone.aff @@ -0,0 +1,255 @@ +# phonetic suggestions by PHONE and optional ph field of dictionary words +# Documentationo of PHONE: http://aspell.net/man-html/Phonetic-Code.html + +# phonetic_english.h - phonetic transformation rules for use with phonetic.c +# Copyright (C) 2000 Björn Jacke +# +# This rule set is based on Lawrence Phillips original metaphone +# algorithm with modifications made by Michael Kuhn in his +# C implantation, more modifications by Björn Jacke when +# converting the algorithm to a rule set and minor +# touch ups by Kevin Atkinson +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License version 2.1 as published by the Free Software Foundation; +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Björn Jacke may be reached by email at bjoern.jacke@gmx.de +# +# Changelog: +# +# 2000-01-05 Björn Jacke <bjoern.jacke@gmx.de> +# - first version with translation rules derived from +# metaphone.cc distributed with aspell 0.28.3 +# - "TH" is now representated as "@" because "0" is a +# meta character +# - removed TH(!vowel) --> T; always use TH --> # instead +# - dropped "^AE" -> "E" (redundant) +# - "ing" is transformed to "N", not "NK" +# - "SCH(EO)" transforms to "SK" now +# - added R --> SILENT if (after a vowel) and no (vowel or +# "y" follows) like in "Marcy" or "abort" +# - H is SILENT in RH at beginning of words +# - H is SILENT if vowel leads and "Y" follows +# - some ".OUGH.." --> ...F exceptions added +# - "^V" transforms to "W" +# 2000-01-07 Kevin Atkinson <kevinatk@home.com> +# Converted from header to data file. 
+# +# 2007-08-23 László Németh <nemeth AT OOo> +# Add PHONE header and PHONE keywords +# +# version 1.1 + +PHONE 105 +PHONE AH(AEIOUY)-^ *H +PHONE AR(AEIOUY)-^ *R +PHONE A(HR)^ * +PHONE A^ * +PHONE AH(AEIOUY)- H +PHONE AR(AEIOUY)- R +PHONE A(HR) _ +PHONE BB- _ +PHONE B B +PHONE CQ- _ +PHONE CIA X +PHONE CH X +PHONE C(EIY)- S +PHONE CK K +PHONE COUGH^ KF +PHONE CC< C +PHONE C K +PHONE DG(EIY) K +PHONE DD- _ +PHONE D T +PHONE < E +PHONE EH(AEIOUY)-^ *H +PHONE ER(AEIOUY)-^ *R +PHONE E(HR)^ * +PHONE ENOUGH^$ *NF +PHONE E^ * +PHONE EH(AEIOUY)- H +PHONE ER(AEIOUY)- R +PHONE E(HR) _ +PHONE FF- _ +PHONE F F +PHONE GN^ N +PHONE GN$ N +PHONE GNS$ NS +PHONE GNED$ N +PHONE GH(AEIOUY)- K +PHONE GH _ +PHONE GG9 K +PHONE G K +PHONE H H +PHONE IH(AEIOUY)-^ *H +PHONE IR(AEIOUY)-^ *R +PHONE I(HR)^ * +PHONE I^ * +PHONE ING6 N +PHONE IH(AEIOUY)- H +PHONE IR(AEIOUY)- R +PHONE I(HR) _ +PHONE J K +PHONE KN^ N +PHONE KK- _ +PHONE K K +PHONE LAUGH^ LF +PHONE LL- _ +PHONE L L +PHONE MB$ M +PHONE MM M +PHONE M M +PHONE NN- _ +PHONE N N +PHONE OH(AEIOUY)-^ *H +PHONE OR(AEIOUY)-^ *R +PHONE O(HR)^ * +PHONE O^ * +PHONE OH(AEIOUY)- H +PHONE OR(AEIOUY)- R +PHONE O(HR) _ +PHONE PH F +PHONE PN^ N +PHONE PP- _ +PHONE P P +PHONE Q K +PHONE RH^ R +PHONE ROUGH^ RF +PHONE RR- _ +PHONE R R +PHONE SCH(EOU)- SK +PHONE SC(IEY)- S +PHONE SH X +PHONE SI(AO)- X +PHONE SS- _ +PHONE S S +PHONE TI(AO)- X +PHONE TH @ +PHONE TCH-- _ +PHONE TOUGH^ TF +PHONE TT- _ +PHONE T T +PHONE UH(AEIOUY)-^ *H +PHONE UR(AEIOUY)-^ *R +PHONE U(HR)^ * +PHONE U^ * +PHONE UH(AEIOUY)- H +PHONE UR(AEIOUY)- R +PHONE U(HR) _ +PHONE V^ W +PHONE V F +PHONE WR^ R +PHONE WH^ W +PHONE W(AEIOU)- W +PHONE X^ S +PHONE X KS +PHONE Y(AEIOU)- Y +PHONE ZZ- _ +PHONE Z S + +#The rules in a different view: +# +# Exceptions: +# +# Beginning of word: "gn", "kn-", "pn-", "wr-" ----> drop first letter +# "Aebersold", "Gnagy", "Knuth", "Pniewski", "Wright" +# +# Beginning of word: "x" ----> change to "s" +# as in "Deng Xiaopeng" +# +# Beginning of word: "wh-" ----> change to "w" +# as in "Whalen" +# Beginning of word: leading vowels are transformed to "*" +# +# "[crt]ough" and "enough" are handled separately because of "F" sound +# +# +# A --> A at beginning +# _ otherwise +# +# B --> B unless at the end of word after "m", as in "dumb", "McComb" +# +# C --> X (sh) if "-cia-" or "-ch-" +# S if "-ci-", "-ce-", or "-cy-" +# SILENT if "-sci-", "-sce-", or "-scy-", or "-cq-" +# K otherwise, including in "-sch-" +# +# D --> K if in "-dge-", "-dgy-", or "-dgi-" +# T otherwise +# +# E --> A at beginnig +# _ SILENT otherwise +# +# F --> F +# +# G --> SILENT if in "-gh-" and not at end or before a vowel +# in "-gn" or "-gned" or "-gns" +# in "-dge-" etc., as in above rule +# K if before "i", or "e", or "y" if not double "gg" +# +# K otherwise (incl. "GG"!) 
+# +# H --> SILENT if after vowel and no vowel or "Y" follows +# or after "-ch-", "-sh-", "-ph-", "-th-", "-gh-" +# or after "rh-" at beginning +# H otherwise +# +# I --> A at beginning +# _ SILENT otherwise +# +# J --> K +# +# K --> SILENT if after "c" +# K otherwise +# +# L --> L +# +# M --> M +# +# N --> N +# +# O --> A at beginning +# _ SILENT otherwise +# +# P --> F if before "h" +# P otherwise +# +# Q --> K +# +# R --> SILENT if after vowel and no vowel or "Y" follows +# R otherwise +# +# S --> X (sh) if before "h" or in "-sio-" or "-sia-" +# SK if followed by "ch(eo)" (SCH(EO)) +# S otherwise +# +# T --> X (sh) if "-tia-" or "-tio-" +# 0 (th) if before "h" +# silent if in "-tch-" +# T otherwise +# +# U --> A at beginning +# _ SILENT otherwise +# +# V --> V if first letter of word +# F otherwise +# +# W --> SILENT if not followed by a vowel +# W if followed by a vowel +# +# X --> KS +# +# Y --> SILENT if not followed by a vowel +# Y if followed by a vowel +# +# Z --> S diff --git a/extensions/spellcheck/hunspell/tests/unit/data/phone.dic b/extensions/spellcheck/hunspell/tests/unit/data/phone.dic new file mode 100644 index 0000000000..51b0743d0e --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/phone.dic @@ -0,0 +1,11 @@ +10 +Brasilia +brassily +Brazilian +brilliance +brilliancy +brilliant +brain +brass +Churchillian +xxxxxxxxxx ph:Brasilia diff --git a/extensions/spellcheck/hunspell/tests/unit/data/phone.sug b/extensions/spellcheck/hunspell/tests/unit/data/phone.sug new file mode 100644 index 0000000000..cc22e37984 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/phone.sug @@ -0,0 +1 @@ +Brasilia, Xxxxxxxxxx, Brilliant, Brazilian, Brassily, Brilliance diff --git a/extensions/spellcheck/hunspell/tests/unit/data/phone.test b/extensions/spellcheck/hunspell/tests/unit/data/phone.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/phone.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/phone.wrong b/extensions/spellcheck/hunspell/tests/unit/data/phone.wrong new file mode 100644 index 0000000000..ca9db395e3 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/phone.wrong @@ -0,0 +1 @@ +Brasillian diff --git a/extensions/spellcheck/hunspell/tests/unit/data/rep.aff b/extensions/spellcheck/hunspell/tests/unit/data/rep.aff new file mode 100644 index 0000000000..485755c898 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/rep.aff @@ -0,0 +1,21 @@ +# With REP suggestions, we can fix typical language specific misspellings. + +# switch off ngram suggestion for testing +MAXNGRAMSUGS 0 + +REP 8 +REP f ph +REP ph f +REP shun$ tion +REP ^alot$ a_lot # add the highest priority for "a lot" suggestion to "alot" +REP ^foo$ bar +REP ' _ # "un'alunno" -> "un alunno" +REP ^vinten$ vinte_e_un +REP s 's + + +SFX A Y 1 +SFX A 0 's . 
+ + +WORDCHARS ' diff --git a/extensions/spellcheck/hunspell/tests/unit/data/rep.dic b/extensions/spellcheck/hunspell/tests/unit/data/rep.dic new file mode 100644 index 0000000000..f9a4c008b0 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/rep.dic @@ -0,0 +1,15 @@ +10 +form +phantom +vacation +vacations +a +lot +un +alunno +bar +barbars +vinte +e +un +auto/A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/rep.sug b/extensions/spellcheck/hunspell/tests/unit/data/rep.sug new file mode 100644 index 0000000000..b48a5b80eb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/rep.sug @@ -0,0 +1,8 @@ +form +phantom +vacation +a lot, lot +un alunno +bar +vinte e un +auto's, auto diff --git a/extensions/spellcheck/hunspell/tests/unit/data/rep.test b/extensions/spellcheck/hunspell/tests/unit/data/rep.test new file mode 100644 index 0000000000..dc295077fb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/rep.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i ISO8859-1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/rep.wrong b/extensions/spellcheck/hunspell/tests/unit/data/rep.wrong new file mode 100644 index 0000000000..cd9699c4c7 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/rep.wrong @@ -0,0 +1,11 @@ +phorm +fantom +vacashun +vacashuns +alot +un'alunno +foo +foobars +barfoos +vinten +autos diff --git a/extensions/spellcheck/hunspell/tests/unit/data/reputf.aff b/extensions/spellcheck/hunspell/tests/unit/data/reputf.aff new file mode 100644 index 0000000000..ac434a4267 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/reputf.aff @@ -0,0 +1,9 @@ +# With REP suggestions, we can fix typical language specific misspellings. 
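The rep.* fixtures above show the REP mechanics: each pair is tried against a rejected word, ^ and $ anchor the pattern to the word edges, and _ in the replacement stands for a space, which is how "vacashun" becomes "vacation" and "alot" becomes "a lot" in rep.sug. A quick manual check, assuming hunspell on PATH and the data directory as the working directory (the -i flag matches the encoding that rep.test passes to the driver):

echo vacashun | hunspell -d ./rep -a -i ISO8859-1    # expect a '&' line offering "vacation"
echo alot | hunspell -d ./rep -a -i ISO8859-1        # REP ^alot$ a_lot; per rep.sug the first suggestion is "a lot"
echo foo | hunspell -d ./reputf -a -i utf-8          # this reputf fixture: REP oo őő corrects "foo" to "főő"

The reputf case is the same idea with a single multibyte pair, which is why the affix file continuing below declares SET UTF-8 and reputf.test runs the driver with -i utf-8.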
+ +SET UTF-8 + +# switch off ngram suggestion for testing +MAXNGRAMSUGS 0 + +REP 1 +REP oo őő diff --git a/extensions/spellcheck/hunspell/tests/unit/data/reputf.dic b/extensions/spellcheck/hunspell/tests/unit/data/reputf.dic new file mode 100644 index 0000000000..1890fcb8eb --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/reputf.dic @@ -0,0 +1,2 @@ +1 +főő diff --git a/extensions/spellcheck/hunspell/tests/unit/data/reputf.sug b/extensions/spellcheck/hunspell/tests/unit/data/reputf.sug new file mode 100644 index 0000000000..8a00bc3717 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/reputf.sug @@ -0,0 +1 @@ +főő diff --git a/extensions/spellcheck/hunspell/tests/unit/data/reputf.test b/extensions/spellcheck/hunspell/tests/unit/data/reputf.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/reputf.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/reputf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/reputf.wrong new file mode 100644 index 0000000000..257cc5642c --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/reputf.wrong @@ -0,0 +1 @@ +foo diff --git a/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.aff b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.aff new file mode 100644 index 0000000000..3ab347319a --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.aff @@ -0,0 +1,8 @@ +# Forbid compound word with triple letters +CHECKCOMPOUNDTRIPLE +# Allow simplified forms +SIMPLIFIEDTRIPLE + +COMPOUNDMIN 2 + +COMPOUNDFLAG A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.dic b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.dic new file mode 100644 index 0000000000..cfe7a35dce --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.dic @@ -0,0 +1,3 @@ +2 +glass/A +sko/A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.good b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.good new file mode 100644 index 0000000000..23a4815e8b --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.good @@ -0,0 +1,3 @@ +glass +sko +glassko diff --git a/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.test b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.wrong b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.wrong new file mode 100644 index 0000000000..2811287685 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/simplifiedtriple.wrong @@ -0,0 +1 @@ +glasssko diff --git a/extensions/spellcheck/hunspell/tests/unit/data/slash.aff b/extensions/spellcheck/hunspell/tests/unit/data/slash.aff new file mode 100644 index 0000000000..6ab104b9ee --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/slash.aff @@ -0,0 +1,4 @@ +# slashes in words (\/) + +# (only for tokenization) +WORDCHARS /: diff --git a/extensions/spellcheck/hunspell/tests/unit/data/slash.dic 
b/extensions/spellcheck/hunspell/tests/unit/data/slash.dic new file mode 100644 index 0000000000..478276df68 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/slash.dic @@ -0,0 +1,5 @@ +4 +/ +1\/2 +http:\/\/ +\/usr\/share\/myspell\/ diff --git a/extensions/spellcheck/hunspell/tests/unit/data/slash.good b/extensions/spellcheck/hunspell/tests/unit/data/slash.good new file mode 100644 index 0000000000..4a25e205f6 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/slash.good @@ -0,0 +1,4 @@ +/ +1/2 +http:// +/usr/share/myspell/ diff --git a/extensions/spellcheck/hunspell/tests/unit/data/slash.test b/extensions/spellcheck/hunspell/tests/unit/data/slash.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/slash.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sug.aff b/extensions/spellcheck/hunspell/tests/unit/data/sug.aff new file mode 100644 index 0000000000..b1f2adba60 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/sug.aff @@ -0,0 +1,15 @@ +# new suggestion methods of Hunspell 1.5: +# capitalization: nasa -> NASA +# long swap: permenant -> permanent +# long mov: Ghandi -> Gandhi +# double two characters: vacacation -> vacation +# space with REP: "alot" -> "a lot" ("a lot" need to be in the dic file.) + +# switch off ngram suggestion for testing +MAXNGRAMSUGS 0 +REP 1 +REP alot a_lot +KEY qwertzuiop|asdfghjkl|yxcvbnm|aq +WORDCHARS . +FORBIDDENWORD ? + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sug.dic b/extensions/spellcheck/hunspell/tests/unit/data/sug.dic new file mode 100644 index 0000000000..0c22cedf42 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/sug.dic @@ -0,0 +1,11 @@ +1 +NASA +Gandhi +grateful +permanent +vacation +a +lot +have +which +McDonald diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sug.sug b/extensions/spellcheck/hunspell/tests/unit/data/sug.sug new file mode 100644 index 0000000000..e277bdb778 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/sug.sug @@ -0,0 +1,12 @@ +NASA +Gandhi +grateful +permanent +vacation +a lot, lot +permanent. 
Vacation +have +which +Gandhi +McDonald +permanent diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sug.test b/extensions/spellcheck/hunspell/tests/unit/data/sug.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/sug.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sug.wrong b/extensions/spellcheck/hunspell/tests/unit/data/sug.wrong new file mode 100644 index 0000000000..4d184d5a61 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/sug.wrong @@ -0,0 +1,12 @@ +nasa +Ghandi +greatful +permenant +vacacation +alot +permanent.Vacation +ahev +hwihc +GAndhi +Mcdonald +permqnent diff --git a/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/List_of_common_misspellings.txt b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/List_of_common_misspellings.txt new file mode 100644 index 0000000000..571f3796a6 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/List_of_common_misspellings.txt @@ -0,0 +1,4020 @@ +# source: http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines +abandonned abandoned +aberation aberration +abilties abilities +abilty ability +abondon abandon +abondoned abandoned +abondoning abandoning +abondons abandons +aborigene aborigine +abortificant abortifacient +abreviated abbreviated +abreviation abbreviation +abritrary arbitrary +absense absence +absolutly absolutely +absorbsion absorption +absorbtion absorption +abundacies abundances +abundancies abundances +abundunt abundant +abutts abuts +acadamy academy +acadmic academic +accademic academic +accademy academy +acccused accused +accelleration acceleration +accension accession, ascension +acceptence acceptance +acceptible acceptable +accessable accessible +accidentaly accidentally +accidently accidentally +acclimitization acclimatization +acommodate accommodate +accomadate accommodate +accomadated accommodated +accomadates accommodates +accomadating accommodating +accomadation accommodation +accomadations accommodations +accomdate accommodate +accomodate accommodate +accomodated accommodated +accomodates accommodates +accomodating accommodating +accomodation accommodation +accomodations accommodations +accompanyed accompanied +accordeon accordion +accordian accordion +accoring according +accoustic acoustic +accquainted acquainted +accross across +accussed accused +acedemic academic +acheive achieve +acheived achieved +acheivement achievement +acheivements achievements +acheives achieves +acheiving achieving +acheivment achievement +acheivments achievements +achievment achievement +achievments achievements +achive achieve, archive +achived achieved, archived +achivement achievement +achivements achievements +acknowldeged acknowledged +acknowledgeing acknowledging +ackward awkward, backward +acomplish accomplish +acomplished accomplished +acomplishment accomplishment +acomplishments accomplishments +acording according +acordingly accordingly +acquaintence acquaintance +acquaintences acquaintances +acquiantence acquaintance +acquiantences acquaintances +acquited acquitted +activites activities +activly actively +actualy actually +acuracy accuracy +acused accused +acustom accustom +acustommed accustomed +adavanced advanced +adbandon abandon +additinally additionally +additionaly additionally +addmission admission +addopt adopt +addopted adopted 
+addoptive adoptive +addres address, adders +addresable addressable +addresed addressed +addresing addressing +addressess addresses +addtion addition +addtional additional +adecuate adequate +adhearing adhering +adherance adherence +admendment amendment +admininistrative administrative +adminstered administered +adminstrate administrate +adminstration administration +adminstrative administrative +adminstrator administrator +admissability admissibility +admissable admissible +admited admitted +admitedly admittedly +adn and +adolecent adolescent +adquire acquire +adquired acquired +adquires acquires +adquiring acquiring +adres address +adresable addressable +adresing addressing +adress address +adressable addressable +adressed addressed +adressing addressing, dressing +adventrous adventurous +advertisment advertisement +advertisments advertisements +advesary adversary +adviced advised +aeriel aerial +aeriels aerials +afair affair +afficianados aficionados +afficionado aficionado +afficionados aficionados +affilate affiliate +affilliate affiliate +affort afford, effort +aforememtioned aforementioned +againnst against +agains against +agaisnt against +aganist against +aggaravates aggravates +aggreed agreed +aggreement agreement +aggregious egregious +aggresive aggressive +agian again +agianst against +agin again +agina again, angina +aginst against +agravate aggravate +agre agree +agred agreed +agreeement agreement +agreemnt agreement +agregate aggregate +agregates aggregates +agreing agreeing +agression aggression +agressive aggressive +agressively aggressively +agressor aggressor +agricuture agriculture +agrieved aggrieved +ahev have +ahppen happen +ahve have +aicraft aircraft +aiport airport +airbourne airborne +aircaft aircraft +aircrafts aircraft +airporta airports +airrcraft aircraft +aisian asian +albiet albeit +alchohol alcohol +alchoholic alcoholic +alchol alcohol +alcholic alcoholic +alcohal alcohol +alcoholical alcoholic +aledge allege +aledged alleged +aledges alleges +alege allege +aleged alleged +alegience allegiance +algebraical algebraic +algorhitms algorithms +algoritm algorithm +algoritms algorithms +alientating alienating +alledge allege +alledged alleged +alledgedly allegedly +alledges alleges +allegedely allegedly +allegedy allegedly +allegely allegedly +allegence allegiance +allegience allegiance +allign align +alligned aligned +alliviate alleviate +allopone allophone +allopones allophones +allready already +allthough although +alltime all-time +alltogether altogether +almsot almost +alochol alcohol +alomst almost +alot a lot, allot +alotted allotted +alowed allowed +alowing allowing +alreayd already +alse else +alsot also +alternitives alternatives +altho although +althought although +altough although +alusion allusion, illusion +alwasy always +alwyas always +amalgomated amalgamated +amatuer amateur +amature armature, amateur +amendmant amendment +amerliorate ameliorate +amke make +amking making +ammend amend +ammended amended +ammendment amendment +ammendments amendments +ammount amount +ammused amused +amoung among +amoungst amongst +amung among +analagous analogous +analitic analytic +analogeous analogous +anarchim anarchism +anarchistm anarchism +anbd and +ancestory ancestry +ancilliary ancillary +androgenous androgynous +androgeny androgyny +anihilation annihilation +aniversary anniversary +annoint anoint +annointed anointed +annointing anointing +annoints anoints +annouced announced +annualy annually +annuled annulled +anohter another +anomolies anomalies 
+anomolous anomalous +anomoly anomaly +anonimity anonymity +anounced announced +ansalisation nasalisation +ansalization nasalization +ansestors ancestors +antartic antarctic +anthromorphization anthropomorphization +anual annual, anal +anulled annulled +anwsered answered +anyhwere anywhere +anyother any other +anytying anything +aparent apparent +aparment apartment +apenines apennines, Apennines +aplication application +aplied applied +apolegetics apologetics +apon upon, apron +apparant apparent +apparantly apparently +appart apart +appartment apartment +appartments apartments +appealling appealing, appalling +appeareance appearance +appearence appearance +appearences appearances +appenines apennines, Apennines +apperance appearance +apperances appearances +applicaiton application +applicaitons applications +appologies apologies +appology apology +apprearance appearance +apprieciate appreciate +approachs approaches +appropiate appropriate +appropraite appropriate +appropropiate appropriate +approproximate approximate +approxamately approximately +approxiately approximately +approximitely approximately +aprehensive apprehensive +apropriate appropriate +aproximate approximate +aproximately approximately +aquaintance acquaintance +aquainted acquainted +aquiantance acquaintance +aquire acquire +aquired acquired +aquiring acquiring +aquisition acquisition +aquitted acquitted +aranged arranged +arangement arrangement +arbitarily arbitrarily +arbitary arbitrary +archaelogists archaeologists +archaelogy archaeology +archaoelogy archeology, archaeology +archaology archeology, archaeology +archeaologist archeologist, archaeologist +archeaologists archeologists, archaeologists +archetect architect +archetects architects +archetectural architectural +archetecturally architecturally +archetecture architecture +archiac archaic +archictect architect +archimedian archimedean +architechturally architecturally +architechture architecture +architechtures architectures +architectual architectural +archtype archetype +archtypes archetypes +aready already +areodynamics aerodynamics +argubly arguably +arguement argument +arguements arguments +arised arose +arival arrival +armamant armament +armistace armistice +aroud around +arrangment arrangement +arrangments arrangements +arround around +artical article +artice article +articel article +artifical artificial +artifically artificially +artillary artillery +arund around +asetic ascetic +asign assign +aslo also +asociated associated +asorbed absorbed +asphyxation asphyxiation +assasin assassin +assasinate assassinate +assasinated assassinated +assasinates assassinates +assasination assassination +assasinations assassinations +assasined assassinated +assasins assassins +assassintation assassination +assemple assemble +assertation assertion +asside aside +assisnate assassinate +assit assist +assitant assistant +assocation association +assoicate associate +assoicated associated +assoicates associates +assosication assassination +asssassans assassins +assualt assault +assualted assaulted +assymetric asymmetric +assymetrical asymmetrical +asteriod asteroid +asthetic aesthetic +asthetical aesthetical +asthetically aesthetically +asume assume +aswell as well +atain attain +atempting attempting +atheistical atheistic +athenean athenian +atheneans athenians +athiesm atheism +athiest atheist +atorney attorney +atribute attribute +atributed attributed +atributes attributes +attaindre attainder, attained +attemp attempt +attemped attempted +attemt attempt +attemted attempted 
+attemting attempting +attemts attempts +attendence attendance +attendent attendant +attendents attendants +attened attended +attension attention +attitide attitude +attributred attributed +attrocities atrocities +audeince audience +auromated automated +austrailia Australia +austrailian Australian +auther author +authobiographic autobiographic +authobiography autobiography +authorative authoritative +authorites authorities +authorithy authority +authoritiers authorities +authoritive authoritative +authrorities authorities +autochtonous autochthonous +autoctonous autochthonous +automaticly automatically +automibile automobile +automonomous autonomous +autor author +autority authority +auxilary auxiliary +auxillaries auxiliaries +auxillary auxiliary +auxilliaries auxiliaries +auxilliary auxiliary +availablity availability +availaible available +availble available +availiable available +availible available +avalable available +avalance avalanche +avaliable available +avation aviation +avengence a vengeance +averageed averaged +avilable available +awared awarded +awya away +baceause because +backgorund background +backrounds backgrounds +bakc back +banannas bananas +bandwith bandwidth +bankrupcy bankruptcy +banruptcy bankruptcy +baout about, bout +basicaly basically +basicly basically +bcak back +beachead beachhead +beacuse because +beastiality bestiality +beatiful beautiful +beaurocracy bureaucracy +beaurocratic bureaucratic +beautyfull beautiful +becamae became +becasue because +beccause because +becomeing becoming +becomming becoming +becouse because +becuase because +bedore before +befoer before +beggin begin, begging +begginer beginner +begginers beginners +beggining beginning +begginings beginnings +beggins begins +begining beginning +beginnig beginning +behavour behavior, behaviour +beleagured beleaguered +beleif belief +beleive believe +beleived believed +beleives believes +beleiving believing +beligum belgium +belive believe +belived believed +belives believes, beliefs +belligerant belligerent +bellweather bellwether +bemusemnt bemusement +beneficary beneficiary +beng being +benificial beneficial +benifit benefit +benifits benefits +bergamont bergamot +Bernouilli Bernoulli +beseige besiege +beseiged besieged +beseiging besieging +betwen between +beween between +bewteen between +bilateraly bilaterally +billingualism bilingualism +binominal binomial +bizzare bizarre +blaim blame +blaimed blamed +blessure blessing +Blitzkreig Blitzkrieg +boaut bout, boat, about +bodydbuilder bodybuilder +bombardement bombardment +bombarment bombardment +bondary boundary +Bonnano Bonanno +borke broke +boundry boundary +bouyancy buoyancy +bouyant buoyant +boyant buoyant +Brasillian Brazilian +breakthough breakthrough +breakthroughts breakthroughs +breif brief +breifly briefly +brethen brethren +bretheren brethren +briliant brilliant +brillant brilliant +brimestone brimstone +Britian Britain +Brittish British +broacasted broadcast +broadacasting broadcasting +broady broadly +Buddah Buddha +buisness business +buisnessman businessman +buoancy buoyancy +buring burying, burning, burin, during +burried buried +busineses business, businesses +busness business +bussiness business +cacuses caucuses +cahracters characters +calaber caliber +calander calendar, calender, colander +calculs calculus +calenders calendars +caligraphy calligraphy +caluclate calculate +caluclated calculated +caluculate calculate +caluculated calculated +calulate calculate +calulated calculated +Cambrige Cambridge +camoflage camouflage 
+campain campaign +campains campaigns +candadate candidate +candiate candidate +candidiate candidate +cannister canister +cannisters canisters +cannnot cannot +cannonical canonical +cannotation connotation +cannotations connotations +cant cannot, can not, can't +caost coast +caperbility capability +Capetown Cape Town +capible capable +captial capital +captued captured +capturd captured +carachter character +caracterized characterized +carcas carcass, Caracas +carefull careful +careing caring +carismatic charismatic +Carmalite Carmelite +carmel caramel, carmel-by-the-sea +carniverous carnivorous +carreer career +carrers careers +Carribbean Caribbean +Carribean Caribbean +cartdridge cartridge +Carthagian Carthaginian +carthographer cartographer +cartilege cartilage +cartilidge cartilage +cartrige cartridge +casette cassette +casion caisson +cassawory cassowary +cassowarry cassowary +casulaties casualties +casulaty casualty +catagories categories +catagorized categorized +catagory category +catergorize categorize +catergorized categorized +Cataline Catiline, Catalina +cathlic catholic +catholocism catholicism +catterpilar caterpillar +catterpilars caterpillars +cattleship battleship +causalities casualties +Ceasar Caesar +Celcius Celsius +cellpading cellpadding +cementary cemetery +cemetarey cemetery +cemetaries cemeteries +cemetary cemetery +cencus census +censur censor, censure +cententenial centennial +centruies centuries +centruy century +ceratin certain, keratin +cerimonial ceremonial +cerimonies ceremonies +cerimonious ceremonious +cerimony ceremony +ceromony ceremony +certainity certainty +certian certain +cervial cervical, servile, serval +chalenging challenging +challange challenge +challanged challenged +challege challenge +Champange Champagne +changable changeable +charachter character +charactor character +charachters characters +charactersistic characteristic +charactors characters +charasmatic charismatic +charaterized characterized +chariman chairman +charistics characteristics +chasr chaser, chase +cheif chief +chemcial chemical +chemcially chemically +chemestry chemistry +chemicaly chemically +childbird childbirth +childen children +choosen chosen +chracter character +chuch church +churchs churches +Cincinatti Cincinnati +Cincinnatti Cincinnati +circulaton circulation +circumsicion circumcision +circut circuit +ciricuit circuit +ciriculum curriculum +civillian civilian +claer clear +claerer clearer +claerly clearly +claimes claims +clas class +clasic classic +clasical classical +clasically classically +cleareance clearance +clera clear, sclera +clincial clinical +clinicaly clinically +cmo com +cmoputer computer +co-incided coincided +coctail cocktail +coform conform +cognizent cognizant +coincedentally coincidentally +colaborations collaborations +colateral collateral +colelctive collective +collaberative collaborative +collecton collection +collegue colleague +collegues colleagues +collonade colonnade +collonies colonies +collony colony +collosal colossal +colonizators colonizers +comander commander, commandeer +comando commando +comandos commandos +comany company +comapany company +comback comeback +combanations combinations +combinatins combinations +combusion combustion +comdemnation condemnation +comemmorates commemorates +comemoretion commemoration +comision commission +comisioned commissioned +comisioner commissioner +comisioning commissioning +comisions commissions +comission commission +comissioned commissioned +comissioner commissioner +comissioning commissioning 
+comissions commissions +comited committed +comiting committing +comitted committed +comittee committee +comitting committing +commandoes commandos +commedic comedic +commemerative commemorative +commemmorate commemorate +commemmorating commemorating +commerical commercial +commerically commercially +commericial commercial +commericially commercially +commerorative commemorative +comming coming +comminication communication +commision commission +commisioned commissioned +commisioner commissioner +commisioning commissioning +commisions commissions +commited committed +commitee committee +commiting committing +committe committee +committment commitment +committments commitments +commmemorated commemorated +commongly commonly +commonweath commonwealth +commuications communications +commuinications communications +communciation communication +communiation communication +communites communities +compability compatibility +comparision comparison +comparisions comparisons +comparitive comparative +comparitively comparatively +compatabilities compatibilities +compatability compatibility +compatable compatible +compatablities compatibilities +compatablity compatibility +compatiable compatible +compatiblities compatibilities +compatiblity compatibility +compeitions competitions +compensantion compensation +competance competence +competant competent +competative competitive +competion competition, completion +competitiion competition +competive competitive +competiveness competitiveness +comphrehensive comprehensive +compitent competent +completedthe completed the +completelyl completely +completetion completion +complier compiler +componant component +comprable comparable +comprimise compromise +compulsary compulsory +compulsery compulsory +computarized computerized +concensus consensus +concider consider +concidered considered +concidering considering +conciders considers +concieted conceited +concieved conceived +concious conscious +conciously consciously +conciousness consciousness +condamned condemned +condemmed condemned +condidtion condition +condidtions conditions +conditionsof conditions of +conected connected +conection connection +conesencus consensus +confidental confidential +confidentally confidentially +confids confides +configureable configurable +confortable comfortable +congradulations congratulations +congresional congressional +conived connived +conjecutre conjecture +conjuction conjunction +Conneticut Connecticut +conotations connotations +conquerd conquered +conquerer conqueror +conquerers conquerors +conqured conquered +conscent consent +consciouness consciousness +consdider consider +consdidered considered +consdiered considered +consectutive consecutive +consenquently consequently +consentrate concentrate +consentrated concentrated +consentrates concentrates +consept concept +consequentually consequently +consequeseces consequences +consern concern +conserned concerned +conserning concerning +conservitive conservative +consiciousness consciousness +consicousness consciousness +considerd considered +consideres considered +consious conscious +consistant consistent +consistantly consistently +consituencies constituencies +consituency constituency +consituted constituted +consitution constitution +consitutional constitutional +consolodate consolidate +consolodated consolidated +consonent consonant +consonents consonants +consorcium consortium +conspiracys conspiracies +conspiriator conspirator +constaints constraints +constanly constantly +constarnation consternation +constatn 
constant +constinually continually +constituant constituent +constituants constituents +constituion constitution +constituional constitutional +consttruction construction +constuction construction +consulant consultant +consumate consummate +consumated consummated +contaiminate contaminate +containes contains +contamporaries contemporaries +contamporary contemporary +contempoary contemporary +contemporaneus contemporaneous +contempory contemporary +contendor contender +contined continued +continous continuous +continously continuously +continueing continuing +contravercial controversial +contraversy controversy +contributer contributor +contributers contributors +contritutions contributions +controled controlled +controling controlling +controll control +controlls controls +controvercial controversial +controvercy controversy +controveries controversies +controversal controversial +controversey controversy +controvertial controversial +controvery controversy +contruction construction +conveinent convenient +convenant covenant +convential conventional +convertables convertibles +convertion conversion +conveyer conveyor +conviced convinced +convienient convenient +coordiantion coordination +coorperation cooperation, corporation +coorperations corporations +copmetitors competitors +coputer computer +copywrite copyright +coridal cordial +cornmitted committed +corosion corrosion +corparate corporate +corperations corporations +correcters correctors +correponding corresponding +correposding corresponding +correspondant correspondent +correspondants correspondents +corridoors corridors +corrispond correspond +corrispondant correspondent +corrispondants correspondents +corrisponded corresponded +corrisponding corresponding +corrisponds corresponds +costitution constitution +coucil council +coudl could, cloud +councellor councillor, counselor, councilor +councellors councillors, counselors, councilors +counries countries +countains contains +countires countries +coururier courier, couturier +coverted converted, covered, coveted +cpoy coy, copy +creaeted created +creedence credence +critereon criterion +criterias criteria +criticists critics +critising criticising, criticizing +critisising criticising +critisism criticism +critisisms criticisms +critisize criticise, criticize +critisized criticised, criticized +critisizes criticises, criticizes +critisizing criticising, criticizing +critized criticized +critizing criticizing +crockodiles crocodiles +crowm crown +crtical critical +crticised criticised +crucifiction crucifixion +crusies cruises +crystalisation crystallisation +culiminating culminating +cumulatative cumulative +curch church +curcuit circuit +currenly currently +curriculem curriculum +cxan cyan +cyclinder cylinder +dael deal, dial, dahl +dalmation dalmatian +damenor demeanor +Dardenelles Dardanelles +dacquiri daiquiri +debateable debatable +decendant descendant +decendants descendants +decendent descendant +decendents descendants +decideable decidable +decidely decidedly +decieved deceived +decison decision +decomissioned decommissioned +decomposit decompose +decomposited decomposed +decompositing decomposing +decomposits decomposes +decress decrees +decribe describe +decribed described +decribes describes +decribing describing +dectect detect +defendent defendant +defendents defendants +deffensively defensively +deffine define +deffined defined +definance defiance +definate definite +definately definitely +definatly definitely +definetly definitely +definining defining +definit 
definite +definitly definitely +definiton definition +defintion definition +degrate degrade +delagates delegates +delapidated dilapidated +delerious delirious +delevopment development +deliberatly deliberately +delusionally delusively +demenor demeanor +demographical demographic +demolision demolition +demorcracy democracy +demostration demonstration +denegrating denigrating +densly densely +deparment department +deparments departments +deparmental departmental +dependance dependence +dependancy dependency +dependant dependent +deram dram, dream +deriviated derived +derivitive derivative +derogitory derogatory +descendands descendants +descibed described +descision decision +descisions decisions +descriibes describes +descripters descriptors +descripton description +desctruction destruction +descuss discuss +desgined designed +deside decide +desigining designing +desinations destinations +desintegrated disintegrated +desintegration disintegration +desireable desirable +desitned destined +desktiop desktop +desorder disorder +desoriented disoriented +desparate desperate, disparate +despatched dispatched +despict depict +despiration desperation +dessicated desiccated +dessigned designed +destablized destabilized +destory destroy +detailled detailed +detatched detached +deteoriated deteriorated +deteriate deteriorate +deterioriating deteriorating +determinining determining +detremental detrimental +devasted devastated +develope develop +developement development +developped developed +develpment development +devels delves +devestated devastated +devestating devastating +devide divide +devided divided +devistating devastating +devolopement development +diablical diabolical +diamons diamonds +diaster disaster +dichtomy dichotomy +diconnects disconnects +dicover discover +dicovered discovered +dicovering discovering +dicovers discovers +dicovery discovery +dicussed discussed +didnt didn't +diea idea, die +dieing dying, dyeing +dieties deities +diety deity +diferent different +diferrent different +differentiatiations differentiations +differnt different +difficulity difficulty +diffrent different +dificulties difficulties +dificulty difficulty +dimenions dimensions +dimention dimension +dimentional dimensional +dimentions dimensions +dimesnional dimensional +diminuitive diminutive +diosese diocese +diphtong diphthong +diphtongs diphthongs +diplomancy diplomacy +dipthong diphthong +dipthongs diphthongs +dirived derived +disagreeed disagreed +disapeared disappeared +disapointing disappointing +disappearred disappeared +disaproval disapproval +disasterous disastrous +disatisfaction dissatisfaction +disatisfied dissatisfied +disatrous disastrous +discontentment discontent +discribe describe +discribed described +discribes describes +discribing describing +disctinction distinction +disctinctive distinctive +disemination dissemination +disenchanged disenchanted +disiplined disciplined +disobediance disobedience +disobediant disobedient +disolved dissolved +disover discover +dispair despair +disparingly disparagingly +dispence dispense +dispenced dispensed +dispencing dispensing +dispicable despicable +dispite despite +dispostion disposition +disproportiate disproportionate +disputandem disputandum +disricts districts +dissagreement disagreement +dissapear disappear +dissapearance disappearance +dissapeared disappeared +dissapearing disappearing +dissapears disappears +dissappear disappear +dissappears disappears +dissappointed disappointed +dissarray disarray +dissobediance disobedience +dissobediant 
disobedient +dissobedience disobedience +dissobedient disobedient +distiction distinction +distingish distinguish +distingished distinguished +distingishes distinguishes +distingishing distinguishing +distingquished distinguished +distrubution distribution +distruction destruction +distructive destructive +ditributed distributed +diversed diverse, diverged +divice device +divison division +divisons divisions +doccument document +doccumented documented +doccuments documents +docrines doctrines +doctines doctrines +documenatry documentary +doens does +doesnt doesn't +doign doing +dominaton domination +dominent dominant +dominiant dominant +donig doing +dosen't doesn't +doub doubt, daub +doulbe double +dowloads downloads +dramtic dramatic +draughtman draughtsman +Dravadian Dravidian +dreasm dreams +driectly directly +drnik drink +druming drumming +drummless drumless +dupicate duplicate +durig during +durring during +duting during +dyas dryas +eahc each +ealier earlier +earlies earliest +earnt earned +ecclectic eclectic +eceonomy economy +ecidious deciduous +eclispe eclipse +ecomonic economic +ect etc +eearly early +efel evil +effeciency efficiency +effecient efficient +effeciently efficiently +efficency efficiency +efficent efficient +efficently efficiently +efford effort, afford +effords efforts, affords +effulence effluence +eigth eighth, eight +eiter either +elction election +electic eclectic, electric +electon election, electron +electrial electrical +electricly electrically +electricty electricity +elementay elementary +eleminated eliminated +eleminating eliminating +eles eels +eletricity electricity +elicided elicited +eligable eligible +elimentary elementary +ellected elected +elphant elephant +embarass embarrass +embarassed embarrassed +embarassing embarrassing +embarassment embarrassment +embargos embargoes +embarras embarrass +embarrased embarrassed +embarrasing embarrassing +embarrasment embarrassment +embezelled embezzled +emblamatic emblematic +eminate emanate +eminated emanated +emision emission +emited emitted +emiting emitting +emition emission, emotion +emmediately immediately +emmigrated emigrated +emminent eminent, imminent +emminently eminently +emmisaries emissaries +emmisarries emissaries +emmisarry emissary +emmisary emissary +emmision emission +emmisions emissions +emmited emitted +emmiting emitting +emmitted emitted +emmitting emitting +emnity enmity +emperical empirical +emphaised emphasised +emphsis emphasis +emphysyma emphysema +empirial empirical, imperial +emprisoned imprisoned +enameld enameled +enchancement enhancement +encouraing encouraging +encryptiion encryption +encylopedia encyclopedia +endevors endeavors +endevour endeavour +endig ending +endolithes endoliths +enduce induce +ened need +enflamed inflamed +enforceing enforcing +engagment engagement +engeneer engineer +engeneering engineering +engieneer engineer +engieneers engineers +enlargment enlargement +enlargments enlargements +Enlish English, enlist +enourmous enormous +enourmously enormously +ensconsed ensconced +entaglements entanglements +enteratinment entertainment +entitity entity +entitlied entitled +entrepeneur entrepreneur +entrepeneurs entrepreneurs +enviorment environment +enviormental environmental +enviormentally environmentally +enviorments environments +enviornment environment +enviornmental environmental +enviornmentalist environmentalist +enviornmentally environmentally +enviornments environments +enviroment environment +enviromental environmental +enviromentalist environmentalist 
+enviromentally environmentally +enviroments environments +envolutionary evolutionary +envrionments environments +enxt next +epidsodes episodes +epsiode episode +equialent equivalent +equilibium equilibrium +equilibrum equilibrium +equiped equipped +equippment equipment +equitorial equatorial +equivelant equivalent +equivelent equivalent +equivilant equivalent +equivilent equivalent +equivlalent equivalent +erally orally, really +eratic erratic +eratically erratically +eraticly erratically +erested arrested, erected +errupted erupted +esential essential +esitmated estimated +esle else +especialy especially +essencial essential +essense essence +essentail essential +essentialy essentially +essentual essential +essesital essential +estabishes establishes +establising establishing +ethnocentricm ethnocentrism +ethose those, ethos +Europian European +Europians Europeans +Eurpean European +Eurpoean European +evenhtually eventually +eventally eventually +eventially eventually +eventualy eventually +everthing everything +everytime every time +everyting everything +eveyr every +evidentally evidently +exagerate exaggerate +exagerated exaggerated +exagerates exaggerates +exagerating exaggerating +exagerrate exaggerate +exagerrated exaggerated +exagerrates exaggerates +exagerrating exaggerating +examinated examined +exampt exempt +exapansion expansion +excact exact +excange exchange +excecute execute +excecuted executed +excecutes executes +excecuting executing +excecution execution +excedded exceeded +excelent excellent +excell excel +excellance excellence +excellant excellent +excells excels +excercise exercise +exchanching exchanging +excisted existed +exculsivly exclusively +execising exercising +exection execution +exectued executed +exeedingly exceedingly +exelent excellent +exellent excellent +exemple example +exept except +exeptional exceptional +exerbate exacerbate +exerbated exacerbated +exerciese exercises +exerpt excerpt +exerpts excerpts +exersize exercise +exerternal external +exhalted exalted +exhibtion exhibition +exibition exhibition +exibitions exhibitions +exicting exciting +exinct extinct +existance existence +existant existent +existince existence +exliled exiled +exludes excludes +exmaple example +exonorate exonerate +exoskelaton exoskeleton +expalin explain +expeced expected +expecially especially +expeditonary expeditionary +expeiments experiments +expell expel +expells expels +experiance experience +experianced experienced +expiditions expeditions +expierence experience +explaination explanation +explaning explaining +explictly explicitly +exploititive exploitative +explotation exploitation +expropiated expropriated +expropiation expropriation +exressed expressed +extemely extremely +extention extension +extentions extensions +extered exerted +extermist extremist +extint extinct, extant +extradiction extradition +extraterrestial extraterrestrial +extraterrestials extraterrestrials +extravagent extravagant +extrememly extremely +extremeophile extremophile +extremly extremely +extrordinarily extraordinarily +extrordinary extraordinary +eyar year, eyas +eyars years, eyas +eyasr years, eyas +faciliate facilitate +faciliated facilitated +faciliates facilitates +facilites facilities +facillitate facilitate +facinated fascinated +facist fascist +familes families +familliar familiar +famoust famous +fanatism fanaticism +Farenheit Fahrenheit +fatc fact +faught fought +favoutrable favourable +feasable feasible +Febuary February +fedreally federally +feromone pheromone +fertily 
fertility +fianite finite +fianlly finally +ficticious fictitious +fictious fictitious +fidn find +fiel feel, field, file, phial +fiels feels, fields, files, phials +fiercly fiercely +fightings fighting +filiament filament +fimilies families +finacial financial +finaly finally +financialy financially +firends friends +firts flirts, first +fisionable fissionable +flamable flammable +flawess flawless +fleed fled, freed +Flemmish Flemish +florescent fluorescent +flourescent fluorescent +fluorish flourish +follwoing following +folowing following +fomed formed +fomr from, form +fonetic phonetic +fontrier fontier +foootball football +forbad forbade +forbiden forbidden +foreward foreword +forfiet forfeit +forhead forehead +foriegn foreign +Formalhaut Fomalhaut +formallize formalize +formallized formalized +formaly formally +formelly formerly +formidible formidable +formost foremost +forsaw foresaw +forseeable foreseeable +fortelling foretelling +forunner forerunner +foucs focus +foudn found +fougth fought +foundaries foundries +foundary foundry +Foundland Newfoundland +fourties forties +fourty forty +fouth fourth +foward forward +fucntion function +fucntioning functioning +Fransiscan Franciscan +Fransiscans Franciscans +freind friend +freindly friendly +frequentily frequently +frome from +fromed formed +froniter frontier +fufill fulfill +fufilled fulfilled +fulfiled fulfilled +fundametal fundamental +fundametals fundamentals +funguses fungi +funtion function +furuther further +futher further +futhermore furthermore +futhroc futhark, futhorc +gae game, Gael, gale +galatic galactic +Galations Galatians +gallaxies galaxies +galvinized galvanized +Gameboy Game Boy +ganerate generate +ganes games +ganster gangster +garantee guarantee +garanteed guaranteed +garantees guarantees +garnison garrison +gauarana guaraná +gaurantee guarantee +gauranteed guaranteed +gaurantees guarantees +gaurd guard, gourd +gaurentee guarantee +gaurenteed guaranteed +gaurentees guarantees +geneological genealogical +geneologies genealogies +geneology genealogy +generaly generally +generatting generating +genialia genitalia +geographicial geographical +geometrician geometer +geometricians geometers +gerat great +Ghandi Gandhi +glight flight +gnawwed gnawed +godess goddess +godesses goddesses +Godounov Godunov +gogin going, Gauguin +goign going +gonig going +Gothenberg Gothenburg +Gottleib Gottlieb +gouvener governor +govement government +govenment government +govenrment government +goverance governance +goverment government +govermental governmental +governer governor +governmnet government +govorment government +govormental governmental +govornment government +gracefull graceful +graet great +grafitti graffiti +gramatically grammatically +grammaticaly grammatically +grammer grammar +grat great +gratuitious gratuitous +greatful grateful +greatfully gratefully +greif grief +gridles griddles +gropu group +grwo grow +Guaduloupe Guadalupe, Guadeloupe +Guadulupe Guadalupe, Guadeloupe +guage gauge +guarentee guarantee +guarenteed guaranteed +guarentees guarantees +Guatamala Guatemala +Guatamalan Guatemalan +guerilla guerrilla +guerillas guerrillas +guerrila guerrilla +guerrilas guerrillas +guidence guidance +Guilia Giulia +Guilio Giulio +Guiness Guinness +Guiseppe Giuseppe +gunanine guanine +gurantee guarantee +guranteed guaranteed +gurantees guarantees +guttaral guttural +gutteral guttural +habaeus habeas +habeus habeas +Habsbourg Habsburg +haemorrage haemorrhage +haev have, heave +Hallowean Hallowe'en, Halloween +halp help +hapen 
happen +hapened happened +hapening happening +happend happened +happended happened +happenned happened +harased harassed +harases harasses +harasment harassment +harasments harassments +harassement harassment +harras harass +harrased harassed +harrases harasses +harrasing harassing +harrasment harassment +harrasments harassments +harrassed harassed +harrasses harassed +harrassing harassing +harrassment harassment +harrassments harassments +hasnt hasn't +haviest heaviest +headquater headquarter +headquarer headquarter +headquatered headquartered +headquaters headquarters +healthercare healthcare +heared heard +heathy healthy +Heidelburg Heidelberg +heigher higher +heirarchy hierarchy +heiroglyphics hieroglyphics +helment helmet +helpfull helpful +helpped helped +hemmorhage hemorrhage +herad heard, Hera +heridity heredity +heroe hero +heros heroes +hertzs hertz +hesistant hesitant +heterogenous heterogeneous +hieght height +hierachical hierarchical +hierachies hierarchies +hierachy hierarchy +hierarcical hierarchical +hierarcy hierarchy +hieroglph hieroglyph +hieroglphs hieroglyphs +higer higher +higest highest +higway highway +hillarious hilarious +himselv himself +hinderance hindrance +hinderence hindrance +hindrence hindrance +hipopotamus hippopotamus +hismelf himself +histocompatability histocompatibility +historicians historians +hitsingles hit singles +holliday holiday +homestate home state +homogeneize homogenize +homogeneized homogenized +honory honorary +horrifing horrifying +hosited hoisted +hospitible hospitable +hounour honour +housr hours, house +howver however +hsitorians historians +hstory history +hten then, hen, the +htere there, here +htey they +htikn think +hting thing +htink think +htis this +humer humor, humour +humerous humorous, humerus +huminoid humanoid +humoural humoral +humurous humorous +husban husband +hvae have +hvaing having +hvea have, heave +hwihc which +hwile while +hwole whole +hydogen hydrogen +hydropile hydrophile +hydropilic hydrophilic +hydropobe hydrophobe +hydropobic hydrophobic +hygeine hygiene +hypocracy hypocrisy +hypocrasy hypocrisy +hypocricy hypocrisy +hypocrit hypocrite +hypocrits hypocrites +iconclastic iconoclastic +idaeidae idea +idaes ideas +idealogies ideologies +idealogy ideology +identicial identical +identifers identifiers +ideosyncratic idiosyncratic +idesa ideas, ides +idiosyncracy idiosyncrasy +Ihaca Ithaca +illegimacy illegitimacy +illegitmate illegitimate +illess illness +illiegal illegal +illution illusion +ilness illness +ilogical illogical +imagenary imaginary +imagin imagine +imaginery imaginary, imagery +imanent eminent, imminent +imcomplete incomplete +imediately immediately +imense immense +imigrant emigrant, immigrant +imigrated emigrated, immigrated +imigration emigration, immigration +iminent eminent, imminent, immanent +immediatley immediately +immediatly immediately +immidately immediately +immidiately immediately +immitate imitate +immitated imitated +immitating imitating +immitator imitator +immunosupressant immunosuppressant +impecabbly impeccably +impedence impedance +implamenting implementing +impliment implement +implimented implemented +imploys employs +importamt important +imprioned imprisoned +imprisonned imprisoned +improvision improvisation +improvments improvements +inablility inability +inaccessable inaccessible +inadiquate inadequate +inadquate inadequate +inadvertant inadvertent +inadvertantly inadvertently +inagurated inaugurated +inaguration inauguration +inappropiate inappropriate +inaugures 
inaugurates +inbalance imbalance +inbalanced imbalanced +inbetween between +incarcirated incarcerated +incidentially incidentally +incidently incidentally +inclreased increased +includ include +includng including +incompatabilities incompatibilities +incompatability incompatibility +incompatable incompatible +incompatablities incompatibilities +incompatablity incompatibility +incompatiblities incompatibilities +incompatiblity incompatibility +incompetance incompetence +incompetant incompetent +incomptable incompatible +incomptetent incompetent +inconsistant inconsistent +incorperation incorporation +incorportaed incorporated +incorprates incorporates +incorruptable incorruptible +incramentally incrementally +increadible incredible +incredable incredible +inctroduce introduce +inctroduced introduced +incuding including +incunabla incunabula +indefinately indefinitely +indefineable undefinable +indefinitly indefinitely +indentical identical +indepedantly independently +indepedence independence +independance independence +independant independent +independantly independently +independece independence +independendet independent +indictement indictment +indigineous indigenous +indipendence independence +indipendent independent +indipendently independently +indespensible indispensable +indespensable indispensable +indispensible indispensable +indisputible indisputable +indisputibly indisputably +indite indict +individualy individually +indpendent independent +indpendently independently +indulgue indulge +indutrial industrial +indviduals individuals +inefficienty inefficiently +inevatible inevitable +inevitible inevitable +inevititably inevitably +infalability infallibility +infallable infallible +infectuous infectious +infered inferred +infilitrate infiltrate +infilitrated infiltrated +infilitration infiltration +infinit infinite +inflamation inflammation +influencial influential +influented influenced +infomation information +informtion information +infrantryman infantryman +infrigement infringement +ingenius ingenious +ingreediants ingredients +inhabitans inhabitants +inherantly inherently +inheritage heritage, inheritance +inheritence inheritance +inital initial +initally initially +initation initiation +initiaitive initiative +inlcuding including +inmigrant immigrant +inmigrants immigrants +innoculated inoculated +inocence innocence +inofficial unofficial +inot into +inpeach impeach +inpolite impolite +inprisonment imprisonment +inproving improving +insectiverous insectivorous +insensative insensitive +inseperable inseparable +insistance insistence +insitution institution +insitutions institutions +inspite in spite, inspire +instade instead +instatance instance +institue institute +instuction instruction +instuments instruments +instutionalized institutionalized +instutions intuitions +insurence insurance +intelectual intellectual +inteligence intelligence +inteligent intelligent +intenational international +intepretation interpretation +intepretator interpretor +interational international +interbread interbreed, interbred +interchangable interchangeable +interchangably interchangeably +intercontinetal intercontinental +intered interred, interned +interelated interrelated +interferance interference +interfereing interfering +intergrated integrated +intergration integration +interm interim +internation international +interpet interpret +interrim interim +interrugum interregnum +intertaining entertaining +interupt interrupt +intervines intervenes +intevene intervene +intial initial +intially 
initially +intrduced introduced +intrest interest +introdued introduced +intruduced introduced +intrusted entrusted +intutive intuitive +intutively intuitively +inudstry industry +inumerable enumerable, innumerable +inventer inventor +invertibrates invertebrates +investingate investigate +involvment involvement +irelevent irrelevant +iresistable irresistible +iresistably irresistibly +iresistible irresistible +iresistibly irresistibly +iritable irritable +iritated irritated +ironicly ironically +irregardless regardless +irrelevent irrelevant +irreplacable irreplaceable +irresistable irresistible +irresistably irresistibly +isnt isn't +Israelies Israelis +issueing issuing +itnroduced introduced +iunior junior +iwll will +iwth with +Japanes Japanese +jaques jacques +jeapardy jeopardy +jewllery jewellery +Johanine Johannine +Jospeh Joseph +jouney journey +journied journeyed +journies journeys +jstu just +jsut just +Juadaism Judaism +Juadism Judaism +judical judicial +judisuary judiciary +juducial judicial +juristiction jurisdiction +juristictions jurisdictions +kindergarden kindergarten +klenex kleenex +knifes knives +knive knife +knowlege knowledge +knowlegeable knowledgeable +knwo know +knwos knows +konw know +konws knows +kwno know +labatory lavatory, laboratory +labled labelled, labeled +labratory laboratory +laguage language +laguages languages +larg large +largst largest +larrry larry +lastr last +lattitude latitude +launchs launch +launhed launched +lavae larvae +layed laid +lazyness laziness +leaded led +leage league +leanr lean, learn, leaner +leathal lethal +lefted left +legitamate legitimate +legitmate legitimate +leibnitz leibniz +lenght length +leran learn +lerans learns +lieuenant lieutenant +leutenant lieutenant +levetate levitate +levetated levitated +levetates levitates +levetating levitating +levle level +liasion liaison +liason liaison +liasons liaisons +libary library +libell libel +libguistic linguistic +libguistics linguistics +libitarianisn libertarianism +lible libel, liable +lieing lying +liek like +liekd liked +liesure leisure +lieved lived +liftime lifetime +lightyear light year +lightyears light years +likelyhood likelihood +linnaena linnaean +lippizaner lipizzaner +liquify liquefy +liscense license, licence +lisence license, licence +lisense license, licence +listners listeners +litature literature +literture literature +littel little +litterally literally +liuke like +livley lively +lmits limits +loev love +lonelyness loneliness +longitudonal longitudinal +lonley lonely +lonly lonely, only +loosing losing +lotharingen lothringen +lsat last +lukid likud +lveo love +lvoe love +Lybia Libya +mackeral mackerel +magasine magazine +magincian magician +magnificient magnificent +magolia magnolia +mailny mainly +maintainance maintenance +maintainence maintenance +maintance maintenance +maintenence maintenance +maintinaing maintaining +maintioned mentioned +majoroty majority +maked marked, made +makse makes +Malcom Malcolm +maltesian Maltese +mamal mammal +mamalian mammalian +managable manageable, manageably +managment management +manisfestations manifestations +manoeuverability maneuverability +manouver maneuver, manoeuvre +manouverability maneuverability, manoeuvrability, manoeuverability +manouverable maneuverable, manoeuvrable +manouvers maneuvers, manoeuvres +mantained maintained +manuever maneuver, manoeuvre +manuevers maneuvers, manoeuvres +manufacturedd manufactured +manufature manufacture +manufatured manufactured +manufaturing manufacturing +manuver maneuver 
+mariage marriage +marjority majority +markes marks +marketting marketing +marmelade marmalade +marrage marriage +marraige marriage +marrtyred martyred +marryied married +Massachussets Massachusetts +Massachussetts Massachusetts +massmedia mass media +masterbation masturbation +mataphysical metaphysical +materalists materialist +mathamatics mathematics +mathematican mathematician +mathematicas mathematics +matheticians mathematicians +mathmatically mathematically +mathmatician mathematician +mathmaticians mathematicians +mccarthyst mccarthyist +mchanics mechanics +meaninng meaning +mear wear, mere, mare +mechandise merchandise +medacine medicine +medeival medieval +medevial medieval +mediciney mediciny +medievel medieval +mediterainnean mediterranean +Mediteranean Mediterranean +meerkrat meerkat +melieux milieux +membranaphone membranophone +memeber member +menally mentally +meranda veranda, Miranda +mercentile mercantile +messanger messenger +messenging messaging +metalic metallic +metalurgic metallurgic +metalurgical metallurgical +metalurgy metallurgy +metamorphysis metamorphosis +metaphoricial metaphorical +meterologist meteorologist +meterology meteorology +methaphor metaphor +methaphors metaphors +Michagan Michigan +micoscopy microscopy +midwifes midwives +mileau milieu +milennia millennia +milennium millennium +mileu milieu +miliary military +milion million +miliraty military +millenia millennia +millenial millennial +millenialism millennialism +millenium millennium +millepede millipede +millioniare millionaire +millitary military +millon million +miltary military +minature miniature +minerial mineral +miniscule minuscule +ministery ministry +minstries ministries +minstry ministry +minumum minimum +mirrorred mirrored +miscelaneous miscellaneous +miscellanious miscellaneous +miscellanous miscellaneous +mischeivous mischievous +mischevious mischievous +mischievious mischievous +misdameanor misdemeanor +misdameanors misdemeanors +misdemenor misdemeanor +misdemenors misdemeanors +misfourtunes misfortunes +misile missile +Misouri Missouri +mispell misspell +mispelled misspelled +mispelling misspelling +missen mizzen +Missisipi Mississippi +Missisippi Mississippi +missle missile +missonary missionary +misterious mysterious +mistery mystery +misteryous mysterious +mkae make +mkaes makes +mkaing making +mkea make +moderm modem +modle model +moent moment +moeny money +mohammedans muslims +moil mohel +moleclues molecules +momento memento +monestaries monasteries +monestary monastery, monetary +monickers monikers +monolite monolithic +Monserrat Montserrat +montains mountains +montanous mountainous +monts months +montypic monotypic +moreso more, more so +morgage mortgage +Morisette Morissette +Morrisette Morissette +morroccan moroccan +morrocco morocco +morroco morocco +mosture moisture +motiviated motivated +mounth month +movei movie +movment movement +mroe more +mucuous mucous +muder murder +mudering murdering +muhammadan muslim +multicultralism multiculturalism +multipled multiplied +multiplers multipliers +munbers numbers +muncipalities municipalities +muncipality municipality +munnicipality municipality +muscels mussels, muscles +muscial musical +muscician musician +muscicians musicians +mutiliated mutilated +myraid myriad +mysef myself +mysogynist misogynist +mysogyny misogyny +mysterous mysterious +Mythraic Mithraic +naieve naive +Napoleonian Napoleonic +naturaly naturally +naturely naturally +naturual natural +naturually naturally +Nazereth Nazareth +neccesarily necessarily +neccesary 
necessary +neccessarily necessarily +neccessary necessary +neccessities necessities +necesarily necessarily +necesary necessary +necessiate necessitate +neglible negligible +negligable negligible +negociate negotiate +negociation negotiation +negociations negotiations +negotation negotiation +neice niece, nice +neigborhood neighborhood +neigbour neighbour, neighbor +neigbourhood neighbourhood +neigbouring neighbouring, neighboring +neigbours neighbours, neighbors +neolitic neolithic +nessasarily necessarily +nessecary necessary +nestin nesting +neverthless nevertheless +newletters newsletters +Newyorker New Yorker +nickle nickel +nightfa;; nightfall +nightime nighttime +nineth ninth +ninteenth nineteenth +ninties 1990s +ninty ninety +nkow know +nkwo know +nmae name +noncombatents noncombatants +nonsence nonsense +nontheless nonetheless +noone no one +norhern northern +northen northern +northereastern northeastern +notabley notably +noteable notable +noteably notably +noteriety notoriety +noth north +nothern northern +noticable noticeable +noticably noticeably +noticeing noticing +noticible noticeable +notwhithstanding notwithstanding +noveau nouveau +nowdays nowadays +nowe now +nto not +nucular nuclear +nuculear nuclear +nuisanse nuisance +Nullabour Nullarbor +numberous numerous +Nuremburg Nuremberg +nusance nuisance +nutritent nutrient +nutritents nutrients +nuturing nurturing +obediance obedience +obediant obedient +obession obsession +obssessed obsessed +obstacal obstacle +obstancles obstacles +obstruced obstructed +ocasion occasion +ocasional occasional +ocasionally occasionally +ocasionaly occasionally +ocasioned occasioned +ocasions occasions +ocassion occasion +ocassional occasional +ocassionally occasionally +ocassionaly occasionally +ocassioned occasioned +ocassions occasions +occaison occasion +occassion occasion +occassional occasional +occassionally occasionally +occassionaly occasionally +occassioned occasioned +occassions occasions +occationally occasionally +occour occur +occurance occurrence +occurances occurrences +occured occurred +occurence occurrence +occurences occurrences +occuring occurring +occurr occur +occurrance occurrence +occurrances occurrences +octohedra octahedra +octohedral octahedral +octohedron octahedron +ocuntries countries +ocuntry country +ocurr occur +ocurrance occurrence +ocurred occurred +ocurrence occurrence +offcers officers +offcially officially +offereings offerings +offical official +officals officials +offically officially +officaly officially +officialy officially +offred offered +oftenly often +oging going, ogling +omision omission +omited omitted +omiting omitting +omlette omelette +ommision omission +ommited omitted +ommiting omitting +ommitted omitted +ommitting omitting +omniverous omnivorous +omniverously omnivorously +omre more +onot note, not +onyl only +openess openness +oponent opponent +oportunity opportunity +opose oppose +oposite opposite +oposition opposition +oppenly openly +oppinion opinion +opponant opponent +oppononent opponent +oppositition opposition +oppossed opposed +opprotunity opportunity +opression oppression +opressive oppressive +opthalmic ophthalmic +opthalmologist ophthalmologist +opthalmology ophthalmology +opthamologist ophthalmologist +optmizations optimizations +optomism optimism +orded ordered +organim organism +organiztion organization +orgin origin, organ +orginal original +orginally originally +orginize organise +oridinarily ordinarily +origanaly originally +originall original, originally +originaly 
originally +originially originally +originnally originally +origional original +orignally originally +orignially originally +otehr other +ouevre oeuvre +overshaddowed overshadowed +overthere over there +overwelming overwhelming +overwheliming overwhelming +owrk work +owudl would +oxigen oxygen +oximoron oxymoron +paide paid +paitience patience +palce place, palace +paleolitic paleolithic +paliamentarian parliamentarian +Palistian Palestinian +Palistinian Palestinian +Palistinians Palestinians +pallete palette +pamflet pamphlet +pamplet pamphlet +pantomine pantomime +Papanicalou Papanicolaou +paralel parallel +paralell parallel +paralelly parallelly +paralely parallelly +parallely parallelly +paranthesis parenthesis +paraphenalia paraphernalia +parellels parallels +parituclar particular +parliment parliament +parrakeets parakeets +parralel parallel +parrallel parallel +parrallell parallel +parrallelly parallelly +parrallely parallelly +partialy partially +particually particularly +particualr particular +particuarly particularly +particularily particularly +particulary particularly +pary party +pased passed +pasengers passengers +passerbys passersby +pasttime pastime +pastural pastoral +paticular particular +pattented patented +pavillion pavilion +payed paid +peacefuland peaceful and +peageant pageant +peculure peculiar +pedestrain pedestrian +peice piece +Peloponnes Peloponnesus +penatly penalty +penerator penetrator +penisula peninsula +penisular peninsular +penninsula peninsula +penninsular peninsular +pennisula peninsula +pensinula peninsula +peom poem +peoms poems +peopel people +peotry poetry +perade parade +percepted perceived +percieve perceive +percieved perceived +perenially perennially +perfomers performers +performence performance +performes performed, performs +perhasp perhaps +perheaps perhaps +perhpas perhaps +peripathetic peripatetic +peristent persistent +perjery perjury +perjorative pejorative +permanant permanent +permenant permanent +permenantly permanently +permissable permissible +perogative prerogative +peronal personal +perosnality personality +perphas perhaps +perpindicular perpendicular +perseverence perseverance +persistance persistence +persistant persistent +personel personnel, personal +personell personnel +personnell personnel +persuded persuaded +persue pursue +persued pursued +persuing pursuing +persuit pursuit +persuits pursuits +pertubation perturbation +pertubations perturbations +pessiary pessary +petetion petition +Pharoah Pharaoh +phenomenom phenomenon +phenomenonal phenomenal +phenomenonly phenomenally +phenomonenon phenomenon +phenomonon phenomenon +phenonmena phenomena +Philipines Philippines +philisopher philosopher +philisophical philosophical +philisophy philosophy +Phillipine Philippine +Phillipines Philippines +Phillippines Philippines +phillosophically philosophically +philospher philosopher +philosphies philosophies +philosphy philosophy +Phonecian Phoenecian +phongraph phonograph +phylosophical philosophical +physicaly physically +pich pitch +pilgrimmage pilgrimage +pilgrimmages pilgrimages +pinapple pineapple +pinnaple pineapple +pinoneered pioneered +plagarism plagiarism +planation plantation +planed planned +plantiff plaintiff +plateu plateau +plausable plausible +playright playwright +playwrite playwright +playwrites playwrights +pleasent pleasant +plebicite plebiscite +plesant pleasant +poeoples peoples +poety poetry +poisin poison +polical political +polinator pollinator +polinators pollinators +politican politician +politicans 
politicians +poltical political +polute pollute +poluted polluted +polutes pollutes +poluting polluting +polution pollution +polyphonyic polyphonic +polysaccaride polysaccharide +polysaccharid polysaccharide +pomegranite pomegranate +pomotion promotion +poportional proportional +popoulation population +popularaty popularity +populare popular +populer popular +portayed portrayed +portraing portraying +Portugese Portuguese +portuguease portuguese +posess possess +posessed possessed +posesses possesses +posessing possessing +posession possession +posessions possessions +posion poison +positon position, positron +possable possible +possably possibly +posseses possesses +possesing possessing +possesion possession +possessess possesses +possibile possible +possibilty possibility +possiblility possibility +possiblilty possibility +possiblities possibilities +possiblity possibility +possition position +Postdam Potsdam +posthomous posthumous +postion position +postive positive +potatos potatoes +portait portrait +potrait portrait +potrayed portrayed +poulations populations +poverful powerful +poweful powerful +powerfull powerful +practial practical +practially practically +practicaly practically +practicioner practitioner +practicioners practitioners +practicly practically +practioner practitioner +practioners practitioners +prairy prairie +prarie prairie +praries prairies +pratice practice +preample preamble +precedessor predecessor +preceed precede +preceeded preceded +preceeding preceding +preceeds precedes +precentage percentage +precice precise +precisly precisely +precurser precursor +predecesors predecessors +predicatble predictable +predicitons predictions +predomiantly predominately +prefered preferred +prefering preferring +preferrably preferably +pregancies pregnancies +preiod period +preliferation proliferation +premeire premiere +premeired premiered +premillenial premillennial +preminence preeminence +premission permission +Premonasterians Premonstratensians +preocupation preoccupation +prepair prepare +prepartion preparation +prepatory preparatory +preperation preparation +preperations preparations +preriod period +presedential presidential +presense presence +presidenital presidential +presidental presidential +presitgious prestigious +prespective perspective +prestigeous prestigious +prestigous prestigious +presumabely presumably +presumibly presumably +pretection protection +prevelant prevalent +preverse perverse +previvous previous +pricipal principal +priciple principle +priestood priesthood +primarly primarily +primative primitive +primatively primitively +primatives primitives +primordal primordial +priveledges privileges +privelege privilege +priveleged privileged +priveleges privileges +privelige privilege +priveliged privileged +priveliges privileges +privelleges privileges +privilage privilege +priviledge privilege +priviledges privileges +privledge privilege +privte private +probabilaty probability +probablistic probabilistic +probablly probably +probalibity probability +probaly probably +probelm problem +proccess process +proccessing processing +procede proceed, precede +proceded proceeded, preceded +procedes proceeds, precedes +procedger procedure +proceding proceeding, preceding +procedings proceedings +proceedure procedure +proces process +processer processor +proclaimation proclamation +proclamed proclaimed +proclaming proclaiming +proclomation proclamation +profesion profusion, profession +profesor professor +professer professor +proffesed professed +proffesion 
profession +proffesional professional +proffesor professor +profilic prolific +progessed progressed +programable programmable +progrom pogrom, program +progroms pogroms, programs +prohabition prohibition +prologomena prolegomena +prominance prominence +prominant prominent +prominantly prominently +prominately prominently, predominately +promiscous promiscuous +promotted promoted +pronomial pronominal +pronouced pronounced +pronounched pronounced +pronounciation pronunciation +proove prove +prooved proved +prophacy prophecy +propietary proprietary +propmted prompted +propoganda propaganda +propogate propagate +propogates propagates +propogation propagation +propostion proposition +propotions proportions +propper proper +propperly properly +proprietory proprietary +proseletyzing proselytizing +protaganist protagonist +protaganists protagonists +protocal protocol +protoganist protagonist +protrayed portrayed +protruberance protuberance +protruberances protuberances +prouncements pronouncements +provacative provocative +provded provided +provicial provincial +provinicial provincial +provisonal provisional +provisiosn provision +proximty proximity +pseudononymous pseudonymous +pseudonyn pseudonym +psuedo pseudo +psycology psychology +psyhic psychic +publicaly publicly +puchasing purchasing +Pucini Puccini +Puertorrican Puerto Rican +Puertorricans Puerto Ricans +pumkin pumpkin +puritannical puritanical +purposedly purposely +purpotedly purportedly +pursuade persuade +pursuaded persuaded +pursuades persuades +pususading persuading +puting putting +pwoer power +pyscic psychic +qtuie quite, quiet +quantaty quantity +quantitiy quantity +quarantaine quarantine +Queenland Queensland +questonable questionable +quicklyu quickly +quinessential quintessential +quitted quit +quizes quizzes +qutie quite, quiet +rabinnical rabbinical +racaus raucous +radiactive radioactive +radify ratify +raelly really +rarified rarefied +reaccurring recurring +reacing reaching +reacll recall +readmition readmission +realitvely relatively +realsitic realistic +realtions relations +realy really +realyl really +reasearch research +rebiulding rebuilding +rebllions rebellions +rebounce rebound +reccomend recommend +reccomendations recommendations +reccomended recommended +reccomending recommending +reccommend recommend +reccommended recommended +reccommending recommending +reccuring recurring +receeded receded +receeding receding +receivedfrom received from +recepient recipient +recepients recipients +receving receiving +rechargable rechargeable +reched reached +recide reside +recided resided +recident resident +recidents residents +reciding residing +reciepents recipients +reciept receipt +recieve receive +recieved received +reciever receiver +recievers receivers +recieves receives +recieving receiving +recipiant recipient +recipiants recipients +recived received +recivership receivership +recogise recognise +recogize recognize +recomend recommend +recomended recommended +recomending recommending +recomends recommends +recommedations recommendations +reconaissance reconnaissance +reconcilation reconciliation +reconized recognized +reconnaissence reconnaissance +recontructed reconstructed +recordproducer record producer +recquired required +recrational recreational +recrod record +recuiting recruiting +recuring recurring +recurrance recurrence +rediculous ridiculous +reedeming redeeming +reenforced reinforced +refect reflect +refedendum referendum +referal referral +refered referred +referiang referring +refering referring 
+refernces references +referrence reference +referrs refers +reffered referred +refference reference +refrence reference +refrences references +refrers refers +refridgeration refrigeration +refridgerator refrigerator +refromist reformist +refusla refusal +regardes regards +regluar regular +reguarly regularly +regulaion regulation +regulaotrs regulators +regularily regularly +rehersal rehearsal +reicarnation reincarnation +reigining reigning +reknown renown +reknowned renowned +rela real +relaly really +relatiopnship relationship +relativly relatively +relected reelected +releive relieve +releived relieved +releiver reliever +releses releases +relevence relevance +relevent relevant +reliablity reliability +relient reliant +religeous religious +religous religious +religously religiously +relinqushment relinquishment +relitavely relatively +relized realised, realized +relpacement replacement +remaing remaining +remeber remember +rememberable memorable +rememberance remembrance +remembrence remembrance +remenant remnant +remenicent reminiscent +reminent remnant +reminescent reminiscent +reminscent reminiscent +reminsicent reminiscent +rendevous rendezvous +rendezous rendezvous +renedered rende +renewl renewal +rentors renters +reoccurrence recurrence +reorganision reorganisation +repatition repetition, repartition +repentence repentance +repentent repentant +repeteadly repeatedly +repetion repetition +repid rapid +reponse response +reponsible responsible +reportadly reportedly +represantative representative +representive representative +representives representatives +reproducable reproducible +reprtoire repertoire +repsectively respectively +reptition repetition +requirment requirement +requred required +resaurant restaurant +resembelance resemblance +resembes resembles +resemblence resemblance +resevoir reservoir +resignement resignment +resistable resistible +resistence resistance +resistent resistant +respectivly respectively +responce response +responibilities responsibilities +responisble responsible +responnsibilty responsibility +responsability responsibility +responsibile responsible +responsibilites responsibilities +responsiblity responsibility +ressemblance resemblance +ressemble resemble +ressembled resembled +ressemblence resemblance +ressembling resembling +resssurecting resurrecting +ressurect resurrect +ressurected resurrected +ressurection resurrection +ressurrection resurrection +restaraunt restaurant +restaraunteur restaurateur +restaraunteurs restaurateurs +restaraunts restaurants +restauranteurs restaurateurs +restauration restoration +restauraunt restaurant +resteraunt restaurant +resteraunts restaurants +resticted restricted +restraunt restraint, restaurant +resturant restaurant +resturaunt restaurant +resurecting resurrecting +retalitated retaliated +retalitation retaliation +retreive retrieve +returnd returned +revaluated reevaluated +reveral reversal +reversable reversible +revolutionar revolutionary +rewitten rewritten +rewriet rewrite +rhymme rhyme +rhythem rhythm +rhythim rhythm +rhytmic rhythmic +rigeur rigueur, rigour, rigor +rigourous rigorous +rininging ringing +rised rose +Rockerfeller Rockefeller +rococco rococo +rocord record +roomate roommate +rougly roughly +rucuperate recuperate +rudimentatry rudimentary +rulle rule +runing running +runnung running +russina Russian +Russion Russian +rwite write +rythem rhythm +rythim rhythm +rythm rhythm +rythmic rhythmic +rythyms rhythms +sacrafice sacrifice +sacreligious sacrilegious +sacrifical sacrificial +saftey 
safety +safty safety +salery salary +sanctionning sanctioning +sandwhich sandwich +Sanhedrim Sanhedrin +santioned sanctioned +sargant sergeant +sargeant sergeant +sasy says, sassy +satelite satellite +satelites satellites +Saterday Saturday +Saterdays Saturdays +satisfactority satisfactorily +satric satiric +satrical satirical +satrically satirically +sattelite satellite +sattelites satellites +saught sought +saveing saving +saxaphone saxophone +scaleable scalable +scandanavia Scandinavia +scaricity scarcity +scavanged scavenged +schedual schedule +scholarhip scholarship +scholarstic scholastic, scholarly +scientfic scientific +scientifc scientific +scientis scientist +scince science +scinece science +scirpt script +scoll scroll +screenwrighter screenwriter +scrutinity scrutiny +scuptures sculptures +seach search +seached searched +seaches searches +secceeded seceded, succeeded +seceed succeed, secede +seceeded succeeded, seceded +secratary secretary +secretery secretary +sedereal sidereal +seeked sought +segementation segmentation +seguoys segues +seige siege +seing seeing +seinor senior +seldomly seldom +senarios scenarios +sence sense +senstive sensitive +sensure censure +seperate separate +seperated separated +seperately separately +seperates separates +seperating separating +seperation separation +seperatism separatism +seperatist separatist +sepina subpoena +sepulchure sepulchre, sepulcher +sepulcre sepulchre, sepulcher +sergent sergeant +settelement settlement +settlment settlement +severeal several +severley severely +severly severely +sevice service +shaddow shadow +shamen shaman, shamans +sheat sheath, sheet, cheat +sheild shield +sherif sheriff +shineing shining +shiped shipped +shiping shipping +shopkeeepers shopkeepers +shorly shortly +shortwhile short while +shoudl should +shoudln should, shouldn't +shouldnt shouldn't +shreak shriek +shrinked shrunk +sicne since +sideral sidereal +sieze seize, size +siezed seized, sized +siezing seizing, sizing +siezure seizure +siezures seizures +siginificant significant +signficant significant +signficiant significant +signfies signifies +signifantly significantly +significently significantly +signifigant significant +signifigantly significantly +signitories signatories +signitory signatory +similarily similarly +similiar similar +similiarity similarity +similiarly similarly +simmilar similar +simpley simply +simplier simpler +simultanous simultaneous +simultanously simultaneously +sincerley sincerely +singsog singsong +sinse sines, since +Sionist Zionist +Sionists Zionists +Sixtin Sistine +Skagerak Skagerrak +skateing skating +slaugterhouses slaughterhouses +slowy slowly +smae same +smealting smelting +smoe some +sneeks sneaks +snese sneeze +socalism socialism +socities societies +soem some +sofware software +sohw show +soilders soldiers +solatary solitary +soley solely +soliders soldiers +soliliquy soliloquy +soluable soluble +somene someone +somtimes sometimes +somwhere somewhere +sophicated sophisticated +sorceror sorcerer +sorrounding surrounding +sotry story +sotyr satyr, story +soudn sound +soudns sounds +sould could, should, sold +sountrack soundtrack +sourth south +sourthern southern +souvenier souvenir +souveniers souvenirs +soveits soviets +sovereignity sovereignty +soverign sovereign +soverignity sovereignty +soverignty sovereignty +spainish Spanish +speach speech +specfic specific +speciallized specialised, specialized +specif specific, specify +specifiying specifying +speciman specimen +spectauclar spectacular +spectaulars 
spectaculars +spects aspects, expects +spectum spectrum +speices species +spendour splendour +spermatozoan spermatozoon +spoace space +sponser sponsor +sponsered sponsored +spontanous spontaneous +sponzored sponsored +spoonfulls spoonfuls +sppeches speeches +spreaded spread +sprech speech +spred spread +spriritual spiritual +spritual spiritual +sqaure square +stablility stability +stainlees stainless +staion station +standars standards +stange strange +startegic strategic +startegies strategies +startegy strategy +stateman statesman +statememts statements +statment statement +steriods steroids +sterotypes stereotypes +stilus stylus +stingent stringent +stiring stirring +stirrs stirs +stlye style +stong strong +stopry story +storeis stories +storise stories +stornegst strongest +stoyr story +stpo stop +stradegies strategies +stradegy strategy +strat start, strata +stratagically strategically +streemlining streamlining +stregth strength +strenghen strengthen +strenghened strengthened +strenghening strengthening +strenght strength +strenghten strengthen +strenghtened strengthened +strenghtening strengthening +strengtened strengthened +strenous strenuous +strictist strictest +strikely strikingly +strnad strand +stroy story, destroy +structual structural +stubborness stubbornness +stucture structure +stuctured structured +studdy study +studing studying +stuggling struggling +sturcture structure +subcatagories subcategories +subcatagory subcategory +subconsiously subconsciously +subjudgation subjugation +submachne submachine +subpecies subspecies +subsidary subsidiary +subsiduary subsidiary +subsquent subsequent +subsquently subsequently +substace substance +substancial substantial +substatial substantial +substituded substituted +substract subtract +substracted subtracted +substracting subtracting +substraction subtraction +substracts subtracts +subtances substances +subterranian subterranean +suburburban suburban +succceeded succeeded +succcesses successes +succedded succeeded +succeded succeeded +succeds succeeds +succesful successful +succesfully successfully +succesfuly successfully +succesion succession +succesive successive +successfull successful +successully successfully +succsess success +succsessfull successful +suceed succeed +suceeded succeeded +suceeding succeeding +suceeds succeeds +sucesful successful +sucesfully successfully +sucesfuly successfully +sucesion succession +sucess success +sucesses successes +sucessful successful +sucessfull successful +sucessfully successfully +sucessfuly successfully +sucession succession +sucessive successive +sucessor successor +sucessot successor +sucide suicide +sucidial suicidal +sufferage suffrage +sufferred suffered +sufferring suffering +sufficent sufficient +sufficently sufficiently +sumary summary +sunglases sunglasses +suop soup +superceeded superseded +superintendant superintendent +suphisticated sophisticated +suplimented supplemented +supose suppose +suposed supposed +suposedly supposedly +suposes supposes +suposing supposing +supplamented supplemented +suppliementing supplementing +suppoed supposed +supposingly supposedly +suppy supply +supress suppress +supressed suppressed +supresses suppresses +supressing suppressing +suprise surprise +suprised surprised +suprising surprising +suprisingly surprisingly +suprize surprise +suprized surprised +suprizing surprising +suprizingly surprisingly +surfce surface +surley surly, surely +suround surround +surounded surrounded +surounding surrounding +suroundings surroundings +surounds 
surrounds +surplanted supplanted +surpress suppress +surpressed suppressed +surprize surprise +surprized surprised +surprizing surprising +surprizingly surprisingly +surrended surrounded, surrendered +surrepetitious surreptitious +surrepetitiously surreptitiously +surreptious surreptitious +surreptiously surreptitiously +surronded surrounded +surrouded surrounded +surrouding surrounding +surrundering surrendering +surveilence surveillance +surveill surveil +surveyer surveyor +surviver survivor +survivers survivors +survivied survived +suseptable susceptible +suseptible susceptible +suspention suspension +swaer swear +swaers swears +swepth swept +swiming swimming +syas says +symetrical symmetrical +symetrically symmetrically +symetry symmetry +symettric symmetric +symmetral symmetric +symmetricaly symmetrically +synagouge synagogue +syncronization synchronization +synonomous synonymous +synonymns synonyms +synphony symphony +syphyllis syphilis +sypmtoms symptoms +syrap syrup +sysmatically systematically +sytem system +sytle style +tabacco tobacco +tahn than +taht that +talekd talked +targetted targeted +targetting targeting +tast taste +tath that +tattooes tattoos +taxanomic taxonomic +taxanomy taxonomy +teached taught +techician technician +techicians technicians +techiniques techniques +technitian technician +technnology technology +technolgy technology +teh the +tehy they +telelevision television +televsion television +telphony telephony +temerature temperature +temparate temperate +temperarily temporarily +temperment temperament +tempertaure temperature +temperture temperature +temprary temporary +tenacle tentacle +tenacles tentacles +tendacy tendency +tendancies tendencies +tendancy tendency +tennisplayer tennis player +tepmorarily temporarily +terrestial terrestrial +terriories territories +terriory territory +territorist terrorist +territoy territory +terroist terrorist +testiclular testicular +tghe the +thast that, that's +theather theater, theatre +theese these +theif thief +theives thieves +themselfs themselves +themslves themselves +ther there, their, the +therafter thereafter +therby thereby +theri their +thgat that +thge the +thier their +thign thing +thigns things +thigsn things +thikn think +thikning thinking, thickening +thikns thinks +thiunk think +thn then +thna than +thne then +thnig thing +thnigs things +thoughout throughout +threatend threatened +threatning threatening +threee three +threshhold threshold +thrid third +throrough thorough +throughly thoroughly +throught thought, through, throughout +througout throughout +thru through +thsi this +thsoe those +thta that +thyat that +tiem time, Tim +tihkn think +tihs this +timne time +tiome time, tome +tje the +tjhe the +tjpanishad upanishad +tkae take +tkaes takes +tkaing taking +tlaking talking +tobbaco tobacco +todays today's +todya today +toghether together +tolerence tolerance +Tolkein Tolkien +tomatos tomatoes +tommorow tomorrow +tommorrow tomorrow +tongiht tonight +toriodal toroidal +tormenters tormentors +torpeados torpedoes +torpedos torpedoes +tothe to the +toubles troubles +tounge tongue +tourch torch, touch +towords towards +towrad toward +tradionally traditionally +traditionaly traditionally +traditionnal traditional +traditition tradition +tradtionally traditionally +trafficed trafficked +trafficing trafficking +trafic traffic +trancendent transcendent +trancending transcending +tranform transform +tranformed transformed +transcendance transcendence +transcendant transcendent +transcendentational transcendental 
+transcripting transcribing, transcription +transending transcending +transesxuals transsexuals +transfered transferred +transfering transferring +transformaton transformation +transistion transition +translater translator +translaters translators +transmissable transmissible +transporation transportation +tremelo tremolo +tremelos tremolos +triguered triggered +triology trilogy +troling trolling +troup troupe +troups troupes, troops +truely truly +trustworthyness trustworthiness +turnk turnkey, trunk +Tuscon Tucson +tust trust +twelth twelfth +twon town +twpo two +tyhat that +tyhe they +typcial typical +typicaly typically +tyranies tyrannies +tyrany tyranny +tyrranies tyrannies +tyrrany tyranny +ubiquitious ubiquitous +uise use +Ukranian Ukrainian +ultimely ultimately +unacompanied unaccompanied +unahppy unhappy +unanymous unanimous +unathorised unauthorised +unavailible unavailable +unballance unbalance +unbeleivable unbelievable +uncertainity uncertainty +unchallengable unchallengeable +unchangable unchangeable +uncompetive uncompetitive +unconcious unconscious +unconciousness unconsciousness +unconfortability discomfort +uncontitutional unconstitutional +unconvential unconventional +undecideable undecidable +understoon understood +undesireable undesirable +undetecable undetectable +undoubtely undoubtedly +undreground underground +uneccesary unnecessary +unecessary unnecessary +unequalities inequalities +unforetunately unfortunately +unforgetable unforgettable +unforgiveable unforgivable +unfortunatley unfortunately +unfortunatly unfortunately +unfourtunately unfortunately +unihabited uninhabited +unilateraly unilaterally +unilatreal unilateral +unilatreally unilaterally +uninterruped uninterrupted +uninterupted uninterrupted +UnitesStates UnitedStates +univeral universal +univeristies universities +univeristy university +universtiy university +univesities universities +univesity university +unkown unknown +unlikey unlikely +unmanouverable unmaneuverable, unmanoeuvrable +unmistakeably unmistakably +unneccesarily unnecessarily +unneccesary unnecessary +unneccessarily unnecessarily +unneccessary unnecessary +unnecesarily unnecessarily +unnecesary unnecessary +unoffical unofficial +unoperational nonoperational +unoticeable unnoticeable +unplease displease +unplesant unpleasant +unprecendented unprecedented +unprecidented unprecedented +unrepentent unrepentant +unrepetant unrepentant +unrepetent unrepentant +unsed used, unused, unsaid +unsubstanciated unsubstantiated +unsuccesful unsuccessful +unsuccesfully unsuccessfully +unsuccessfull unsuccessful +unsucesful unsuccessful +unsucesfuly unsuccessfully +unsucessful unsuccessful +unsucessfull unsuccessful +unsucessfully unsuccessfully +unsuprised unsurprised +unsuprising unsurprising +unsuprisingly unsurprisingly +unsuprized unsurprised +unsuprizing unsurprising +unsuprizingly unsurprisingly +unsurprized unsurprised +unsurprizing unsurprising +unsurprizingly unsurprisingly +untill until +untranslateable untranslatable +unuseable unusable +unusuable unusable +unviersity university +unwarrented unwarranted +unweildly unwieldy +unwieldly unwieldy +upcomming upcoming +upgradded upgraded +usally usually +useage usage +usefull useful +usefuly usefully +useing using +usualy usually +ususally usually +vaccum vacuum +vaccume vacuum +vacinity vicinity +vaguaries vagaries +vaieties varieties +vailidty validity +valetta valletta +valuble valuable +valueable valuable +varations variations +varient variant +variey variety +varing varying +varities 
varieties +varity variety +vasall vassal +vasalls vassals +vegatarian vegetarian +vegitable vegetable +vegitables vegetables +vegtable vegetable +vehicule vehicle +vell well +venemous venomous +vengance vengeance +vengence vengeance +verfication verification +verison version +verisons versions +vermillion vermilion +versitilaty versatility +versitlity versatility +vetween between +veyr very +vigeur vigueur, vigour, vigor +vigilence vigilance +vigourous vigorous +villian villain +villification vilification +villify vilify +villin villi, villain, villein +vincinity vicinity +violentce violence +virutal virtual +virtualy virtually +virutally virtually +visable visible +visably visibly +visting visiting +vistors visitors +vitories victories +volcanoe volcano +voleyball volleyball +volontary voluntary +volonteer volunteer +volonteered volunteered +volonteering volunteering +volonteers volunteers +volounteer volunteer +volounteered volunteered +volounteering volunteering +volounteers volunteers +vreity variety +vrey very +vriety variety +vulnerablility vulnerability +vyer very +vyre very +waht what +wanna want to +warantee warranty +wardobe wardrobe +warrent warrant +warrriors warriors +wasnt wasn't +wass was +watn want +wayword wayward +weaponary weaponry +weas was +wehn when +weild wield, wild +weilded wielded +wendsay Wednesday +wensday Wednesday +wereabouts whereabouts +whant want +whants wants +whcih which +wheras whereas +wherease whereas +whereever wherever +whic which +whihc which +whith with +whlch which +whn when +wholey wholly +wholy wholly, holy +whta what +whther whether +wich which, witch +widesread widespread +wief wife +wierd weird +wiew view +wih with +wiht with +wille will +willingless willingness +wirting writing +withdrawl withdrawal, withdraw +witheld withheld +withing within +withold withhold +witht with +witn with +wiull will +wnat want +wnated wanted +wnats wants +wohle whole +wokr work +wokring working +wonderfull wonderful +workststion workstation +worls world +wordlwide worldwide +worshipper worshiper +worshipping worshiping +worstened worsened +woudl would +wresters wrestlers +wriet write +writen written +wroet wrote +wrok work +wroking working +ws was +wtih with +wupport support +xenophoby xenophobia +yaching yachting +yatch yacht +yeasr years +yeild yield +yeilding yielding +Yementite Yemenite, Yemeni +yearm year +yera year +yeras years +yersa years +youseff yousef +youself yourself +ytou you +yuo you +joo you +zeebra zebra + +[[Category:Wikipedia tools]] diff --git a/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/Makefile.am b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/Makefile.am new file mode 100644 index 0000000000..b8be6c5b6d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/Makefile.am @@ -0,0 +1,6 @@ +EXTRA_DIST= \ +List_of_common_misspellings.txt \ +Makefile.orig \ +prepare \ +README \ +test diff --git a/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/Makefile.in b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/Makefile.in new file mode 100644 index 0000000000..11d332705d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/Makefile.in @@ -0,0 +1,435 @@ +# Makefile.in generated by automake 1.11.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, +# Inc. 
+# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = tests/suggestiontest +DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/codeset.m4 \ + $(top_srcdir)/m4/gettext.m4 $(top_srcdir)/m4/glibc2.m4 \ + $(top_srcdir)/m4/glibc21.m4 $(top_srcdir)/m4/iconv.m4 \ + $(top_srcdir)/m4/intdiv0.m4 $(top_srcdir)/m4/intl.m4 \ + $(top_srcdir)/m4/intlmacosx.m4 $(top_srcdir)/m4/intmax.m4 \ + $(top_srcdir)/m4/inttypes-pri.m4 \ + $(top_srcdir)/m4/inttypes_h.m4 $(top_srcdir)/m4/lcmessage.m4 \ + $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \ + $(top_srcdir)/m4/lib-prefix.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/lock.m4 $(top_srcdir)/m4/longlong.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/po.m4 \ + $(top_srcdir)/m4/printf-posix.m4 $(top_srcdir)/m4/progtest.m4 \ + $(top_srcdir)/m4/size_max.m4 $(top_srcdir)/m4/stdint_h.m4 \ + $(top_srcdir)/m4/uintmax_t.m4 $(top_srcdir)/m4/visibility.m4 \ + $(top_srcdir)/m4/wchar_t.m4 $(top_srcdir)/m4/wint_t.m4 \ + $(top_srcdir)/m4/xsize.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +SOURCES = +DIST_SOURCES = +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BUILD_INCLUDED_LIBINTL = @BUILD_INCLUDED_LIBINTL@ +CATOBJEXT = @CATOBJEXT@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAG_VISIBILITY = @CFLAG_VISIBILITY@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CURSESLIB = @CURSESLIB@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DATADIRNAME = @DATADIRNAME@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GENCAT = @GENCAT@ +GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@ +GLIBC2 = @GLIBC2@ +GLIBC21 = @GLIBC21@ +GMSGFMT = @GMSGFMT@ +GMSGFMT_015 = @GMSGFMT_015@ +GREP = @GREP@ +HAVE_ASPRINTF = @HAVE_ASPRINTF@ +HAVE_POSIX_PRINTF = 
@HAVE_POSIX_PRINTF@ +HAVE_SNPRINTF = @HAVE_SNPRINTF@ +HAVE_VISIBILITY = @HAVE_VISIBILITY@ +HAVE_WPRINTF = @HAVE_WPRINTF@ +HUNSPELL_VERSION_MAJOR = @HUNSPELL_VERSION_MAJOR@ +HUNSPELL_VERSION_MINOR = @HUNSPELL_VERSION_MINOR@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +INSTOBJEXT = @INSTOBJEXT@ +INTLBISON = @INTLBISON@ +INTLLIBS = @INTLLIBS@ +INTLOBJS = @INTLOBJS@ +INTL_LIBTOOL_SUFFIX_PREFIX = @INTL_LIBTOOL_SUFFIX_PREFIX@ +INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBICONV = @LIBICONV@ +LIBINTL = @LIBINTL@ +LIBMULTITHREAD = @LIBMULTITHREAD@ +LIBOBJS = @LIBOBJS@ +LIBPTH = @LIBPTH@ +LIBPTH_PREFIX = @LIBPTH_PREFIX@ +LIBS = @LIBS@ +LIBTHREAD = @LIBTHREAD@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBC = @LTLIBC@ +LTLIBICONV = @LTLIBICONV@ +LTLIBINTL = @LTLIBINTL@ +LTLIBMULTITHREAD = @LTLIBMULTITHREAD@ +LTLIBOBJS = @LTLIBOBJS@ +LTLIBPTH = @LTLIBPTH@ +LTLIBTHREAD = @LTLIBTHREAD@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MSGFMT = @MSGFMT@ +MSGFMT_015 = @MSGFMT_015@ +MSGMERGE = @MSGMERGE@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POSUB = @POSUB@ +PRI_MACROS_BROKEN = @PRI_MACROS_BROKEN@ +RANLIB = @RANLIB@ +READLINELIB = @READLINELIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +USE_INCLUDED_LIBINTL = @USE_INCLUDED_LIBINTL@ +USE_NLS = @USE_NLS@ +VERSION = @VERSION@ +WINDRES = @WINDRES@ +WOE32 = @WOE32@ +WOE32DLL = @WOE32DLL@ +XFAILED = @XFAILED@ +XGETTEXT = @XGETTEXT@ +XGETTEXT_015 = @XGETTEXT_015@ +XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lt_ECHO = @lt_ECHO@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +EXTRA_DIST = \ +List_of_common_misspellings.txt \ +Makefile.orig \ +prepare \ 
+README \ +test + +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu tests/suggestiontest/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu tests/suggestiontest/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +tags: TAGS +TAGS: + +ctags: CTAGS +CTAGS: + + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + distclean distclean-generic distclean-libtool distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/README b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/README new file mode 100644 index 0000000000..c50e05cea9 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/README @@ -0,0 +1,16 @@ +source of text data: Wikipedia +http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines + +For testing Hunspell you need the extended en_US dictionary with phonetic table: +http://hunspell.sourceforge.net/en_US.zip + +test: +make -f Makefile.orig + +test only with Hunspell: + +make -f Makefile.orig single + +test with different input file and dictionaries: + +INPUT=dutchlist.txt HUNSPELL=nl_NL ASPELL=nl make -f Makefile.orig diff --git a/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/prepare b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/prepare new file mode 100644 index 0000000000..a72d931b8b --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/prepare @@ -0,0 +1,40 @@ +#!/bin/bash +# Check common misspellings +# input file format: +# word->word1, ... 
+# Source: http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines + +hunspell=../../src/tools/hunspell +hlang=${HUNSPELL:-en_US} +alang=${ASPELL:-en_US} +input=${INPUT:-List_of_common_misspellings.txt} + +# remove bad words recognised by Hunspell as good +cat $input | sed 's/[-]>/ /' | $hunspell -d $hlang -1 -L | + +# remove items with dash for Aspell +grep '^[^-]* ' | + +# remove spaces from end of lines +sed 's/ *$//' >$input.1 + +# remove bad words recognised by Aspell as good +cut -f 1 -d ' ' $input.1 | aspell -l $alang --list | +awk 'FILENAME=="-"{a[$1]=1;next}a[$1]{print$0}' - $input.1 | + +# change commas with tabs +sed 's/, */ /g' >$input.2 + +# remove lines with unrecognised suggestions (except suggestion with spaces) +cut -d ' ' -f 2- $input.2 | tr "\t" "\n" | grep -v ' ' >x.1 +cat x.1 | $hunspell -l -d $hlang >x.2 +cat x.1 | aspell -l $alang --list >>x.2 +cat x.2 | awk 'BEGIN{FS="\t"} +FILENAME=="-"{a[$1]=1;next}a[$2]!=1 && a[$3]!=1{print $0}' - $input.2 >$input.3 + +cut -f 1 -d ' ' $input.3 | aspell -l $alang -a | grep -v ^$ | sed -n '2,$p' | +sed 's/^.*: //;s/, / /g' >$input.4 + +cat $input.3 | $hunspell -d $hlang -a -1 | grep -v ^$ | sed -n '2,$p' | +sed 's/^.*: //;s/, / /g' >$input.5 + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/test b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/test new file mode 100644 index 0000000000..8e6c1cc1ff --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/suggestiontest/test @@ -0,0 +1,25 @@ +#!/bin/bash +# Check common misspellings +# input file format: +# word->word1, ... +# Source: http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines + +input=${INPUT:-List_of_common_misspellings.txt} + +function check() { +cat $1 | awk 'BEGIN{maxord=0;FS="\t"}FILENAME=="-"{for (i=1; i<=NF; i++){a[NR,$(i)]=i};max=NR;next}{x1=a[NR-max,$2];x2=a[NR-max,$3];sug++;if($3)sug++;if (!x1&&!x2){mis2++;misrow=misrow"\n"$0};if(!x1||($3 && !x2))mis++;ord+=x1+x2;}END{ +print "Missed rows", misrow; +print "=======================================" +print maxord, "max. suggestion for a word"; +print max, "input rows"; +print mis2, "missing rows"; +print sug, "expected suggestions"; +print mis, "missing suggestions"; +print ord/(sug-mis), "average ranking"; +}' - $2 +} + +test -f $input.4 && check $input.4 $input.3 >result.aspell +check $input.5 $input.3 >result.hunspell +test -f result.aspell && tail -6 result.aspell +tail -6 result.hunspell diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sugutf.aff b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.aff new file mode 100644 index 0000000000..60294d24ca --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.aff @@ -0,0 +1,15 @@ +# new suggestion methods of Hunspell 1.5: +# capitalization: nasa -> NASA +# long swap: permenant -> permanent +# long mov: Ghandi -> Gandhi +# double two characters: vacacation -> vacation +# space with REP: "alot" -> "a lot" ("a lot" need to be in the dic file.) + +SET UTF-8 +# switch off ngram suggestion for testing +MAXNGRAMSUGS 0 +REP 1 +REP alot a_lot +KEY qwertzuiop|asdfghjkl|yxcvbnm|aq +WORDCHARS . +FORBIDDENWORD ? 
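A minimal sketch of how the sugutf fixtures above can be exercised by hand, assuming a built hunspell binary on PATH and the sugutf.aff/sugutf.dic pair in the current directory (both are assumptions for illustration; the shipped test.sh further below drives the same pipeline through libtool and the test driver sugutf.test):

# pipe the misspellings through hunspell's -a interface and keep only the
# suggestion lines (those starting with '&'), mirroring the suggestion check in test.sh
hunspell -a -d ./sugutf < sugutf.wrong | grep '^&' | sed 's/^[^:]*: //' > sugutf.out
# the result should match the expected suggestions shipped in sugutf.sug
cmp sugutf.out sugutf.sug

Each line of sugutf.wrong exercises one of the suggestion methods listed in the sugutf.aff comments (capitalization, long swap, long move, doubled character pair, REP with space), and sugutf.sug holds the suggestion list expected for the corresponding input line.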
diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sugutf.dic b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.dic new file mode 100644 index 0000000000..cf7c9aadbe --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.dic @@ -0,0 +1,11 @@ +10 +NASA +Gandhi +grateful +permanent +vacation +a +lot +have +which +McDonald diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sugutf.sug b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.sug new file mode 100644 index 0000000000..e277bdb778 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.sug @@ -0,0 +1,12 @@ +NASA +Gandhi +grateful +permanent +vacation +a lot, lot +permanent. Vacation +have +which +Gandhi +McDonald +permanent diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sugutf.test b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/sugutf.wrong b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.wrong new file mode 100644 index 0000000000..4d184d5a61 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/sugutf.wrong @@ -0,0 +1,12 @@ +nasa +Ghandi +greatful +permenant +vacacation +alot +permanent.Vacation +ahev +hwihc +GAndhi +Mcdonald +permqnent diff --git a/extensions/spellcheck/hunspell/tests/unit/data/test.sh b/extensions/spellcheck/hunspell/tests/unit/data/test.sh new file mode 100644 index 0000000000..c89ca9bf75 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/test.sh @@ -0,0 +1,111 @@ +#!/bin/bash +export LC_ALL="C" + +function check_valgrind_log () { +if [ "$VALGRIND" != "" ]; then + if [ -f $TEMPDIR/test.pid* ]; then + log=`ls $TEMPDIR/test.pid*` + if ! grep -q 'ERROR SUMMARY: 0 error' $log; then + echo "Fail in $NAME $1 checking detected by Valgrind" + echo "$log Valgrind log file moved to $TEMPDIR/badlogs" + mv $log $TEMPDIR/badlogs + exit 1 + fi + if grep -q 'LEAK SUMMARY' $log; then + echo "Memory leak in $NAME $1 checking detected by Valgrind" + echo "$log Valgrind log file moved to $TEMPDIR/badlogs" + mv $log $TEMPDIR/badlogs + exit 1 + fi + rm -f $log + fi +fi +} + +TESTDIR=. +TEMPDIR=$TESTDIR/testSubDir +NAME="$1" +shift + +if [ ! -d $TEMPDIR ]; then + mkdir $TEMPDIR +fi + +shopt -s expand_aliases + +alias hunspell='../libtool --mode=execute -dlopen ../src/hunspell/.libs/libhunspell*.la ../src/tools/hunspell' +alias analyze='../libtool --mode=execute -dlopen ../src/hunspell/.libs/libhunspell*.la ../src/tools/analyze' + +if [ "$VALGRIND" != "" ]; then + rm -f $TEMPDIR/test.pid* + if [ ! -d $TEMPDIR/badlogs ]; then + mkdir $TEMPDIR/badlogs + fi + + alias hunspell='../libtool --mode=execute -dlopen ../src/hunspell/.libs/libhunspell*.la valgrind --tool=$VALGRIND --leak-check=yes --show-reachable=yes --log-file=$TEMPDIR/test.pid ../src/tools/hunspell' + alias analyze='../libtool --mode=execute -dlopen ../src/hunspell/.libs/libhunspell*.la valgrind --tool=$VALGRIND --leak-check=yes --show-reachable=yes --log-file=$TEMPDIR/test.pid ../src/tools/analyze' +fi + +# Tests good words +if test -f $TESTDIR/$NAME.good; then + hunspell -l $* -d $TESTDIR/$NAME <$TESTDIR/$NAME.good >$TEMPDIR/$NAME.good + if test -s $TEMPDIR/$NAME.good; then + echo "=============================================" + echo "Fail in $NAME.good. 
Good words recognised as wrong:" + cat $TEMPDIR/$NAME.good + rm -f $TEMPDIR/$NAME.good + exit 1 + fi + rm -f $TEMPDIR/$NAME.good +fi + +check_valgrind_log "good words" + +# Tests bad words +if test -f $TESTDIR/$NAME.wrong; then + hunspell -l $* -d $TESTDIR/$NAME <$TESTDIR/$NAME.wrong >$TEMPDIR/$NAME.wrong + tr -d ' ' <$TESTDIR/$NAME.wrong >$TEMPDIR/$NAME.wrong.detab + if ! cmp $TEMPDIR/$NAME.wrong $TEMPDIR/$NAME.wrong.detab >/dev/null; then + echo "=============================================" + echo "Fail in $NAME.wrong. Bad words recognised as good:" + tr -d ' ' <$TESTDIR/$NAME.wrong >$TEMPDIR/$NAME.wrong.detab + diff $TEMPDIR/$NAME.wrong.detab $TEMPDIR/$NAME.wrong | grep '^<' | sed 's/^..//' + rm -f $TEMPDIR/$NAME.wrong $TEMPDIR/$NAME.wrong.detab + exit 1 + fi + rm -f $TEMPDIR/$NAME.wrong $TEMPDIR/$NAME.wrong.detab +fi + +check_valgrind_log "bad words" + +# Tests morphological analysis +if test -f $TESTDIR/$NAME.morph; then + sed 's/ $//' $TESTDIR/$NAME.good >$TEMPDIR/$NAME.good + analyze $TESTDIR/$NAME.aff $TESTDIR/$NAME.dic $TEMPDIR/$NAME.good >$TEMPDIR/$NAME.morph + if ! cmp $TEMPDIR/$NAME.morph $TESTDIR/$NAME.morph >/dev/null; then + echo "=============================================" + echo "Fail in $NAME.morph. Bad analysis?" + diff $TESTDIR/$NAME.morph $TEMPDIR/$NAME.morph | grep '^<' | sed 's/^..//' + rm -f $TEMPDIR/$NAME.morph + exit 1 + fi + rm -f $TEMPDIR/$NAME.{morph,good} +fi + +check_valgrind_log "morphological analysis" + +# Tests suggestions +if test -f $TESTDIR/$NAME.sug; then + hunspell $* -a -d $TESTDIR/$NAME <$TESTDIR/$NAME.wrong | grep '^&' | \ + sed 's/^[^:]*: //' >$TEMPDIR/$NAME.sug + if ! cmp $TEMPDIR/$NAME.sug $TESTDIR/$NAME.sug >/dev/null; then + echo "=============================================" + echo "Fail in $NAME.sug. Bad suggestion?" 
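+ # $TEMPDIR/$NAME.sug holds what hunspell actually suggested (the "&" lines of
+ # its -a pipe output with the "& word count offset: " prefix stripped), so this
+ # diff shows which expected suggestion lists in $TESTDIR/$NAME.sug were not matched.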
+ diff $TESTDIR/$NAME.sug $TEMPDIR/$NAME.sug + rm -f $TEMPDIR/$NAME.sug + exit 1 + fi + rm -f $TEMPDIR/$NAME.sug +fi + +check_valgrind_log "suggestion" diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.aff b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.aff new file mode 100644 index 0000000000..f56998b9f3 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.aff @@ -0,0 +1,3 @@ +SET UTF-8 + +# removing byte order mark from affix file diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.dic b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.dic new file mode 100644 index 0000000000..8b10768e55 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.dic @@ -0,0 +1,2 @@ +1 +apéritif diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.good b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.good new file mode 100644 index 0000000000..c344eaf5ac --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.good @@ -0,0 +1,2 @@ +apéritif +APÉRITIF diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.test b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.test new file mode 100644 index 0000000000..1d25699aa2 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 -1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.aff b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.aff new file mode 100644 index 0000000000..784935c841 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.aff @@ -0,0 +1,3 @@ +SET UTF-8 + +# removing byte order mark from dic file diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.dic b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.dic new file mode 100644 index 0000000000..b763179a0d --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.dic @@ -0,0 +1,2 @@ +1 +apéritif diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.good b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.good new file mode 100644 index 0000000000..c344eaf5ac --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.good @@ -0,0 +1,2 @@ +apéritif +APÉRITIF diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.test b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.test new file mode 100644 index 0000000000..1d25699aa2 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-bom2.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 -1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.aff b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.aff new file mode 100644 index 0000000000..979e3c2284 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.aff @@ -0,0 +1 @@ +SET UTF-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.dic b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.dic new file mode 100644 index 0000000000..4a040eeb0b --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.dic @@ -0,0 +1,5 @@ +4 # Old Persian numbers (1-4), source: Wikipedia +𐏑 +𐏒 +𐏒𐏑 +𐏒𐏒 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.good 
b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.good new file mode 100644 index 0000000000..9f989d3397 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.good @@ -0,0 +1,5 @@ +𐏑 +𐏒 +𐏒𐏑 +𐏒𐏒 + diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.sug b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.sug new file mode 100644 index 0000000000..bfe2a539fe --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.sug @@ -0,0 +1,2 @@ +𐏒𐏑, 𐏒𐏒 +𐏒𐏑, 𐏒𐏒 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.test b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.test new file mode 100644 index 0000000000..1d25699aa2 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 -1 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.wrong b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.wrong new file mode 100644 index 0000000000..d18dfa4c20 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8-nonbmp.wrong @@ -0,0 +1,2 @@ +𐏑𐏒𐏒 +𐏑𐏒𐏒 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8.aff b/extensions/spellcheck/hunspell/tests/unit/data/utf8.aff new file mode 100644 index 0000000000..e8934d71b5 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8.aff @@ -0,0 +1,10 @@ +SET UTF-8 + +SFX A Y 7 +SFX A 0 őő . +SFX A 0 ő o +SFX A 0 ő ó +SFX A ó ő ó +SFX A ó őoo ó +SFX A o őo o +SFX A 0 ó [abcdó] diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8.dic b/extensions/spellcheck/hunspell/tests/unit/data/utf8.dic new file mode 100644 index 0000000000..e7cb34daf6 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8.dic @@ -0,0 +1,3 @@ +2 +foo/A +foó/A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8.good b/extensions/spellcheck/hunspell/tests/unit/data/utf8.good new file mode 100644 index 0000000000..08aa4dadf9 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8.good @@ -0,0 +1,9 @@ +foo +foó +fooőő +fooő +foóő +foő +foőo +foőoo +foóó diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utf8.test b/extensions/spellcheck/hunspell/tests/unit/data/utf8.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utf8.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.aff b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.aff new file mode 100644 index 0000000000..43506afa31 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.aff @@ -0,0 +1,3 @@ +SET UTF-8 +COMPOUNDMIN 3 +COMPOUNDFLAG A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.dic b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.dic new file mode 100644 index 0000000000..ab90a1b703 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.dic @@ -0,0 +1,9 @@ +8 +foo/A +bar/A +fóó/A +áár/A +xy/A +yz/A +éé/A +őő/A diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.good b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.good new file mode 100644 index 0000000000..1a1a1b19c0 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.good @@ 
-0,0 +1,5 @@ +foobar +barfoo +foobarfoo +fóóáár +áárfóó diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.test b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.test new file mode 100644 index 0000000000..cde7c54109 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME -i utf-8 diff --git a/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.wrong b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.wrong new file mode 100644 index 0000000000..fa385c1b03 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/utfcompound.wrong @@ -0,0 +1,7 @@ +xyyz +fooxy +xyfoo +fooxybar +ééőő +fóóéé +őőáár diff --git a/extensions/spellcheck/hunspell/tests/unit/data/warn.aff b/extensions/spellcheck/hunspell/tests/unit/data/warn.aff new file mode 100644 index 0000000000..d586fa33ef --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/warn.aff @@ -0,0 +1,13 @@ +# WARN flag +# The signed word, and its suffixed forms result warning message in command-line + +#Use to forbid the words with flag WARN +#FORBIDWARN + +WARN W + +SFX A Y 1 +SFX A 0 s . + +REP 1 +REP foo bar diff --git a/extensions/spellcheck/hunspell/tests/unit/data/warn.dic b/extensions/spellcheck/hunspell/tests/unit/data/warn.dic new file mode 100644 index 0000000000..d63f6047ea --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/warn.dic @@ -0,0 +1,3 @@ +1 +foo/WA +bar diff --git a/extensions/spellcheck/hunspell/tests/unit/data/warn.good b/extensions/spellcheck/hunspell/tests/unit/data/warn.good new file mode 100644 index 0000000000..542f439a4f --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/warn.good @@ -0,0 +1,2 @@ +foo +foos diff --git a/extensions/spellcheck/hunspell/tests/unit/data/warn.test b/extensions/spellcheck/hunspell/tests/unit/data/warn.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/warn.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.aff b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.aff new file mode 100644 index 0000000000..fdb047b0c1 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.aff @@ -0,0 +1,12 @@ +PSEUDOROOT X +COMPOUNDFLAG Y + +SFX A Y 1 +SFX A 0 0 . > + +SFX B Y 1 +SFX B 0 0 . <ZERO>> + +SFX C Y 2 +SFX C 0 0/XAB . <ZERODERIV> +SFX C 0 baz/XAB . 
<DERIV> diff --git a/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.dic b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.dic new file mode 100644 index 0000000000..72cba8d346 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.dic @@ -0,0 +1,3 @@ +2 +foo/XA <FOO +bar/XABC <BAR diff --git a/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.good b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.good new file mode 100644 index 0000000000..b1fb3ba5c3 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.good @@ -0,0 +1,3 @@ +bar +foo +barbaz diff --git a/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.morph b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.morph new file mode 100644 index 0000000000..bcb788ad75 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.morph @@ -0,0 +1,13 @@ +> bar +analyze(bar) = st:bar <BAR <ZERO>> +analyze(bar) = st:bar <BAR > +analyze(bar) = st:bar <BAR <ZERODERIV> <ZERO>> +analyze(bar) = st:bar <BAR <ZERODERIV> > +stem(bar) = bar +> foo +analyze(foo) = st:foo <FOO > +stem(foo) = foo +> barbaz +analyze(barbaz) = st:bar <BAR <DERIV> <ZERO>> +analyze(barbaz) = st:bar <BAR <DERIV> > +stem(barbaz) = bar diff --git a/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.test b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.test new file mode 100644 index 0000000000..7f44369060 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/data/zeroaffix.test @@ -0,0 +1,4 @@ +#!/bin/sh +DIR="`dirname $0`" +NAME="`basename $0 .test`" +$DIR/test.sh $NAME diff --git a/extensions/spellcheck/hunspell/tests/unit/test_hunspell.js b/extensions/spellcheck/hunspell/tests/unit/test_hunspell.js new file mode 100644 index 0000000000..28736fc023 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/test_hunspell.js @@ -0,0 +1,250 @@ +/* Any copyright is dedicated to the Public Domain. 
+ * http://creativecommons.org/publicdomain/zero/1.0/ + */ + +const tests = [ + ["affixes", "iso-8859-1"], + ["condition", "iso-8859-1"], + ["condition-utf", "UTF-8"], + ["base", "iso-8859-1"], + ["base-utf", "UTF-8"], + ["allcaps", "iso-8859-1"], + ["allcaps-utf", "UTF-8"], + ["allcaps2", "iso-8859-1"], + ["allcaps3", "iso-8859-1"], + ["keepcase", "iso-8859-1"], + ["i58202", "iso-8859-1"], + ["map", "iso-8859-1"], + ["rep", "iso-8859-1"], + ["sug", "iso-8859-1"], + ["sugutf", "UTF-8"], + ["phone", "iso-8859-1"], + ["flag", "iso-8859-1"], + ["flaglong", "iso-8859-1"], + ["flagnum", "iso-8859-1"], + ["flagutf8", "UTF-8"], + ["slash", "iso-8859-1"], + ["forbiddenword", "iso-8859-1"], + ["nosuggest", "iso-8859-1"], + ["alias", "iso-8859-1"], + ["alias2", "iso-8859-1"], + ["alias3", "iso-8859-1"], + ["breakdefault", "iso-8859-1"], + ["break", "UTF-8"], + ["needaffix", "iso-8859-1"], + ["needaffix2", "iso-8859-1"], + ["needaffix3", "iso-8859-1"], + ["needaffix4", "iso-8859-1"], + ["needaffix5", "iso-8859-1"], + ["circumfix", "iso-8859-1"], + ["fogemorpheme", "iso-8859-1"], + ["onlyincompound", "iso-8859-1"], + ["complexprefixes", "iso-8859-1"], + ["complexprefixes2", "iso-8859-1"], + ["complexprefixesutf", "UTF-8"], + ["conditionalprefix", "iso-8859-1"], + ["zeroaffix", "iso-8859-1"], + ["utf8", "UTF-8"], + ["utf8-bom", "UTF-8", { 1: "todo" }], + ["utf8-bom2", "UTF-8", { 1: "todo" }], + ["utf8-nonbmp", "UTF-8", { 1: "todo", 2: "todo", 3: "todo", 4: "todo" }], + ["compoundflag", "iso-8859-1"], + ["compoundrule", "iso-8859-1"], + ["compoundrule2", "iso-8859-1"], + ["compoundrule3", "iso-8859-1"], + ["compoundrule4", "iso-8859-1"], + ["compoundrule5", "UTF-8"], + ["compoundrule6", "iso-8859-1"], + ["compoundrule7", "iso-8859-1"], + ["compoundrule8", "iso-8859-1"], + ["compoundaffix", "iso-8859-1"], + ["compoundaffix2", "iso-8859-1"], + ["compoundaffix3", "iso-8859-1"], + ["checkcompounddup", "iso-8859-1"], + ["checkcompoundtriple", "iso-8859-1"], + ["simplifiedtriple", "iso-8859-1"], + ["checkcompoundrep", "iso-8859-1"], + ["checkcompoundcase2", "iso-8859-1"], + ["checkcompoundcaseutf", "UTF-8"], + ["checkcompoundpattern", "iso-8859-1"], + ["checkcompoundpattern2", "iso-8859-1"], + ["checkcompoundpattern3", "iso-8859-1"], + ["checkcompoundpattern4", "iso-8859-1"], + ["utfcompound", "UTF-8"], + ["checksharps", "iso-8859-1"], + ["checksharpsutf", "UTF-8"], + ["germancompounding", "iso-8859-1"], + ["germancompoundingold", "iso-8859-1"], + ["i35725", "iso-8859-1"], + ["i53643", "iso-8859-1"], + ["i54633", "iso-8859-1"], + ["i54980", "iso-8859-1", { 1: "todo", 3: "todo" }], + ["maputf", "UTF-8"], + ["reputf", "UTF-8"], + ["ignore", "iso-8859-1"], + [ + "ignoreutf", + "UTF-8", + { + 1: "todo", + 2: "todo", + 3: "todo", + 4: "todo", + 5: "todo", + 6: "todo", + 7: "todo", + 8: "todo", + }, + ], + ["1592880", "iso-8859-1"], + ["1695964", "iso-8859-1"], + ["1463589", "iso-8859-1"], + ["1463589-utf", "UTF-8"], + ["IJ", "iso-8859-1"], + ["i68568", "iso-8859-1"], + ["i68568utf", "UTF-8"], + ["1706659", "iso-8859-1"], + ["digits-in-words", "iso-8859-1"], + // ["colons-in-words", "iso-8859-1"], Suggestion test only + ["ngram-utf-fix", "UTF-8"], + [ + "morph", + "us-ascii", + { + 11: "todo", + 12: "todo", + 13: "todo", + 14: "todo", + 15: "todo", + 16: "todo", + 17: "todo", + 18: "todo", + 19: "todo", + 20: "todo", + 21: "todo", + 22: "todo", + 23: "todo", + 24: "todo", + 25: "todo", + 26: "todo", + 27: "todo", + }, + ], + ["1975530", "UTF-8"], + ["fullstrip", "iso-8859-1"], + ["iconv", "UTF-8"], + ["oconv", 
"UTF-8"], + ["encoding", "iso-8859-1", { 1: "todo", 3: "todo" }], + ["korean", "UTF-8"], + ["opentaal-forbiddenword1", "UTF-8"], + ["opentaal-forbiddenword2", "UTF-8"], + ["opentaal-keepcase", "UTF-8"], + ["arabic", "UTF-8"], + ["2970240", "iso-8859-1"], + ["2970242", "iso-8859-1"], + ["breakoff", "iso-8859-1"], + ["opentaal-cpdpat", "iso-8859-1"], + ["opentaal-cpdpat2", "iso-8859-1"], + ["2999225", "iso-8859-1"], + ["onlyincompound2", "iso-8859-1"], + ["forceucase", "iso-8859-1"], + ["warn", "iso-8859-1"], +]; + +// eslint-disable-next-line no-shadow +function* do_get_file_by_line(file, charset) { + dump("getting file by line for file " + file.path + "\n"); + dump("using charset1" + charset + "\n"); + let fis = Cc["@mozilla.org/network/file-input-stream;1"].createInstance( + Ci.nsIFileInputStream + ); + fis.init(file, 0x1 /* READONLY */, 0o444, Ci.nsIFileInputStream.CLOSE_ON_EOF); + + let lis = Cc["@mozilla.org/intl/converter-input-stream;1"].createInstance( + Ci.nsIConverterInputStream + ); + lis.init(fis, charset, 1024, 0); + lis.QueryInterface(Ci.nsIUnicharLineInputStream); + + let val = {}; + while (lis.readLine(val)) { + yield val.value; + val = {}; + } +} + +function do_run_test(checker, name, charset, todo_good, todo_bad) { + dump("\n\n\n\n"); + dump("running test for " + name + "\n"); + if (!checker) { + do_throw("Need spell checker here!"); + } + + let good = do_get_file("data/" + name + ".good", true); + let bad = do_get_file("data/" + name + ".wrong", true); + let sug = do_get_file("data/" + name + ".sug", true); + + dump("Need some expected output\n"); + Assert.ok(good.exists() || bad.exists() || sug.exists()); + + dump("Setting dictionary to " + name + "\n"); + checker.dictionaries = [name]; + + if (good.exists()) { + var good_counter = 0; + for (const val of do_get_file_by_line(good, charset)) { + let todo = false; + good_counter++; + if (todo_good && todo_good[good_counter]) { + todo = true; + dump("TODO\n"); + } + + dump("Expect word " + val + " is spelled correctly\n"); + if (todo) { + todo_check_true(checker.check(val)); + } else { + Assert.ok(checker.check(val)); + } + } + } + + if (bad.exists()) { + var bad_counter = 0; + for (const val of do_get_file_by_line(bad, charset)) { + let todo = false; + bad_counter++; + if (todo_bad && todo_bad[bad_counter]) { + todo = true; + dump("TODO\n"); + } + + dump("Expect word " + val + " is spelled wrong\n"); + if (todo) { + todo_check_false(checker.check(val)); + } else { + Assert.ok(!checker.check(val)); + } + } + } + + // XXXkhuey test suggestions +} + +function run_test() { + let spellChecker = Cc["@mozilla.org/spellchecker/engine;1"].getService( + Ci.mozISpellCheckingEngine + ); + + Assert.ok(!!spellChecker, "Should have a spell checker"); + spellChecker.QueryInterface(Ci.mozISpellCheckingEngine); + let testdir = do_get_file("data/", false); + spellChecker.loadDictionariesFromDir(testdir); + + function do_run_test_closure(test) { + let [name, charset, todo_good, todo_bad] = test; + do_run_test(spellChecker, name, charset, todo_good, todo_bad); + } + + tests.forEach(do_run_test_closure); +} diff --git a/extensions/spellcheck/hunspell/tests/unit/test_hunspell_unicode_paths.js b/extensions/spellcheck/hunspell/tests/unit/test_hunspell_unicode_paths.js new file mode 100644 index 0000000000..3fb4c78b59 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/test_hunspell_unicode_paths.js @@ -0,0 +1,42 @@ +"use strict"; + +const { XPCOMUtils } = ChromeUtils.importESModule( + "resource://gre/modules/XPCOMUtils.sys.mjs" +); + 
+XPCOMUtils.defineLazyServiceGetter( + this, + "spellCheck", + "@mozilla.org/spellchecker/engine;1", + "mozISpellCheckingEngine" +); + +const nsFile = Components.Constructor( + "@mozilla.org/file/local;1", + "nsIFile", + "initWithPath" +); + +add_task(async function () { + let prof = do_get_profile(); + + let basePath = PathUtils.join(prof.path, "\u263a", "dictionaries"); + let baseDir = nsFile(basePath); + await IOUtils.makeDirectory(basePath, { createAncestors: true }); + + let dicPath = PathUtils.join(basePath, "dict.dic"); + let affPath = PathUtils.join(basePath, "dict.aff"); + + const WORD = "Flehgragh"; + + await IOUtils.writeUTF8(dicPath, `1\n${WORD}\n`); + await IOUtils.writeUTF8(affPath, ""); + + spellCheck.loadDictionariesFromDir(baseDir); + spellCheck.dictionaries = ["dict"]; + + ok( + spellCheck.check(WORD), + "Dictionary should have been loaded from a unicode path" + ); +}); diff --git a/extensions/spellcheck/hunspell/tests/unit/xpcshell.toml b/extensions/spellcheck/hunspell/tests/unit/xpcshell.toml new file mode 100644 index 0000000000..a4f20b4797 --- /dev/null +++ b/extensions/spellcheck/hunspell/tests/unit/xpcshell.toml @@ -0,0 +1,9 @@ +[DEFAULT] +head = "" +skip-if = ["os == 'android'"] +support-files = ["data/**"] +firefox-appdir = "browser" + +["test_hunspell.js"] + +["test_hunspell_unicode_paths.js"] |
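For comparison with the JavaScript fixture in test_hunspell_unicode_paths.js above — which writes a one-word dict.dic and an empty dict.aff under a profile subdirectory containing "\u263a" and then loads it through mozISpellCheckingEngine — here is a rough standalone sketch using the command-line hunspell tool instead. The temporary path and the standalone binary are assumptions, and a SET UTF-8 line is written only to make the affix file's encoding explicit (the xpcshell test leaves the file empty):

# Build the same minimal dictionary by hand, then check words the way test.sh
# does for *.good files: with -l, hunspell prints only the words it rejects,
# so empty output means the word was accepted.
dir="/tmp/hunspell-unicode-demo/☺/dictionaries"    # hypothetical path with a non-ASCII component
mkdir -p "$dir"
printf '1\nFlehgragh\n' > "$dir/dict.dic"
printf 'SET UTF-8\n'    > "$dir/dict.aff"
echo Flehgragh  | hunspell -d "$dir/dict" -l       # accepted: prints nothing
echo Flehgraghs | hunspell -d "$dir/dict" -l       # rejected: echoed back (the entry has no affix flags)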