diff options
Diffstat (limited to 'third_party/rust/mapped_hyph')
30 files changed, 11304 insertions, 0 deletions
diff --git a/third_party/rust/mapped_hyph/.cargo-checksum.json b/third_party/rust/mapped_hyph/.cargo-checksum.json new file mode 100644 index 0000000000..1060c2f5a8 --- /dev/null +++ b/third_party/rust/mapped_hyph/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{".travis.yml":"4d1af7257c9619f7ae66fc271ba2c1be5f063640ae8ceaa235c8c8aaf32f44ea","COPYRIGHT":"4df931055b82b96e13ad475c4cee3de5afa69a54a4c611c9d7dc6252d858d9c8","Cargo.toml":"be71dedbd0cf3cb3302835b40976cfacde6b7ec30df010dad9152111d119a7c6","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"4ad721b5b6a3d39ca3e2202f403d897c4a1d42896486dd58963a81f8e64ef61d","README.md":"2c91137faee83f0805a9b9123e105670bf60c2fe45ce6536fb92df7ef85017a5","benches/bench.rs":"ed7143e66ecf8bfb12c87d1f9344157d97696b8194de9132d061129bc80d8d52","cbindgen.toml":"452e79bea00e2a0c16a03ac04e454a0c5955becf2d0306ccce7d1c13d3bcc51a","doc/mapped_hyph_format.md":"2f2487cf536fe4b03db6e4b384be06744ec30b3f299519492288306a93127fbb","hyph_en_US.hyf":"6262b4c5118fe277ab4add8689d9524ca72097564652baec67a8fcd5029ec9b0","src/bin/hyf_compile.rs":"85199ddf171219b61a2da0e6acf675bf0f7a9a11ee2c6c5d1d436ec466aa95b5","src/builder.rs":"4169a89fb3a5025b06edeb8a6435a18814d58799d15861c3639a2ed9c63c628b","src/ffi.rs":"09884728df4910bb430e0d59edf770b04e5b11e2423f75c5782c5152af323476","src/lib.rs":"30c007a5f8bf71af3b4b93227c3fbb76198d6333388e5c156aaff13bfe458c8e","src/main.rs":"666befeb39cb1a7dfb66c6b9218d5f7b6c4ed09dbbbc8cfff6b749a33a99ebcf","tests/base.hyf":"d8bf57c6280cfa1d357d3fdba156ce64afbd9df58e28eeb084dfe3f80972b73f","tests/base.hyph":"a3f1fab24c101701fdf21e8359685d80611ab970304e2bd89ef024768b3700c8","tests/base.word":"1136c9a421b242262661b9a65723f87a5ecf77ae38eabcea057832d036d567fd","tests/compound.hyf":"929c1ba6676e4c43bc649d0abf4275ea9e8b02bffaa5acdf704a710813a7a13c","tests/compound4.hyf":"2093287bc41ee30ff9bdbf278f1f8209cb1d1a78236b46e9060af2a881572b8e","tests/compound5.hyf":"0942a5dfbb8d0ef3a937ab9da0418abb41300
357cde49f4c477a59a11b2cb6bd","tests/compound6.hyf":"ebad958c2692a5b439b31e324020ed27c42dc05bd5b8c6a6dea4669e6ccf76b4","tests/hyphen.hyf":"92b8a5c86aac6a0b9f0eb7330a057065d6985fd047e851cae47039995c682d4d","tests/lhmin.hyf":"23c886704fafee7d9c54b2478029cf69a5fa946c2f2442bd86697bca5933c88d","tests/num.hyf":"4834fabe78b5c81815434d4562ce3322541649e1ea1edc555a498574bc8b237e","tests/rhmin.hyf":"239cb3d4d7f904abb43b57241e12cc1396e636220c3806e64666aca7ca46cc42","tests/settings2.hyf":"9fc4855e0b952a3593db1efef080b93ce7f1c6fe6798db0440e2bf0cc986ffa2","tests/settings3.hyf":"867db207b485a06e7d60ad10735c9111f10516ee3a5afd6306c683ace3454491","tests/test.rs":"5c81ae59b9384b70d9461407999dac1fde9214398876c4433fbbde9571cc1d94"},"package":null}
\ No newline at end of file diff --git a/third_party/rust/mapped_hyph/.travis.yml b/third_party/rust/mapped_hyph/.travis.yml new file mode 100644 index 0000000000..8c91a7415d --- /dev/null +++ b/third_party/rust/mapped_hyph/.travis.yml @@ -0,0 +1,8 @@ +language: rust +rust: + - stable + - beta + - nightly +matrix: + allow_failures: + - rust: nightly diff --git a/third_party/rust/mapped_hyph/COPYRIGHT b/third_party/rust/mapped_hyph/COPYRIGHT new file mode 100644 index 0000000000..a1254361b3 --- /dev/null +++ b/third_party/rust/mapped_hyph/COPYRIGHT @@ -0,0 +1,12 @@ +mapped_hyph is copyright 2019 Mozilla Foundation. + +Licensed under the Apache License, Version 2.0 +<LICENSE-APACHE or +https://www.apache.org/licenses/LICENSE-2.0> or the MIT +license <LICENSE-MIT or https://opensource.org/licenses/MIT>, +at your option. All files in the project carrying such +notice may not be copied, modified, or distributed except +according to those terms. + +Code in the subdirectories /test/ and /bench/ is dedicated +to the Public Domain. 
diff --git a/third_party/rust/mapped_hyph/Cargo.toml b/third_party/rust/mapped_hyph/Cargo.toml new file mode 100644 index 0000000000..2c2d762079 --- /dev/null +++ b/third_party/rust/mapped_hyph/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "mapped_hyph" +description = "Hyphenation using precompiled memory-mapped tables" +version = "0.4.3" +authors = ["Jonathan Kew <jfkthame@gmail.com>"] +license = "MIT/Apache-2.0" +edition = "2018" + +[dependencies] +memmap2 = { version = ">=0.2,<=0.5" } +arrayref = "0.3.5" +log = "0.4" + +[dev-dependencies] +criterion = "0.3" + +[[bench]] +name = "bench" +harness = false diff --git a/third_party/rust/mapped_hyph/LICENSE-APACHE b/third_party/rust/mapped_hyph/LICENSE-APACHE new file mode 100644 index 0000000000..d645695673 --- /dev/null +++ b/third_party/rust/mapped_hyph/LICENSE-APACHE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/third_party/rust/mapped_hyph/LICENSE-MIT b/third_party/rust/mapped_hyph/LICENSE-MIT new file mode 100644 index 0000000000..b4850c9520 --- /dev/null +++ b/third_party/rust/mapped_hyph/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2019 Mozilla Foundation + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/third_party/rust/mapped_hyph/README.md b/third_party/rust/mapped_hyph/README.md new file mode 100644 index 0000000000..c4251e5305 --- /dev/null +++ b/third_party/rust/mapped_hyph/README.md @@ -0,0 +1,90 @@ +# mapped_hyph + +mapped_hyph is a reimplementation of the hyphenation algorithm from the +[libhyphen](https://github.com/hunspell/hyphen) library +that is intended to reduce the in-memory footprint of loaded +hyphenation dictionaries, especially when the same dictionary +may be in use by multiple processes. + +To reduce memory footprint, mapped_hyph uses hyphenation dictionaries that are +"precompiled" into a flat, position-independent binary format that is used +directly by the runtime hyphenation functions. +Therefore, dictionaries do not have to be parsed into a dynamic structure in memory; +the files can simply be mmap'd into the address space and immediately used. +In addition, a compiled dictionary mapped into a shared-memory block +can be made available to multiple processes for no added physical memory cost. + +One deliberate simplification compared to libhyphen +is that mapped_hyph only accepts UTF-8 text and hyphenation dictionaries; +legacy non-Unicode encodings are not supported. + +mapped_hyph has been created primarily for use by Gecko, replacing the use of libhyphen, +and so its features (and limitations) are based on this use case. +However, it is hoped that it will also be more generally useful. + +## Functionality + +Currently, mapped_hyph supports only "standard" hyphenation, where spelling does not +change around the hyphenation position. At present this is the only kind of +hyphenation supported in Gecko. + +The compiled hyphenation dictionary format includes provision for replacement +strings and indexes, as used by libhyphen to support non-standard hyphenations +(e.g. German "Schiffahrt" -> "Schiff-fahrt"), but the `find_hyphen_values` function +will ignore any such hyphenation positions it finds. 
+(None of the hyphenation dictionaries shipping with Firefox includes such rules.) + +## Licensing + +mapped_hyph is dual licensed under the Apache-2.0 and MIT licenses; +see the file COPYRIGHT. + +## Documentation + +Use `cargo doc --open` to view (admittedly brief) documentation generated from +comments in the source. + +## C and C++ bindings + +See src/ffi.rs for C/C++ APIs that can be used to load hyphenation files +and to locate valid hyphenation positions in a word. + +## Sample programs + +See main.rs for a simple example program. + +## Compiled dictionaries + +The `hyf_compile` tool is used to generate `.hyf` files for mapped_hyph +from standard `.dic` (or `.pat`) files as used by libhyphen, LibreOffice, etc. + +(A compiled version of the `hyph_en_US` dictionary from libhyphen is currently +included here, as it is handy for testing purposes.) + +## Release Notes + +### 0.4.0 + +* Added a boolean `compress` param to the pattern compiler to control whether + it attempts to compress the compiled table by merging duplicate states (which + takes significant extra time). + +* Added FFI functions to compile hyphenation tables from a file path or a buffer, + intended for use from Gecko. + +### 0.3.0 + +* Switched from MPL2 to Apache2/MIT dual license. + +* Misc bug-fixes and optimizations. + +### 0.2.0 + +* Implemented a hyphenation table compiler in the `builder` submodule, + and `hyf_compile` command-line tool. + +* Moved C-callable API functions into an `ffi` submodule. + +### 0.1.0 + +* Initial release. diff --git a/third_party/rust/mapped_hyph/benches/bench.rs b/third_party/rust/mapped_hyph/benches/bench.rs new file mode 100644 index 0000000000..cf4ad6cb2f --- /dev/null +++ b/third_party/rust/mapped_hyph/benches/bench.rs @@ -0,0 +1,50 @@ +// Any copyright to the test code below is dedicated to the Public Domain. 
+// http://creativecommons.org/publicdomain/zero/1.0/ + +use criterion::black_box; +use criterion::criterion_group; +use criterion::criterion_main; +use criterion::BenchmarkId; +use criterion::Criterion; + +use mapped_hyph::Hyphenator; +use std::fs; + +const SAMPLE_SIZE: usize = 300; +const DIC_PATH: &str = "hyph_en_US.hyf"; + +fn bench_construct(c: &mut Criterion) { + c.bench_function("construct", |b| { + b.iter(|| { + let dic = unsafe { mapped_hyph::load_file(DIC_PATH) } + .expect(&format!("failed to load dictionary {}", DIC_PATH)); + let _ = Hyphenator::new(black_box(&*dic)); + }) + }); +} + +fn bench_find_hyphen_values(c: &mut Criterion) { + // XXX: Should we copy this file to the crate to ensure reproducibility? + let data = fs::read_to_string("/usr/share/dict/words").expect("File reading failed."); + let words: Vec<&str> = data.lines().take(SAMPLE_SIZE).collect(); + + let dic = unsafe { mapped_hyph::load_file(DIC_PATH) } + .expect(&format!("failed to load dictionary {}", DIC_PATH)); + let hyph = Hyphenator::new(&*dic); + + c.bench_with_input( + BenchmarkId::new("bench_word", SAMPLE_SIZE), + &words, + |b, words| { + b.iter(|| { + let mut values: Vec<u8> = vec![0; 1000]; + for w in words { + hyph.find_hyphen_values(&w, &mut values); + } + }); + }, + ); +} + +criterion_group!(benches, bench_construct, bench_find_hyphen_values,); +criterion_main!(benches); diff --git a/third_party/rust/mapped_hyph/cbindgen.toml b/third_party/rust/mapped_hyph/cbindgen.toml new file mode 100644 index 0000000000..6535c0754d --- /dev/null +++ b/third_party/rust/mapped_hyph/cbindgen.toml @@ -0,0 +1,32 @@ +# This is a template cbindgen.toml file with all of the default values. +# Some values are commented out because their absence is the real default. +# +# See https://github.com/eqrion/cbindgen/blob/master/docs.md#cbindgentoml +# for detailed documentation of available options. 
+ +language = "C" + +############## Options for Wrapping the Contents of the Header ################# + +header = """/* + * Copyright 2019 Mozilla Foundation. See the COPYRIGHT + * file at the top-level directory of this distribution. + * + * Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or + * https://www.apache.org/licenses/LICENSE-2.0> or the MIT license + * <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your + * option. This file may not be copied, modified, or distributed + * except according to those terms. +**/ + +/* clang-format off */ +""" +trailer = "/* clang-format on */" +include_guard = "mapped_hyph_h" +autogen_warning = """/* + * Warning, this file is autogenerated by cbindgen. Don't modify this manually. + */ +""" +include_version = false +sys_includes = ["stdbool.h", "stdint.h"] +no_includes = true diff --git a/third_party/rust/mapped_hyph/doc/mapped_hyph_format.md b/third_party/rust/mapped_hyph/doc/mapped_hyph_format.md new file mode 100644 index 0000000000..d98162d7ea --- /dev/null +++ b/third_party/rust/mapped_hyph/doc/mapped_hyph_format.md @@ -0,0 +1,98 @@ +# Compiled hyphenation table format for mapped_hyph + +The file is a "flattened" representation of the list of `HyphenDict` structs +and descendant objects used by libhyphen +(see [hyphen.h](https://github.com/hunspell/hyphen/blob/master/hyphen.h)). + +Note that multi-byte integer types in the file are stored in _little-endian_ byte order. + +## Overall file header + +The file begins with a 4-byte "signature", followed by a count of the number +of hyphenation levels, and an array of offsets to each hyphenation level. +A "level" is essentially equivalent to libhyphen's `HyphenDict`. 
+ +### Header (size: 8 bytes + 4 * numLevels) +Type | Name | Description +-----|------|------------ +uint8[4] | magicNumber | 4-byte file identification code: ['H', 'y', 'f', '0'] +uint32 | numLevels | number of hyphenation levels present +uint32[numLevels] | levelOffset | offset from start of file to each Level + +Currently, there are normally 2 hyphenation levels, as the parser/compiler will +generate a default first level if no NEXTLEVEL keyword is present in the pattern file. + +## Hyphenation Level + +Each level of the hyphenation pattern begins with a Level header, followed by +the data for its states and the strings they refer to. +When the hyphenation machine is executed, we always begin at state offset 0 +(from the level's stateDataBase); each transition to a new state represents the +target directly by its offset from stateDataBase. +A state offset of 0xFFFFFF is considered invalid. + +Strings are represented as offsets from the level's stringDataBase; each string +is encoded as a one-byte length followed by `length` bytes of utf-8 data. +(So the maximum string length is 255 utf-8 code units; this is far more than any actual +hyphenation dictionary uses). +A string offset of 0xFFFF is considered invalid and represents an absent string. + +The minimum number of characters that must be kept together at the start/end of a word, +or of a component of a compound (i.e. the `...Min` values) is a count of _Unicode characters_, +not UTF-8 code units. (Note that the presentation-form ligature characters U+FB00 'ﬀ' through U+FB06 'ﬆ' +are counted as 2 or 3 characters for this purpose.) 
+ +### Level (size: 16 bytes + state data + string data, padded to a 4-byte boundary) +Type | Name | Description +-----|------|------------ +uint32 | stateDataBase | offset from beginning of Level to start of level's State data +uint32 | stringDataBase | offset from beginning of Level to start of level's packed String data +uint16 | noHyphenStringOffset | from level's stringDataBase +uint16 | noHyphenCount | number of (NUL-separated) strings in the nohyphen string +uint8 | leftHyphenMin | minimum number of characters kept together at start of word +uint8 | rightHyphenMin | minimum number of characters kept together at end of word +uint8 | compoundLeftHyphenMin | minimum number of characters kept together at start of second component of a compound +uint8 | compoundRightHyphenMin | minimum number of characters kept together at end of first component of a compound + +## State + +Each state, referred to by its offset from the level's stateDataBase, consists of a header +followed by an array of transitions for input bytes that need to be matched in this state. +The state also records a fallback state offset, which is the transition to be taken +if the next input byte does not match any of the transition records. + +If a match string is present (i.e. `matchStringOffset` is not 0xFFFF), it is a string of hyphenation values +(encoded as ASCII digits '0'..'9') to be applied at the current position in the word. 
+ +### StateHeader (size: 8 bytes) +Type | Name | Description +-----|------|------------ +uint32 | fallbackStateOffset | (from level's stateDataBase) +uint16 | matchStringOffset | (from level's stringDataBase) +uint8 | numTransitions | count of Transitions that follow the StateHeader and optional StateHeaderExtension +uint8 | isExtended | if non-zero, the StateHeader is immediately followed by a StateHeaderExtension + +If the `isExtended` flag in the state header is set, this state includes a potential spelling change +and there is an extended form of the header present before the array of transitions. +(Note that extended states with spelling-change rules are not yet supported by the mapped_hyph engine; +none of the hyphenation dictionaries shipped with Firefox includes such rules.) + +### StateHeaderExtension (size: 4 bytes) +Type | Name | Description +-----|------|------------ +uint16 | replacementStringOffset | (from level's stringDataBase) the replacement string +int8 | replacementIndex | index of the byte position (relative to current position in the word) at which the spelling replacement should happen +int8 | replacementCut | number of bytes to cut from the original word when making the replacement + +## Transitions + +The state's transitions are encoded as an array of Transition records, each corresponding to an input byte +and providing the offset of the new state. The transitions for each state are sorted by ascending value of input byte +(although in practice there are usually only a few valid transitions, and so a binary search does not seem to be +worthwhile). 
+ +### Transition (size: 4 bytes) +Type | Name | Description +-----|------|------------ +uint24 | newStateOffset | (from level's stateDataBase) +uint8 | inputByte | the input byte (utf-8 code unit) for this transition diff --git a/third_party/rust/mapped_hyph/hyph_en_US.hyf b/third_party/rust/mapped_hyph/hyph_en_US.hyf Binary files differ new file mode 100644 index 0000000000..950ac36986 --- /dev/null +++ b/third_party/rust/mapped_hyph/hyph_en_US.hyf diff --git a/third_party/rust/mapped_hyph/src/bin/hyf_compile.rs b/third_party/rust/mapped_hyph/src/bin/hyf_compile.rs new file mode 100644 index 0000000000..257c747f54 --- /dev/null +++ b/third_party/rust/mapped_hyph/src/bin/hyf_compile.rs @@ -0,0 +1,44 @@ +// Copyright 2019-2020 Mozilla Foundation. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +extern crate log; +extern crate mapped_hyph; + +use std::env; +use std::fs::File; + +struct Logger {} + +impl log::Log for Logger { + fn enabled(&self, _: &log::Metadata) -> bool { + true + } + + fn log(&self, record: &log::Record) { + eprintln!("{} - {}", record.level(), record.args()); + } + + fn flush(&self) {} +} + +static LOGGER: Logger = Logger {}; + +fn main() -> std::io::Result<()> { + unsafe { log::set_logger_racy(&LOGGER).unwrap() }; + + let args: Vec<String> = env::args().collect(); + if args.len() == 3 { + let in_file = File::open(&args[1])?; + let mut out_file = File::create(&args[2])?; + mapped_hyph::builder::compile(&in_file, &mut out_file, true)?; + } else { + println!("usage: hyf_compile <pattern-file> <output-file>"); + } + Ok(()) +} diff --git a/third_party/rust/mapped_hyph/src/builder.rs b/third_party/rust/mapped_hyph/src/builder.rs new file mode 100644 index 0000000000..e19a0087fd --- /dev/null +++ b/third_party/rust/mapped_hyph/src/builder.rs @@ -0,0 +1,509 @@ +// Copyright 2019-2020 Mozilla Foundation. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/// Functions to compile human-readable patterns into a mapped_hyph +/// flattened representation of the hyphenation state machine. + +use std::io::{Read,BufRead,BufReader,Write,Error,ErrorKind}; +use std::collections::HashMap; +use std::convert::TryInto; +use std::hash::{Hash,Hasher}; + +// Wrap a HashMap so that we can implement the Hash trait. 
+#[derive(PartialEq, Eq, Clone)] +struct TransitionMap (HashMap<u8,i32>); + +impl TransitionMap { + fn new() -> TransitionMap { + TransitionMap(HashMap::<u8,i32>::new()) + } +} + +impl Hash for TransitionMap { + fn hash<H: Hasher>(&self, state: &mut H) { + // We only look at the values here; that's likely to be enough + // for a reasonable hash. + let mut transitions: Vec<&i32> = self.0.values().collect(); + transitions.sort(); + for t in transitions { + t.hash(state); + } + } +} + +#[derive(PartialEq, Eq, Hash, Clone)] +struct State { + match_string: Option<Vec<u8>>, + #[allow(dead_code)] + repl_string: Option<Vec<u8>>, + #[allow(dead_code)] + repl_index: i32, + #[allow(dead_code)] + repl_cut: i32, + fallback_state: i32, + transitions: TransitionMap, +} + +impl State { + fn new() -> State { + State { + match_string: None, + repl_string: None, + repl_index: -1, + repl_cut: -1, + fallback_state: -1, + transitions: TransitionMap::new(), + } + } +} + +/// Structures returned by the read_dic_file() function; +/// array of these can then be passed to write_hyf_file() +/// to create the flattened output. +struct LevelBuilder { + states: Vec<State>, + str_to_state: HashMap<Vec<u8>,i32>, + encoding: Option<String>, + nohyphen: Option<String>, + lh_min: u8, + rh_min: u8, + clh_min: u8, + crh_min: u8, +} + +impl LevelBuilder { + fn new() -> LevelBuilder { + let mut result = LevelBuilder { + states: Vec::<State>::new(), + str_to_state: HashMap::<Vec<u8>,i32>::new(), + encoding: None, + nohyphen: None, + lh_min: 0, + rh_min: 0, + clh_min: 0, + crh_min: 0, + }; + // Initialize the builder with an empty start state. 
+ result.str_to_state.insert(vec![], 0); + result.states.push(State::new()); + result + } + + fn find_state_number_for(&mut self, text: &[u8]) -> i32 { + let count = self.states.len() as i32; + let index = *self.str_to_state.entry(text.to_vec()).or_insert(count); + if index == count { + self.states.push(State::new()); + } + index + } + + fn add_pattern(&mut self, pattern: &str) { + let mut bytes = pattern.as_bytes(); + let mut text = Vec::<u8>::with_capacity(bytes.len()); + let mut digits = Vec::<u8>::with_capacity(bytes.len() + 1); + let mut repl_str = None; + let mut repl_index = 0; + let mut repl_cut = 0; + + // Check for replacement rule (non-standard hyphenation spelling change). + if let Some(slash) = bytes.iter().position(|x| *x == b'/') { + let parts = bytes.split_at(slash); + bytes = parts.0; + let mut it = parts.1[1 ..].split(|x| *x == b','); + if let Some(repl) = it.next() { + repl_str = Some(repl.to_vec()); + } + if let Some(num) = it.next() { + repl_index = std::str::from_utf8(num).unwrap().parse::<i32>().unwrap() - 1; + } + if let Some(num) = it.next() { + repl_cut = std::str::from_utf8(num).unwrap().parse::<i32>().unwrap(); + } + } + + // Separate the input pattern into parallel arrays of text (bytes) and digits. + let mut got_digit = false; + for byte in bytes { + if *byte <= b'9' && *byte >= b'0' { + if got_digit { + warn!("invalid pattern \"{}\": consecutive digits", pattern); + return; + } + digits.push(*byte); + got_digit = true; + } else { + text.push(*byte); + if got_digit { + got_digit = false; + } else { + digits.push(b'0'); + } + } + } + if !got_digit { + digits.push(b'0'); + } + + if repl_str.is_none() { + // Optimize away leading zeroes from the digits array. + while !digits.is_empty() && digits[0] == b'0' { + digits.remove(0); + } + } else { + // Convert repl_index and repl_cut from Unicode char to byte indexing. + let start = if text[0] == b'.' 
{ 1 } else { 0 }; + if start == 1 { + if digits[0] != b'0' { + warn!("invalid pattern \"{}\": unexpected digit before start of word", pattern); + return; + } + digits.remove(0); + } + let word = std::str::from_utf8(&text[start..]).unwrap(); + let mut chars: Vec<_> = word.char_indices().collect(); + chars.push((word.len(), '.')); + repl_cut = chars[(repl_index + repl_cut) as usize].0 as i32 - chars[repl_index as usize].0 as i32; + repl_index = chars[repl_index as usize].0 as i32; + } + + // Create the new state, or add pattern into an existing state + // (which should not already have a match_string). + let mut state_num = self.find_state_number_for(&text); + let mut state = &mut self.states[state_num as usize]; + if state.match_string.is_some() { + warn!("duplicate pattern \"{}\" discarded", pattern); + return; + } + if !digits.is_empty() { + state.match_string = Some(digits); + } + if repl_str.is_some() { + state.repl_string = repl_str; + state.repl_index = repl_index; + state.repl_cut = repl_cut; + } + + // Set up prefix transitions, inserting additional states as needed. + while !text.is_empty() { + let last_state = state_num; + let ch = *text.last().unwrap(); + text.truncate(text.len() - 1); + state_num = self.find_state_number_for(&text); + if let Some(exists) = self.states[state_num as usize].transitions.0.insert(ch, last_state) { + assert_eq!(exists, last_state, "overwriting existing transition at pattern \"{}\"", pattern); + break; + } + } + } + + fn merge_duplicate_states(&mut self) { + // We loop here because when we eliminate a duplicate, and update the transitons + // that referenced it, we may thereby create new duplicates that another pass + // will find and compress further. + loop { + let orig_len = self.states.len(); + // Used to map State records to the (first) index at which they occur. 
    /// Serialize this level into its flattened binary form: a level header,
    /// followed by the state records, followed by the string data.
    ///
    /// Layout produced by the code below:
    /// - header: state data base (u32 LE), string data base (u32 LE),
    ///   nohyphen string offset (u16 LE), nohyphen count (u16 LE), then the
    ///   four one-byte minimums (lh, rh, clh, crh);
    /// - each state: fallback state offset (u32 LE), match string offset
    ///   (u16 LE), transition count (u8), extended flag (u8); extended states
    ///   add replacement string offset (u16 LE), index (u8) and cut (u8);
    ///   then one 4-byte record per transition;
    /// - strings: a length byte followed by the bytes, with identical
    ///   strings stored once; the string area is zero-padded to a multiple
    ///   of 4 bytes.
    ///
    /// Panics if a string is 256 bytes or longer, or if a string offset or
    /// the nohyphen count does not fit in a u16.
    // NOTE(review): transition targets are written as 24-bit values without a
    // range check, so a level whose state data exceeds 24 bits of offset would
    // be silently truncated -- confirm whether that can occur in practice.
    fn flatten(&self) -> Vec<u8> {
        // Calculate total space needed for state data, and build the state_to_offset table.
        let mut state_data_size = 0;
        let mut state_to_offset = Vec::<usize>::with_capacity(self.states.len());
        for state in &self.states {
            state_to_offset.push(state_data_size);
            // Fixed header: 8 bytes, or 12 for an extended (replacement) state.
            state_data_size += if state.repl_string.is_some() { 12 } else { 8 };
            // Each transition occupies 4 bytes.
            state_data_size += state.transitions.0.len() * 4;
        }

        // Helper to map a state index to its offset in the final data block.
        // Negative indexes (no such state) map to the invalid-offset marker.
        let get_state_offset_for = |state_index: i32| -> u32 {
            if state_index < 0 {
                return super::INVALID_STATE_OFFSET;
            }
            state_to_offset[state_index as usize] as u32
        };

        // Helper to map a byte string to its offset in the final data block, and
        // store the bytes into string_data unless using an already-existing string.
        let mut string_to_offset = HashMap::<Vec<u8>,usize>::new();
        let mut string_data = Vec::<u8>::new();
        let mut get_string_offset_for = |bytes: &Option<Vec<u8>>| -> u16 {
            if bytes.is_none() {
                return super::INVALID_STRING_OFFSET;
            }
            // The length is stored in a single byte.
            assert!(bytes.as_ref().unwrap().len() < 256);
            let new_offset = string_data.len();
            let offset = *string_to_offset.entry(bytes.as_ref().unwrap().clone()).or_insert(new_offset);
            // The entry returned our candidate offset, so the string is new: append it.
            if offset == new_offset {
                string_data.push(bytes.as_ref().unwrap().len() as u8);
                string_data.extend_from_slice(bytes.as_ref().unwrap().as_ref());
            }
            offset.try_into().unwrap()
        };

        // Handle nohyphen string list if present, converting comma separators to NULs
        // and trimming any surplus whitespace.
        let mut nohyphen_string_offset: u16 = super::INVALID_STRING_OFFSET;
        let mut nohyphen_count: u16 = 0;
        if self.nohyphen.is_some() {
            let nohyphen_strings: Vec<_> = self.nohyphen.as_ref().unwrap().split(',').map(|x| x.trim()).collect();
            nohyphen_count = nohyphen_strings.len().try_into().unwrap();
            nohyphen_string_offset = get_string_offset_for(&Some(nohyphen_strings.join("\0").as_bytes().to_vec()));
        }

        let mut state_data = Vec::<u8>::with_capacity(state_data_size);
        for state in &self.states {
            state_data.extend(&get_state_offset_for(state.fallback_state).to_le_bytes());
            state_data.extend(&get_string_offset_for(&state.match_string).to_le_bytes());
            state_data.push(state.transitions.0.len() as u8);
            // Determine whether to use an extended state record, and if so add the
            // replacement string and index fields.
            if state.repl_string.is_none() {
                state_data.push(0);
            } else {
                state_data.push(1);
                state_data.extend(&get_string_offset_for(&state.repl_string).to_le_bytes());
                state_data.push(state.repl_index as u8);
                state_data.push(state.repl_cut as u8);
            }
            // Collect transitions into an array so we can sort them.
            // (Tuples sort by the input byte first, giving a deterministic order.)
            let mut transitions = vec![];
            for (key, value) in state.transitions.0.iter() {
                transitions.push((*key, get_state_offset_for(*value)))
            }
            transitions.sort();
            for t in transitions {
                // New state offset is stored as a 24-bit value, so we do this manually.
                state_data.push((t.1 & 0xff) as u8);
                state_data.push(((t.1 >> 8) & 0xff) as u8);
                state_data.push(((t.1 >> 16) & 0xff) as u8);
                // The fourth byte of the record is the input byte to match.
                state_data.push(t.0);
            }
        }
        // Sanity check: the records written must match the size computed up front,
        // because the state offsets were derived from that computation.
        assert_eq!(state_data.len(), state_data_size);

        // Pad string data to a 4-byte boundary
        while string_data.len() & 3 != 0 {
            string_data.push(0);
        }

        let total_size = super::LEVEL_HEADER_SIZE as usize + state_data_size + string_data.len();
        let mut result = Vec::<u8>::with_capacity(total_size);

        // State data follows the header directly; string data follows the states.
        let state_data_base: u32 = super::LEVEL_HEADER_SIZE as u32;
        let string_data_base: u32 = state_data_base + state_data_size as u32;

        result.extend(&state_data_base.to_le_bytes());
        result.extend(&string_data_base.to_le_bytes());
        result.extend(&nohyphen_string_offset.to_le_bytes());
        result.extend(&nohyphen_count.to_le_bytes());
        result.push(self.lh_min);
        result.push(self.rh_min);
        result.push(self.clh_min);
        result.push(self.crh_min);

        result.extend(state_data.iter());
        result.extend(string_data.iter());

        assert_eq!(result.len(), total_size);

        result
    }
+fn read_dic_file<T: Read>(dic_file: T, compress: bool) -> Result<Vec<LevelBuilder>, &'static str> { + let reader = BufReader::new(dic_file); + + let mut builders = Vec::<LevelBuilder>::new(); + builders.push(LevelBuilder::new()); + let mut builder = &mut builders[0]; + + for (index, line) in reader.lines().enumerate() { + let mut trimmed = line.unwrap().trim().to_string(); + // Strip comments. + if let Some(i) = trimmed.find('%') { + trimmed = trimmed[..i].trim().to_string(); + } + // Ignore empty lines. + if trimmed.is_empty() { + continue; + } + // Uppercase indicates keyword rather than pattern. + if trimmed.as_bytes()[0] >= b'A' && trimmed.as_bytes()[0] <= b'Z' { + // First line is encoding; we only support UTF-8. + if builder.encoding.is_none() { + if trimmed != "UTF-8" { + return Err("Only UTF-8 patterns are accepted!"); + }; + builder.encoding = Some(trimmed); + continue; + } + // Check for valid keyword-value pairs. + if trimmed.contains(' ') { + let parts: Vec<&str> = trimmed.split(' ').collect(); + if parts.len() != 2 { + warn!("unrecognized keyword/values: {}", trimmed); + continue; + } + let keyword = parts[0]; + let value = parts[1]; + match keyword { + "LEFTHYPHENMIN" => builder.lh_min = value.parse::<u8>().unwrap(), + "RIGHTHYPHENMIN" => builder.rh_min = value.parse::<u8>().unwrap(), + "COMPOUNDLEFTHYPHENMIN" => builder.clh_min = value.parse::<u8>().unwrap(), + "COMPOUNDRIGHTHYPHENMIN" => builder.crh_min = value.parse::<u8>().unwrap(), + "NOHYPHEN" => builder.nohyphen = Some(trimmed), + _ => warn!("unknown keyword: {}", trimmed), + } + continue; + } + // Start a new hyphenation level? + if trimmed == "NEXTLEVEL" { + builders.push(LevelBuilder::new()); + builder = builders.last_mut().unwrap(); + continue; + } + warn!("unknown keyword: {}", trimmed); + continue; + } + // Patterns should always be provided in lowercase; complain if not, and discard + // the bad pattern. 
+ if trimmed != trimmed.to_lowercase() { + warn!("pattern \"{}\" not lowercased at line {}", trimmed, index); + continue; + } + builder.add_pattern(&trimmed); + } + + // Create default first (compound-word) level if only one level was provided. + // (Maybe this should be optional? Currently just copying libhyphen behavior.) + if builders.len() == 1 { + let (lh_min, rh_min, clh_min, crh_min) = + (builders[0].lh_min, builders[0].rh_min, builders[0].clh_min, builders[0].crh_min); + builders.insert(0, LevelBuilder::new()); + builder = builders.first_mut().unwrap(); + builder.add_pattern("1-1"); + builder.add_pattern("1'1"); + builder.add_pattern("1\u{2013}1"); // en-dash + builder.add_pattern("1\u{2019}1"); // curly apostrophe + builder.nohyphen = Some("',\u{2013},\u{2019},-".to_string()); + builder.lh_min = lh_min; + builder.rh_min = rh_min; + builder.clh_min = if clh_min > 0 { clh_min } else if lh_min > 0 { lh_min } else { 3 }; + builder.crh_min = if crh_min > 0 { crh_min } else if rh_min > 0 { rh_min } else { 3 }; + } + + // Put in fallback states in each builder. + for builder in &mut builders { + for (key, state_index) in builder.str_to_state.iter() { + if key.is_empty() { + continue; + } + let mut fallback_key = key.clone(); + while !fallback_key.is_empty() { + fallback_key.remove(0); + if builder.str_to_state.contains_key(&fallback_key) { + break; + } + } + builder.states[*state_index as usize].fallback_state = builder.str_to_state[&fallback_key]; + } + } + + if compress { + // Merge duplicate states to reduce size. + for builder in &mut builders { + builder.merge_duplicate_states(); + } + } + + Ok(builders) +} + +/// Write out the state machines representing a set of hyphenation rules +/// to the given output stream. 
+fn write_hyf_file<T: Write>(hyf_file: &mut T, levels: Vec<LevelBuilder>) -> std::io::Result<()> { + if levels.is_empty() { + return Err(Error::from(ErrorKind::InvalidData)); + } + let mut flattened = vec![]; + for level in levels { + flattened.push(level.flatten()); + } + // Write file header: magic number, count of levels. + hyf_file.write_all(&[b'H', b'y', b'f', b'0'])?; + let level_count: u32 = flattened.len() as u32; + hyf_file.write_all(&level_count.to_le_bytes())?; + // Write array of offsets to each level. First level will begin immediately + // after the array of offsets. + let mut offset: u32 = super::FILE_HEADER_SIZE as u32 + 4 * level_count; + for flat in &flattened { + hyf_file.write_all(&offset.to_le_bytes())?; + offset += flat.len() as u32; + } + // Write the flattened data for each level. + for flat in &flattened { + hyf_file.write_all(&flat)?; + } + Ok(()) +} + +/// The public API to the compilation process: reads `dic_file` and writes compiled tables +/// to `hyf_file`. The `compress` param determines whether extra processing to reduce the +/// size of the output is performed. +pub fn compile<T1: Read, T2: Write>(dic_file: T1, hyf_file: &mut T2, compress: bool) -> std::io::Result<()> { + match read_dic_file(dic_file, compress) { + Ok(dic) => write_hyf_file(hyf_file, dic), + Err(e) => { + warn!("parse error: {}", e); + return Err(Error::from(ErrorKind::InvalidData)) + } + } +} diff --git a/third_party/rust/mapped_hyph/src/ffi.rs b/third_party/rust/mapped_hyph/src/ffi.rs new file mode 100644 index 0000000000..1b546e2567 --- /dev/null +++ b/third_party/rust/mapped_hyph/src/ffi.rs @@ -0,0 +1,250 @@ +// Copyright 2019-2020 Mozilla Foundation. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your +// option. 
This file may not be copied, modified, or distributed +// except according to those terms. + +use std::slice; +use std::str; +use std::ffi::CStr; +use std::fs::File; +use std::io::Read; +use std::os::raw::c_char; +use std::str::Utf8Error; + +use memmap2::Mmap; + +use super::Hyphenator; + +/// Opaque type representing a hyphenation dictionary loaded from a file, +/// for use in FFI function signatures. +pub struct HyphDic; + +/// Opaque type representing a compiled dictionary in a memory buffer. +pub struct CompiledData; + +// Helper to convert word and hyphen buffer parameters from raw C pointer/length +// pairs to the Rust types expected by mapped_hyph. +unsafe fn params_from_c<'a>(word: *const c_char, word_len: u32, + hyphens: *mut u8, hyphens_len: u32) -> + (Result<&'a str, Utf8Error>, &'a mut [u8]) { + (str::from_utf8(slice::from_raw_parts(word as *const u8, word_len as usize)), + slice::from_raw_parts_mut(hyphens, hyphens_len as usize)) +} + +/// C-callable function to load a hyphenation dictionary from a file at `path`. +/// +/// Returns null on failure. +/// +/// This does not fully validate that the file contains usable hyphenation +/// data, it only opens the file (read-only) and mmap's it into memory, and +/// does some minimal sanity-checking that it *might* be valid. +/// +/// The returned `HyphDic` must be released with `mapped_hyph_free_dictionary`. +/// +/// # Safety +/// The given `path` must be a valid pointer to a NUL-terminated (C-style) +/// string. +#[no_mangle] +pub unsafe extern "C" fn mapped_hyph_load_dictionary(path: *const c_char) -> *const HyphDic { + let path_str = match CStr::from_ptr(path).to_str() { + Ok(str) => str, + Err(_) => return std::ptr::null(), + }; + let hyph = Box::new(match super::load_file(path_str) { + Some(dic) => dic, + _ => return std::ptr::null(), + }); + Box::into_raw(hyph) as *const HyphDic +} + +/// C-callable function to free a hyphenation dictionary +/// that was loaded by `mapped_hyph_load_dictionary`. 
+/// +/// # Safety +/// The `dic` parameter must be a `HyphDic` pointer obtained from +/// `mapped_hyph_load_dictionary`, and not previously freed. +#[no_mangle] +pub unsafe extern "C" fn mapped_hyph_free_dictionary(dic: *mut HyphDic) { + Box::from_raw(dic); +} + +/// C-callable function to find hyphenation values for a given `word`, +/// using a dictionary loaded via `mapped_hyph_load_dictionary`. +/// +/// The `word` must be UTF-8-encoded, and is `word_len` bytes (not characters) +/// long. +/// +/// Caller must supply the `hyphens` output buffer for results; its size is +/// given in `hyphens_len`. +/// It should be at least `word_len` elements long. +/// +/// Returns -1 if `word` is not valid UTF-8, or the output `hyphens` buffer is +/// too small. +/// Otherwise returns the number of potential hyphenation positions found. +/// +/// # Panics +/// This function may panic if the given dictionary is not valid. +/// +/// # Safety +/// The `dic` parameter must be a `HyphDic` pointer obtained from +/// `mapped_hyph_load_dictionary`. +/// +/// The `word` and `hyphens` parameter must be valid pointers to memory buffers +/// of at least the respective sizes `word_len` and `hyphens_len`. +#[no_mangle] +pub unsafe extern "C" fn mapped_hyph_find_hyphen_values_dic(dic: *const HyphDic, + word: *const c_char, word_len: u32, + hyphens: *mut u8, hyphens_len: u32) -> i32 { + if word_len > hyphens_len { + return -1; + } + let (word_str, hyphen_buf) = params_from_c(word, word_len, hyphens, hyphens_len); + if word_str.is_err() { + return -1; + } + Hyphenator::new(&*(dic as *const Mmap)) + .find_hyphen_values(word_str.unwrap(), hyphen_buf) as i32 +} + +/// C-callable function to find hyphenation values for a given `word`, +/// using a dictionary loaded and owned by the caller. +/// +/// The dictionary is supplied as a raw memory buffer `dic_buf` of size +/// `dic_len`. +/// +/// The `word` must be UTF-8-encoded, and is `word_len` bytes (not characters) +/// long. 
+/// +/// Caller must supply the `hyphens` output buffer for results; its size is +/// given in `hyphens_len`. +/// It should be at least `word_len` elements long. +/// +/// Returns -1 if `word` is not valid UTF-8, or the output `hyphens` buffer is +/// too small. +/// Otherwise returns the number of potential hyphenation positions found. +/// +/// # Panics +/// This function may panic if the given dictionary is not valid. +/// +/// # Safety +/// The `dic_buf` parameter must be a valid pointer to a memory block of size +/// at least `dic_len`. +/// +/// The `word` and `hyphens` parameter must be valid pointers to memory buffers +/// of at least the respective sizes `word_len` and `hyphens_len`. +#[no_mangle] +pub unsafe extern "C" fn mapped_hyph_find_hyphen_values_raw(dic_buf: *const u8, dic_len: u32, + word: *const c_char, word_len: u32, + hyphens: *mut u8, hyphens_len: u32) -> i32 { + if word_len > hyphens_len { + return -1; + } + let (word_str, hyphen_buf) = params_from_c(word, word_len, hyphens, hyphens_len); + if word_str.is_err() { + return -1; + } + Hyphenator::new(slice::from_raw_parts(dic_buf, dic_len as usize)) + .find_hyphen_values(word_str.unwrap(), hyphen_buf) as i32 +} + +/// C-callable function to check if a given memory buffer `dic_buf` of size +/// `dic_len` is potentially usable as a hyphenation dictionary. +/// +/// Returns `true` if the given memory buffer looks like it may be a valid +/// hyphenation dictionary, `false` if it is clearly not usable. +/// +/// # Safety +/// The `dic_buf` parameter must be a valid pointer to a memory block of size +/// at least `dic_len`. 
+#[no_mangle] +pub unsafe extern "C" fn mapped_hyph_is_valid_hyphenator(dic_buf: *const u8, dic_len: u32) -> bool { + if dic_buf.is_null() { + return false; + } + let dic = Hyphenator::new(slice::from_raw_parts(dic_buf, dic_len as usize)); + dic.is_valid_hyphenator() +} + +/// C-callable function to free a CompiledData object created by +/// a `mapped_hyph_compile_...` function (below). +/// +/// # Safety +/// The `data` parameter must be a `CompiledData` pointer obtained from +/// a `mapped_hyph_compile_...` function, and not previously freed. +#[no_mangle] +pub unsafe extern "C" fn mapped_hyph_free_compiled_data(data: *mut CompiledData) { + Box::from_raw(data); +} + +// Helper for the compilation functions (from either memory buffer or file path). +fn compile_and_wrap<T: Read>(input: T, compress: bool) -> *const CompiledData { + let mut compiled: Vec<u8> = vec![]; + if super::builder::compile(input, &mut compiled, compress).is_err() { + return std::ptr::null(); + } + compiled.shrink_to_fit(); + + // Create a persistent heap reference to the compiled data, and return a pointer to it. + Box::into_raw(Box::new(compiled)) as *const CompiledData +} + +/// C-callable function to compile hyphenation patterns from `pattern_buf` and return +/// the compiled data in a memory buffer, suitable to be stored somewhere or passed +/// to `mapped_hyph_find_hyphen_values_raw` to perform hyphenation. +/// +/// The returned `CompiledData` must be released with `mapped_hyph_free_compiled_data`. +/// +/// # Safety +/// The `pattern_buf` parameter must be a valid pointer to a memory block of size +/// at least `pattern_len`. +#[no_mangle] +pub unsafe extern "C" fn mapped_hyph_compile_buffer(pattern_buf: *const u8, pattern_len: u32, compress: bool) -> *const CompiledData { + compile_and_wrap(slice::from_raw_parts(pattern_buf, pattern_len as usize), compress) +} + +/// C-callable function to compile hyphenation patterns from a file to a memory buffer. 
+/// +/// The returned `CompiledData` must be released with `mapped_hyph_free_compiled_data`. +/// +/// # Safety +/// The given `path` must be a valid pointer to a NUL-terminated (C-style) string. +#[no_mangle] +pub unsafe extern "C" fn mapped_hyph_compile_file(path: *const c_char, compress: bool) -> *const CompiledData { + // Try to open the file at the given path, returning null on failure. + let path_str = match CStr::from_ptr(path).to_str() { + Ok(str) => str, + Err(_) => return std::ptr::null(), + }; + let in_file = match File::open(path_str) { + Ok(file) => file, + Err(_) => return std::ptr::null(), + }; + compile_and_wrap(&in_file, compress) +} + +/// Get the size of the compiled table buffer in a `CompiledData` object. +/// +/// # Safety +/// The `data` parameter must be a `CompiledData` pointer obtained from +/// a `mapped_hyph_compile_...` function, and not previously freed. +#[no_mangle] +pub unsafe extern "C" fn mapped_hyph_compiled_data_size(data: *const CompiledData) -> u32 { + (&*(data as *const Vec<u8>)).len() as u32 +} + +/// Get a pointer to the raw data held by a `CompiledData` object. +/// +/// # Safety +/// The `data` parameter must be a `CompiledData` pointer obtained from +/// a `mapped_hyph_compile_...` function, and not previously freed. +/// +/// The returned pointer only remains valid as long as the `CompiledData` has not +/// been released (by passing it to `mapped_hyph_free_compiled_data`). +#[no_mangle] +pub unsafe extern "C" fn mapped_hyph_compiled_data_ptr(data: *const CompiledData) -> *const u8 { + (&*(data as *const Vec<u8>)).as_ptr() +} diff --git a/third_party/rust/mapped_hyph/src/lib.rs b/third_party/rust/mapped_hyph/src/lib.rs new file mode 100644 index 0000000000..848c93d257 --- /dev/null +++ b/third_party/rust/mapped_hyph/src/lib.rs @@ -0,0 +1,642 @@ +// Copyright 2019 Mozilla Foundation. See the COPYRIGHT +// file at the top-level directory of this distribution. 
// One transition record: three bytes of little-endian new-state offset
// followed by the input byte that selects this transition. Byte ranges of a
// Level are reinterpreted as arrays of Transition (see State::transitions()),
// hence repr(C) to pin down the layout.
// Transition records do not depend on any specific alignment.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
struct Transition(u8, u8, u8, u8);

impl Transition {
    // Offset of the target state: the first three bytes, little-endian.
    fn new_state_offset(&self) -> usize {
        let Transition(low, mid, high, _) = *self;
        u32::from_le_bytes([low, mid, high, 0]) as usize
    }
    // The input byte this transition applies to.
    fn match_byte(&self) -> u8 {
        let Transition(_, _, _, byte) = *self;
        byte
    }
}
Only the basic header +// is defined by the struct here; the rest of the state is accessed via +// pointer magic. +// There are two versions of State, a basic version that supports only simple +// hyphenation (no associated spelling change), and an extended version that +// adds the replacement-string fields to support spelling changes at the +// hyphenation point. Check is_extended() to know which version is present. +// State records are NOT necessarily 4-byte aligned, so multi-byte fields +// should be read with care. +#[derive(Debug,Copy,Clone)] +#[repr(C)] +struct State { + fallback_state: [u8; 4], + match_string_offset: [u8; 2], + num_transitions: u8, + is_extended: u8, +} + +#[repr(C)] +struct StateExtended { + state: State, + repl_string_offset: [u8; 2], + repl_index: i8, + repl_cut: i8, +} + +impl State { + // Accessors for the various State header fields; see file format description. + fn fallback_state(&self) -> usize { + u32::from_le_bytes(self.fallback_state) as usize + } + fn match_string_offset(&self) -> usize { + u16::from_le_bytes(self.match_string_offset) as usize + } + fn num_transitions(&self) -> u8 { + self.num_transitions + } + fn is_extended(&self) -> bool { + self.is_extended != 0 + } + // Accessors that are only valid if is_extended() is true. + // These use `unsafe` to dereference a pointer to the relevant field; + // this is OK because Level::get_state always validates the total state size + // before returning a state reference, so these pointers will be valid for + // any extended state it returns. 
+ #[allow(dead_code)] + fn as_extended(&self) -> &StateExtended { + debug_assert!(self.is_extended()); + unsafe { mem::transmute(self) } + } + #[allow(dead_code)] + fn repl_string_offset(&self) -> usize { + u16::from_le_bytes(self.as_extended().repl_string_offset) as usize + } + #[allow(dead_code)] + fn repl_index(&self) -> i8 { + self.as_extended().repl_index + } + #[allow(dead_code)] + fn repl_cut(&self) -> i8 { + self.as_extended().repl_cut + } + // Return the state's Transitions as a slice reference. + fn transitions(&self) -> &[Transition] { + let count = self.num_transitions() as usize; + if count == 0 { + return &[]; + } + let transition_offset = if self.is_extended() { mem::size_of::<StateExtended>() } else { mem::size_of::<State>() } as isize; + // We know the `offset` here will not look beyond the valid range of memory + // because Level::get_state() checks the state length (accounting for the + // number of transitions) before returning a State reference. + let trans_ptr = unsafe { (self as *const State as *const u8).offset(transition_offset) as *const Transition }; + // Again, because Level::get_state() already checked the state length, we know + // this slice address and count will be valid. + unsafe { slice::from_raw_parts(trans_ptr, count) } + } + // Look up the Transition for a given input byte, or None. + fn transition_for(&self, b: u8) -> Option<Transition> { + // The transitions array is sorted by match_byte() value, but there are + // usually very few entries; benchmarking showed that using binary_search_by + // here gave no benefit (possibly slightly slower). + self.transitions().iter().copied().find(|t| t.match_byte() == b) + } + // Just for debugging use... 
// We count the presentation-form ligature characters U+FB00..FB06 as multiple
// chars for the purposes of lefthyphenmin/righthyphenmin. In UTF-8, all these
// ligature characters are 3-byte sequences beginning with <0xEF, 0xAC>; this
// helper returns the "decomposed length" of the ligature given its trailing
// byte.
// Any other byte counts as a single character. Callers only pass trail bytes
// of valid UTF-8 (>= 0x80), but smaller values are handled too rather than
// panicking on an out-of-range table index.
fn lig_length(trail_byte: u8) -> usize {
    match trail_byte {
        // ff, fi, fl
        0x80..=0x82 => 2,
        // ffi, ffl
        0x83 | 0x84 => 3,
        // long-st, st
        0x85 | 0x86 => 2,
        _ => 1,
    }
}
+ fn new(data: &[u8]) -> Level { + Level { + data, + state_data_base_: u32::from_le_bytes(*array_ref!(data, 0, 4)) as usize, + string_data_base_: u32::from_le_bytes(*array_ref!(data, 4, 4)) as usize, + } + } + + // Accessors for Level header fields; see file format description. + fn state_data_base(&self) -> usize { + self.state_data_base_ // cached by constructor + } + fn string_data_base(&self) -> usize { + self.string_data_base_ // cached by constructor + } + fn nohyphen_string_offset(&self) -> usize { + u16::from_le_bytes(*array_ref!(self.data, 8, 2)) as usize + } + #[allow(dead_code)] + fn nohyphen_count(&self) -> u16 { + u16::from_le_bytes(*array_ref!(self.data, 10, 2)) + } + fn lh_min(&self) -> usize { + max(1, self.data[12] as usize) + } + fn rh_min(&self) -> usize { + max(1, self.data[13] as usize) + } + fn clh_min(&self) -> usize { + max(1, self.data[14] as usize) + } + fn crh_min(&self) -> usize { + max(1, self.data[15] as usize) + } + fn word_boundary_mins(&self) -> (usize, usize, usize, usize) { + (self.lh_min(), self.rh_min(), self.clh_min(), self.crh_min()) + } + // Strings are represented as offsets from the Level's string_data_base. + // This returns a byte slice referencing the string at a given offset, + // or an empty slice if invalid. + fn string_at_offset(&self, offset: usize) -> &'_ [u8] { + if offset == INVALID_STRING_OFFSET as usize { + return &[]; + } + let string_base = self.string_data_base() as usize + offset; + // TODO: move this to the validation function. + debug_assert!(string_base < self.data.len()); + if string_base + 1 > self.data.len() { + return &[]; + } + let len = self.data[string_base] as usize; + // TODO: move this to the validation function. + debug_assert!(string_base + 1 + len <= self.data.len()); + if string_base + 1 + len > self.data.len() { + return &[]; + } + self.data.get(string_base + 1 .. 
string_base + 1 + len).unwrap() + } + // The nohyphen field actually contains multiple NUL-separated substrings; + // return them as a vector of individual byte slices. + fn nohyphen(&self) -> Vec<&[u8]> { + let string_offset = self.nohyphen_string_offset(); + let nohyph_str = self.string_at_offset(string_offset as usize); + if nohyph_str.is_empty() { + return vec![]; + } + nohyph_str.split(|&b| b == 0).collect() + } + // States are represented as an offset from the Level's state_data_base. + // This returns a reference to the State at a given offset, or None if invalid. + fn get_state(&self, offset: usize) -> Option<&State> { + if offset == INVALID_STATE_OFFSET as usize { + return None; + } + debug_assert_eq!(offset & 3, 0); + let state_base = self.state_data_base() + offset; + // TODO: move this to the validation function. + debug_assert!(state_base + mem::size_of::<State>() <= self.string_data_base()); + if state_base + mem::size_of::<State>() > self.string_data_base() { + return None; + } + let state_ptr = &self.data[state_base] as *const u8 as *const State; + // This is safe because we just checked against self.string_data_base() above. + let state = unsafe { state_ptr.as_ref().unwrap() }; + let length = if state.is_extended() { mem::size_of::<StateExtended>() } else { mem::size_of::<State>() } + + mem::size_of::<Transition>() * state.num_transitions() as usize; + // TODO: move this to the validation function. + debug_assert!(state_base + length <= self.string_data_base()); + if state_base + length > self.string_data_base() { + return None; + } + // This is safe because we checked the full state length against self.string_data_base(). + unsafe { state_ptr.as_ref() } + } + // Sets hyphenation values (odd = potential break, even = no break) in values[], + // and returns the change in the number of odd values present, so the caller can + // keep track of the total number of potential breaks in the word. 
+ fn find_hyphen_values(&self, word: &str, values: &mut [u8], lh_min: usize, rh_min: usize) -> isize { + // Bail out immediately if the word is too short to hyphenate. + if word.len() < lh_min + rh_min { + return 0; + } + let start_state = self.get_state(0); + let mut st = start_state; + let mut hyph_count = 0; + for i in 0 .. word.len() + 2 { + // Loop over the word by bytes, with a virtual '.' added at each end + // to match word-boundary patterns. + let b = if i == 0 || i == word.len() + 1 { b'.' } else { word.as_bytes()[i - 1] }; + loop { + // Loop to repeatedly fall back if we don't find a matching transition. + // Note that this could infinite-loop if there is a state whose fallback + // points to itself (or a cycle of fallbacks), but this would represent + // a table compilation error. + // (A potential validation function could check for fallback cycles.) + if st.is_none() { + st = start_state; + break; + } + let state = st.unwrap(); + if let Some(tr) = state.transition_for(b) { + // Found a transition for the current byte. Look up the new state; + // if it has a match_string, merge its weights into `values`. + st = self.get_state(tr.new_state_offset()); + if let Some(state) = st { + let match_offset = state.match_string_offset(); + if match_offset != INVALID_STRING_OFFSET as usize { + if state.is_extended() { + debug_assert!(false, "extended hyphenation not supported by this function"); + } else { + let match_str = self.string_at_offset(match_offset); + let offset = i + 1 - match_str.len(); + assert!(offset + match_str.len() <= word.len() + 2); + for (j, ch) in match_str.iter().enumerate() { + let index = offset + j; + if index >= lh_min && index <= word.len() - rh_min { + // lh_min and rh_min are guaranteed to be >= 1, + // so this will not try to access outside values[]. 
+ let old_value = values[index - 1]; + let value = ch - b'0'; + if value > old_value { + if is_odd(old_value) != is_odd(value) { + // Adjust hyph_count for the change we're making + hyph_count += if is_odd(value) { 1 } else { -1 }; + } + values[index - 1] = value; + } + } + } + } + } + } + // We have handled the current input byte; leave the fallback loop + // and get next input. + break; + } + // No transition for the current byte; go to fallback state and try again. + st = self.get_state(state.fallback_state()); + } + } + + // If the word was not purely ASCII, or if the word begins/ends with + // digits, the use of lh_min and rh_min above may not have correctly + // excluded enough positions, so we need to fix things up here. + let mut index = 0; + let mut count = 0; + let word_bytes = word.as_bytes(); + let mut clear_hyphen_at = |i| { if is_odd(values[i]) { hyph_count -= 1; } values[i] = 0; }; + // Handle lh_min. + while count < lh_min - 1 && index < word_bytes.len() { + let byte = word_bytes[index]; + clear_hyphen_at(index); + if byte < 0x80 { + index += 1; + if is_ascii_digit(byte) { + continue; // ASCII digits don't count + } + } else if byte == 0xEF && word_bytes[index + 1] == 0xAC { + // Unicode presentation-form ligature characters, which we count as + // multiple chars for the purpose of lh_min/rh_min, all begin with + // 0xEF, 0xAC in UTF-8. + count += lig_length(word_bytes[index + 2]); + clear_hyphen_at(index + 1); + clear_hyphen_at(index + 2); + index += 3; + continue; + } else { + index += 1; + while index < word_bytes.len() && is_utf8_trail_byte(word_bytes[index]) { + clear_hyphen_at(index); + index += 1; + } + } + count += 1; + } + + // Handle rh_min. 
+ count = 0; + index = word.len(); + while count < rh_min && index > 0 { + index -= 1; + let byte = word_bytes[index]; + if index < word.len() - 1 { + clear_hyphen_at(index); + } + if byte < 0x80 { + // Only count if not an ASCII digit + if !is_ascii_digit(byte) { + count += 1; + } + continue; + } + if is_utf8_trail_byte(byte) { + continue; + } + if byte == 0xEF && word_bytes[index + 1] == 0xAC { + // Presentation-form ligatures count as multiple chars. + count += lig_length(word_bytes[index + 2]); + continue; + } + count += 1; + } + + hyph_count + } +} + +/// Hyphenation engine encapsulating a language-specific set of patterns (rules) +/// that identify possible break positions within a word. +pub struct Hyphenator<'a>(&'a [u8]); + +impl Hyphenator<'_> { + /// Return a Hyphenator that wraps the given buffer. + /// This does *not* check that the given buffer is in fact a valid hyphenation table. + /// Use `is_valid_hyphenator()` to determine whether it is usable. + /// (Calling hyphenation methods on a Hyphenator that wraps arbitrary, + /// unvalidated data is not unsafe, but may panic.) + pub fn new(buffer: &[u8]) -> Hyphenator { + Hyphenator(buffer) + } + + // Internal implementation details + fn magic_number(&self) -> &[u8] { + &self.0[0 .. 4] + } + fn num_levels(&self) -> usize { + u32::from_le_bytes(*array_ref!(self.0, 4, 4)) as usize + } + fn level(&self, i: usize) -> Level { + let offset = u32::from_le_bytes(*array_ref!(self.0, FILE_HEADER_SIZE + 4 * i, 4)) as usize; + let limit = if i == self.num_levels() - 1 { + self.0.len() + } else { + u32::from_le_bytes(*array_ref!(self.0, FILE_HEADER_SIZE + 4 * i + 4, 4)) as usize + }; + debug_assert!(offset + LEVEL_HEADER_SIZE <= limit && limit <= self.0.len()); + debug_assert_eq!(offset & 3, 0); + debug_assert_eq!(limit & 3, 0); + Level::new(&self.0[offset .. limit]) + } + + /// Identify acceptable hyphenation positions in the given `word`. 
+ /// + /// The caller-supplied `values` must be at least as long as the `word`. + /// + /// On return, any elements with an odd value indicate positions in the word + /// after which a hyphen could be inserted. + /// + /// Returns the number of possible hyphenation positions that were found. + /// + /// # Panics + /// If the given `values` slice is too small to hold the results. + /// + /// If the block of memory represented by `self.0` is not in fact a valid + /// hyphenation dictionary, this function may panic with an overflow or + /// array bounds violation. + pub fn find_hyphen_values(&self, word: &str, values: &mut [u8]) -> isize { + assert!(values.len() >= word.len()); + values.iter_mut().for_each(|x| *x = 0); + let top_level = self.level(0); + let (lh_min, rh_min, clh_min, crh_min) = top_level.word_boundary_mins(); + if word.len() < lh_min + rh_min { + return 0; + } + let mut hyph_count = top_level.find_hyphen_values(word, values, lh_min, rh_min); + let compound = hyph_count > 0; + // Subsequent levels are applied to fragments between potential breaks + // already found: + for l in 1 .. self.num_levels() { + let level = self.level(l); + if hyph_count > 0 { + let mut begin = 0; + let mut lh = lh_min; + // lh_min and rh_min are both guaranteed to be greater than zero, + // so this loop will not reach fully to the end of the word. + for i in lh_min - 1 .. word.len() - rh_min { + if is_odd(values[i]) { + if i > begin { + // We've found a component of a compound; + // clear the corresponding values and apply the new level. + // (These values must be even, so hyph_count is unchanged.) + values[begin .. i].iter_mut().for_each(|x| { + *x = 0; + }); + hyph_count += level.find_hyphen_values(&word[begin ..= i], + &mut values[begin ..= i], + lh, crh_min); + } + begin = i + 1; + lh = clh_min; + } + } + if begin == 0 { + // No compound-word breaks were found, just apply level to the whole word. 
+ hyph_count += level.find_hyphen_values(word, values, lh_min, rh_min); + } else if begin < word.len() { + // Handle trailing component of compound. + hyph_count += level.find_hyphen_values(&word[begin .. word.len()], + &mut values[begin .. word.len()], + clh_min, rh_min); + } + } else { + hyph_count += level.find_hyphen_values(word, values, lh_min, rh_min); + } + } + + // Only need to check nohyphen strings if top-level (compound) breaks were found. + if compound && hyph_count > 0 { + let nohyph = top_level.nohyphen(); + if !nohyph.is_empty() { + for i in lh_min ..= word.len() - rh_min { + if is_odd(values[i - 1]) { + for nh in &nohyph { + if i + nh.len() <= word.len() && *nh == &word.as_bytes()[i .. i + nh.len()] { + values[i - 1] = 0; + hyph_count -= 1; + break; + } + if nh.len() <= i && *nh == &word.as_bytes()[i - nh.len() .. i] { + values[i - 1] = 0; + hyph_count -= 1; + break; + } + } + } + } + } + } + + hyph_count + } + + /// Generate the hyphenated form of a `word` by inserting the given `hyphen_char` + /// at each valid break position. + /// + /// # Panics + /// If the block of memory represented by `self` is not in fact a valid + /// hyphenation dictionary, this function may panic with an overflow or + /// array bounds violation. + /// + /// Also panics if the length of the hyphenated word would overflow `usize`. + pub fn hyphenate_word(&self, word: &str, hyphchar: char) -> String { + let mut values = vec![0u8; word.len()]; + let hyph_count = self.find_hyphen_values(word, &mut values); + if hyph_count <= 0 { + return word.to_string(); + } + // We know how long the result will be, so we can preallocate here. 
+ let result_len = word.len() + hyph_count as usize * hyphchar.len_utf8(); + let mut result = String::with_capacity(result_len); + let mut n = 0; + for ch in word.char_indices() { + if ch.0 > 0 && is_odd(values[ch.0 - 1]) { + result.push(hyphchar); + n += 1; + } + result.push(ch.1); + } + debug_assert_eq!(n, hyph_count); + debug_assert_eq!(result_len, result.len()); + result + } + + /// Check if the block of memory looks like it could be a valid hyphenation + /// table. + pub fn is_valid_hyphenator(&self) -> bool { + // Size must be at least 4 bytes for magic_number + 4 bytes num_levels; + // smaller than this cannot be safely inspected. + if self.0.len() < FILE_HEADER_SIZE { + return false; + } + if self.magic_number() != MAGIC_NUMBER { + return false; + } + // For each level, there's a 4-byte offset in the header, and the level + // has its own 16-byte header, so we can check a minimum size again here. + let num_levels = self.num_levels(); + if self.0.len() < FILE_HEADER_SIZE + LEVEL_HEADER_SIZE * num_levels { + return false; + } + // Check that state_data_base and string_data_base for each hyphenation + // level are within range. + for l in 0 .. num_levels { + let level = self.level(l); + if level.state_data_base() < LEVEL_HEADER_SIZE || + level.state_data_base() > level.string_data_base() || + level.string_data_base() > level.data.len() { + return false; + } + // TODO: consider doing more extensive validation of states and + // strings within the level? + } + // It's still possible the dic is internally broken, but at least it's + // worth trying to use it! + true + } +} + +/// Load the compiled hyphenation file at `dic_path`, if present. +/// +/// Returns `None` if the specified file cannot be opened or mapped, +/// otherwise returns a `memmap2::Mmap` mapping the file. +/// +/// # Safety +/// +/// This is unsafe for the same reason `Mmap::map()` is unsafe: +/// mapped_hyph does not guarantee safety if the mapped file is modified +/// (e.g. 
by another process) while we're using it. +/// +/// This verifies that the file looks superficially like it may be a +/// compiled hyphenation table, but does *not* fully check the validity +/// of the file contents! Calling hyphenation functions with the returned +/// data is not unsafe, but may panic if the data is invalid. +pub unsafe fn load_file(dic_path: &str) -> Option<Mmap> { + let file = File::open(dic_path).ok()?; + let dic = Mmap::map(&file).ok()?; + let hyph = Hyphenator(&*dic); + if hyph.is_valid_hyphenator() { + return Some(dic); + } + None +} diff --git a/third_party/rust/mapped_hyph/src/main.rs b/third_party/rust/mapped_hyph/src/main.rs new file mode 100644 index 0000000000..acc24babee --- /dev/null +++ b/third_party/rust/mapped_hyph/src/main.rs @@ -0,0 +1,67 @@ +// Copyright 2019 Mozilla Foundation. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +extern crate mapped_hyph; + +use mapped_hyph::Hyphenator; + +fn main() { + let dic_path = "hyph_en_US.hyf"; + + let dic = match unsafe { mapped_hyph::load_file(dic_path) } { + Some(dic) => dic, + _ => panic!("failed to load dictionary {}", dic_path), + }; + let hyph = Hyphenator::new(&*dic); + + println!("{}", hyph.hyphenate_word("haha", '-')); + println!("{}", hyph.hyphenate_word("hahaha", '-')); + println!("{}", hyph.hyphenate_word("photo", '-')); + println!("{}", hyph.hyphenate_word("photograph", '-')); + println!("{}", hyph.hyphenate_word("photographer", '-')); + println!("{}", hyph.hyphenate_word("photographic", '-')); + println!("{}", hyph.hyphenate_word("photographical", '-')); + println!("{}", hyph.hyphenate_word("photographically", '-')); + println!("{}", hyph.hyphenate_word("supercalifragilisticexpialidocious", '-')); + println!("{}", hyph.hyphenate_word("o'dwyer", '=')); + println!("{}", hyph.hyphenate_word("o'callahan", '=')); + println!("{}", hyph.hyphenate_word("o’dwyer", '=')); + println!("{}", hyph.hyphenate_word("o’callahan", '=')); + println!("{}", hyph.hyphenate_word("petti-fogging", '=')); + println!("{}", hyph.hyphenate_word("e-mailing", '=')); + println!("{}", hyph.hyphenate_word("-x-mailing", '=')); + println!("{}", hyph.hyphenate_word("-strikeout-", '=')); + + let dic2 = match unsafe { mapped_hyph::load_file("tests/compound.hyf") } { + Some(dic) => dic, + _ => panic!("failed to load dictionary {}", "tests/compound.hyf"), + }; + + let h2 = Hyphenator::new(&*dic2); + println!("{}", h2.hyphenate_word("motorcycle", '=')); + + let dic3 = match unsafe { mapped_hyph::load_file("tests/rhmin.hyf") } { + Some(dic) => dic, + _ => panic!("failed to load dictionary {}", dic_path), + }; + let h3 = Hyphenator::new(&*dic3); + println!("{}", h3.hyphenate_word("övéit", '=')); + println!("{}", h3.hyphenate_word("అంగడిధర", '=')); + + let dic4 = match unsafe { mapped_hyph::load_file("tests/num.hyf") } { + Some(dic) => dic, + _ => panic!("failed to load 
dictionary {}", "tests/num.hyf"), + }; + let h4 = Hyphenator::new(&*dic4); + + println!("{}", h4.hyphenate_word("123foobar123", '=')); + println!("{}", h4.hyphenate_word("123foobarfoobar", '=')); + println!("{}", h4.hyphenate_word("foobarfoobar123", '=')); + println!("{}", h4.hyphenate_word("123foobarfoobar123", '=')); +} diff --git a/third_party/rust/mapped_hyph/tests/base.hyf b/third_party/rust/mapped_hyph/tests/base.hyf Binary files differnew file mode 100644 index 0000000000..e2b6df3d2a --- /dev/null +++ b/third_party/rust/mapped_hyph/tests/base.hyf diff --git a/third_party/rust/mapped_hyph/tests/base.hyph b/third_party/rust/mapped_hyph/tests/base.hyph new file mode 100644 index 0000000000..550c57c9ad --- /dev/null +++ b/third_party/rust/mapped_hyph/tests/base.hyph @@ -0,0 +1,4543 @@ +aarhus +abase +abate +abbeys +abby +abducts +aber=ra=tions +ab=hor=rer +abil=i=ties +ab=jur=ing +ablest +abodes +abo=li=tion=ist +abor=tion +about +abram +abridged +abruptly +ab=sconds +ab=sently +ab=solved +ab=sorp=tion +ab=sti=nence +ab=strac=tor +abun=dance +abuts +abyssinian +aca=pulco +ac=cel=er=a=tor +ac=cen=tu=ated +ac=cepted +ac=ces=si=bil=ity +ac=ci=den=tal +ac=cli=mated +ac=com=mo=dat=ing +ac=com=pa=ny=ing +ac=com=plish=ments +ac=cords +ac=coun=tant +ac=cre=tion +ac=cul=tur=a=tion +ac=cu=racy +ac=cused +aces +achieve +acid +ac=knowl=edge=able +acme +acous=tics +ac=qui=es=cent +ac=quis=i=tive +acres +acrop=o=lis +acti=nome=ters +ac=ti=va=tors +ac=tors +ac=tu=ar=ial +acute +ada=gios +adap=ta=tion +adapts +ad=dict=ing +ad=di=tions +ad=dresser +ad=duc=ing +aden +ad=her=ents +adi=a=bat=i=cally +ad=join=ing +ad=judg=ing +ad=jured +ad=just=ment +ad=min=is=ter +ad=min=is=tra=tively +ad=mire +ad=mis=sions +ad=mixed +ad=mo=ni=tions +adopted +adore +adrian +ad=sorbs +adul=terer +ad=um=brat=ing +ad=van=ta=geous +ad=ven=tur=ers +ad=versely +ad=ver=tises +ad=visees +ad=vo=cacy +aer=ate +aer=obac=ter +aerosols +af=fairs +af=fec=tions +af=fil=i=at=ing +af=firmed +af=flic=tion +af=fords 
+afghans +afore=thought +african=izes +af=ter=im=age +af=ter=ward +age +ager +ag=glu=ti=nated +ag=gra=va=tion +ag=gres=sive +ag=ile +ag=i=ta=tor +ag=o=nies +agree=ably +agri=cul=tur=ally +aide +ail=ing +aims +air=drops +air=foil +air=line +air=planes +air=tight +akin +alamo +alas +al=ba=tross +al=bums +al=co=holism +aldrich +alert=ing +alexan=dria +alga +al=ge=rian +al=go=rithms +ali=cia +aligned +al=is=tair +al=lan +al=leges +al=le=gory +al=ler=gic +al=ley=way +al=lit=er=a=tion +al=lo=ca=tor +al=lots +al=low=ing +al=lure=ment +al=maden +al=nico +aloof=ness +al=pha=bet=ized +al=sa=tian +al=ter=ations +al=ter=nates +al=thaea +al=tru=is=ti=cally +alve=o=lar +amal=ga=mate +amass +amaze +ama=zons +am=bigu=ous +am=bled +am=bushed +amend +amer=ica +amer=i=cans +amide +am=mo=nia +among +amor=tized +amour +am=phib=ians +am=pli=fiers +am=pu=tated +amuse=ment +an=abap=tist +ana=gram +ana=logue +an=a=lyt=ic=i=ties +anaphoric +anas=to=moses +anatomy +an=chorite +an=dalu=sia +an=dover +anec=dote +anes=thetic +an=ge=leno +an=gered +an=glia +an=gola +an=gu=lar +an=i=mated +an=i=mism +anita +an=napo=lis +an=ni=hi=lated +an=no=ta=tion +an=noy +an=nu=ally +an=nuls +an=odes +anoma=lously +anselm +ant +an=tag=o=nizes +an=te=date +an=tholo=gies +an=thro=po=mor=phi=cally +an=tic=i=pates +an=ti=dotes +an=ti=mony +an=ti=quate +an=ti=semitism +an=ti=thet=i=cal +an=to=nio +anx=ious +any=way +ap=a=thy +apha=sia +api=ary +apoc=ryphal +apol=o=gist +apos=tolic +ap=pall +ap=par=ently +ap=pear +ap=pease=ment +ap=pended +ap=per=tains +ap=plauds +ap=pli=ca=ble +ap=plier +ap=pointer +ap=por=tion=ing +ap=prais=ers +ap=pre=ci=a=tion +ap=pre=hen=sively +ap=proach +ap=pro=pri=ate +ap=proval +ap=prox=i=mated +april +aptly +aquifer +ara=bi=ans +aramco +ar=bi=trat=ing +ar=cades +ar=chaism +arche=ol=o=gist +archimedes +ar=chi=tec=tures +arc=ing +ar=dently +are=quipa +ar=gos +ar=gu=ment +arid=ity +aris=to=crat +arith=me=tize +arm +arm=chairs +arm=ing +armpits +arousal +ar=rack +ar=range=ment +ar=rears 
+ar=rhe=nius +ar=ro=gate +ar=royo +ar=te=rial +arthri=tis +ar=tic=u=lately +ar=ti=fact +ar=tillerist +arts +as=cen=dant +as=cent +as=cot +ashamedly +ash=tray +asi=at=ics +ask=ing +as=per=sions +as=pi=ra=tion +ass +as=sas=si=nated +as=say +as=sem=blies +as=serter +as=sess +as=siduity +as=sign=ing +as=sist +as=so=ciate +as=so=ci=a=tor +as=suaged +as=sure +as=syr=i=an=ize +as=ter=oid +as=ton=ish=ingly +astride +as=tro=nom=i=cally +asym=met=ric +asyn=chronously +athe=ism +ath=letes +at=las +at=om=iza=tion +atone=ment +at=ro=phies +at=tach=ing +at=tain +at=tempt +at=ten=dants +at=ten=tion=al=ity +at=ten=u=a=tor +at=tired +at=tracted +at=tributable +at=tune +auburn +au=di=bly +au=diome=ter +au=di=tions +auger +au=gust +au=ral +aus=cul=tated +aus=terely +aus=tri=an=ize +au=then=ti=ca=tor +au=thor=i=ties +au=thors +au=to=cor=re=late +au=todecre=ments +au=toin=dex +au=toma=ton +au=topi=lot +au=tum=nal +availer +avari=cious +av=enues +avers +avian +avionic +avoid=able +avow +awak=ened +awards +aw=ful=ness +awry +ax=i=o=log=i=cal +ax=ioms +ayes +azure +ba=belizes +baby=ing +bac=chus +back=bend +back=fill +back=o=rder +backscat=ters +back=stitch +back=tracks +back=yard +bad=ger +baf=fle +bag=gage +bagro=dia +bailiff +baits +bakes +bal=ancers +bald=win +balka=niza=tion +balks +baller +bal=loon +ballplayer +bal=sam +bam=boo +ban=dage +band=pass +bane +ban=gui +bank +bankrupts +bans +bap=tism +bap=tized +bar=barism +bar=bells +bards +barest +barhop +barks +barn=hard +barom=e=ters +barr +bar=ren +bar=ron +barter +basalt +base=less +bash +ba=sics +bas=ket=ball +bassinets +batavia +bather +bath=tub +bat=ted +bat=ting +bat=tle=ments +baude=laire +bawl=ing +bay=o=net +be +beaded +beaker +bean=bag +bearded +beast +be=at=i=fi=ca=tion +beau +beau=ti=fied +beavers +becker +be=com=ingly +bed=der +bed=post +bed=spread +beecham +beefy +beethoven +be=fell +be=foul +be=fud=dles +beg=gary +be=got=ten +be=guil=ing +be=hav=ior=ism +be=hold +be=ing +be=lay +bel=fry +be=liev=able +be=lit=tles 
+belles +bel=liger=ents +bells +be=long +belt=ing +be=moans +bend=able +bene=dic=tions +ben=e=fi=ciary +ben=gal +bent +be=queath +be=rat=ing +beres=ford +berib=boned +berlin=ers +bernar=dine +bernoulli +bertie +be=sets +be=smirched +be=spoke +best=ing +bet +be=trayed +bette +be=tween +be=wail +be=wil=der=ment +bianco +bibles +bi=car=bon=ate +bi=con=vex +bid=der +bi=en=nial +big=ger +bi=har=monic +bi=l=abial +bilk +bil=let +billings +bimet=allism +bind +bing=ham=ton +bio=chem=istry +bi=o=log=i=cally +biopsy +bipeds +bird=baths +birm=ing=hamize +births +bi=sec=tors +bisques +bites +bit=terly +bi=valves +blab=ber=mouths +black=burn +black=foots +black=mailed +blacks +blaine +blamers +bland +blan=keters +blares +blas=phe=mous=ness +blatz +bleach=ers +bleat=ing +blem=ishes +bless=ings +blind=fold +blinked +bliss=fully +bliz=zard +bloch +block=ers +blond +blood=i=est +bloom +blos=soms +blow=fish +blud=geons +blueprint +bluish +blunted +blur=ring +blush=ing +boarded +boaster +boathouse +boatswain +bobb=sey +bo=den=heim +body=build=ing +bog=art +bo=gus +boil=ers +bold=face +bol=she=vist +bolton +bom=bas=tic +bo=nan=zas +bonds=man +bon=ham +bon=tempo +book=cases +book=keep=ers +book=store +booms +booster +boo=tle +boot=strap=ping +bor=den +bo=re=alis +born +bor=row=ers +bosses +botanist +bother +bot=tler +bo=tulism +bounce +bounden +bou=quet +bou=tique +bowd=ler=iz=ing +bowl +bow=string +box=ing +boyfriend +braced +brad=bury +brag=ger +braille +brain=storm +brakes +branch=ings +bran=dish=ing +brashly +braun +brav=ing +bray=ing +brazil +bread +bread=win=ners +break=fast +break=through +breast=works +breath=lessly +breed=ing +bren=nan +brevet +brew=ery +bribers +brick=lay=ers +bridge +bridge=work +briefed +brig +brighten +brighton +brim=ming +brings +bris=tle +britisher +broaches +broad=casts +broadly +broglie +bro=ken=ness +bronchial +brooch +brook=field +broth +brow=beat +brow=n=ian +bruce +brunette +brush=ing +bru=tal=ized +bryce +buch=wald +buck=ler +bucky +bud=dies 
+bud=geters +buff +buf=fet=ings +bug=ger +bugs +built +bulging +bull=doze +bull=frog +bul=ly=ing +bum=bling +bump=tious +bun=dle +bun=gler +bunkhouse +bunted +buoys +bu=reau=cracy +burgher +bur=glarproof=ing +burke +burn +burn=ings +burnt=ness +bur=row=ing +bursty +busch +bush=whacked +busi=nesslike +bus=tards +butchered +but=ter=cup +but=ter=nut +but=ton=holes +bu=tyrate +buz=zard +bye +by=pass=ing +by=stander +byzan=tinizes +cab=i=net +cache +cac=tus +cae=sarize +cager +ca=jole +calais +cal=cu=late +cal=cu=lus +cal=gary +cal=ico +callaghan +cal=loused +calm=ingly +cal=tech +ca=lypso +camem=bert +camino +cam=paign=ing +camps +cana=di=an=ize +can=celed +can=di=dacy +can=dler +ca=nine +can=nery +can=non +canon=i=cal +canopy +canto +can=vassed +ca=pa=ble +ca=pac=i=tors +capita +cap=i=tal=iz=ers +cap=ping +cap=stone +cap=ti=vates +cap=turer +car=a=vans +car=bon=dale +car=boniz=ing +card=board +car=di=ol=ogy +care=fully +ca=ress=ing +car=i=ca=ture +carls=bad +car=na=tion +car=o=line +car=pen=ters +car=riages +car=ruthers +carter +car=ton +carve +cas=cades +cashed +cas=ings +cas=sette +castes +casts +catalina +cat=a=pult +catches +cat=e=go=rizes +cathe=dral +catholi=cisms +cat=tle +caul=drons +causer +cau=tioner +cav=a=lier=ness +cav=ernous +caw=ing +ce=cil +celanese +celer=ity +cel=list +celti=cizes +cen=sor=ing +cen=taur +cen=time=ter +cen=tral=ized +cen=troid +cere=bral +cer=tain=ties +cer=ti=fies +cezanne +chaf=fey +chair=ing +chal=ices +chal=leng=ing +cham=paign +chan=cel=lor +change=abil=ity +chan=neled +chanter +chapel +chap=ter +char=ac=ter=ize +charge=able +char=i=ta=ble +char=lotte +chars +chart=ings +chas=ing +chas=tis=ers +chat=tel +chauf=feured +cheaply +check=book +check=out +cheek=bone +cheer=i=ness +cheeses +chemise +cher=ishes +cheryl +chests +cheyennes +chi=canos +chides +child=hood +chill +chime +chi=nas +chin=ning +chi=ro=prac=tor +chit +chloro=plasts +choir +choose +chop=ping +chore=o=graph +chou +chris=ten=son +chris=tian=iz=ing +christoph 
+chron=i=cle +chronol=ogy +chuck=les +church=go=ing +churn +ci=cero=ni=an=ize +cin=derella +ci=pher=texts +cir=cuitously +cir=cu=lat=ing +cir=cum=nav=i=gates +cir=cum=stanced +cir=cuses +cities +civet +civ=i=lized +claimed +clam=bers +clams +clap=board +clar=i=fi=ca=tions +clash +classes +clas=si=fiers +clat=tered +claus=tro=pho=bia +cleaned +cleansed +clearer +cleaved +clemente +clerked +cliches +cliffs +climb +clincher +clink +clip=pers +cloaks +clock=ings +clogs +close=ness +clos=ing +cloth=ing +cloud=ing +clowns +clucks +clumsy +clutch=ing +coaches +coali=tion +coastal +coat=ing +coax=ing +cob=web +cock=pit +co=coon +codes +cod=i=fies +co=ef=fi=cient +co=ex=ist +cof=fer +cog=i=tated +cogs +co=her=ing +coils +co=in=cid=ing +colder +col=icky +col=lab=o=ra=tor +col=lared +col=lect=ing +col=leges +collins +colom=bia +colonies +colons +col=or=less +colum=nize +com=bated +com=bi=na=tor +comb=ings +comedic +cometary +com=fort=ing +comma +com=mand=ment +com=mem=o=ra=tive +com=mended +com=ment=ing +com=mis=sion=ers +com=mit=teemen +com=mon=al=i=ties +com=mon=wealth +com=mu=ni=cated +com=mu=nists +com=mut=ing +com=pactors +com=pa=ra=bly +com=par=i=son +com=pas=sion +com=pelling +com=pen=satory +com=pe=ti=tions +com=pil=ers +com=plaint +com=pleted +com=plex=i=ties +com=pli=ca=tions +com=pli=ment=ing +com=pos=edly +com=post +com=pre=hen=si=bil=ity +com=pres=sion +com=pro=mis=ers +com=pul=sory +com=puted +com=radely +con=cate=na=tion +con=cede +con=ceived +con=cen=tra=tors +con=cep=tu=al=ized +con=certed +con=cise=ness +con=coct +con=cretes +con=cur=ring +con=demns +con=di=tional +con=doned +con=duc=tion +con=fec=tionery +con=ferred +con=fes=sions +con=fi=den=tial +con=fig=ure +con=fin=ing +con=fis=cates +con=fo=cal +con=found=ing +con=fu=cian +con=fu=sion +congo +con=gre=gat=ing +con=gress=women +con=joined +con=junc=ture +con=nected +con=nec=tor +con=nors +con=quered +con=rail +con=se=crate +con=sent=ing +con=ser=va=tion +con=served +con=sid=ered +con=sis=tent 
+con=sol=ers +con=so=nants +con=spir=a=tor +con=stant +con=stituent +con=sti=tu=tions +con=structed +con=structs +con=sul=tant +con=sumed +con=sump=tions +con=tain +con=tam=i=nated +con=tem=pla=tive +con=tender +con=tent=ment +con=text +con=ti=nents +con=tin=u=a=tions +con=tor=tions +con=tract=ing +con=tra=dict=ing +con=trap=tions +con=tribute +con=trite +con=trol=la=bil=ity +con=tro=versy +con=vened +con=ven=tion=ally +con=ver=santly +con=ver=sion +con=vex +con=vict +con=vinces +con=voys +cooked +cool=ers +coon +co=op=er=a=tions +co=or=di=nates +copeland +cop=ings +co=pro=ces=sor +co=quette +cords +corinthian +corks +cor=nered +corns +coro=nary +cor=po=rately +cor=rect +cor=rect=ness +cor=re=spond +cor=ri=dors +cor=rob=o=ra=tive +cor=rup=tion +cor=val=lis +cos=mopoli=tan +costs +cots +cotyle=don +coughs +coun=cil=woman +coun=selors +coun=ter=act=ing +coun=ter=feited +coun=ter=part +coun=ter=sunk +coun=try=wide +cou=plings +courser +cour=te=sies +court=rooms +covenant +cov=er=let +cov=etous=ness +cower +cowl +coypu +crack=ers +cra=dles +craftsper=son +cramps +crank +cranny +crater +craw=ford +craze +creaked +creams +cre=ation +cre=dence +cred=i=tor +creeks +cre=mates +cres=cents +cretin +cricket +crim=i=nal +crip=ple +criss=cross +crit=i=cizes +croaks +cro=cus +crop=per +crosser +crosstalk +crowd +crown=ing +cru=ci=fix=ion +cruel +cruis=ing +crum=pled +cru=sade +crushes +crux +cryp=tic +crys=tal=lize +cubans +cu=cum=bers +cuf=flink +cul=mi=nate +cul=tivable +cul=tural +cum=mings +cup=board +curb +cur=ing +curlers +cur=rent +cur=ry=ing +cur=sory +curtly +curv=ing +custer +cus=tomiz=able +cut +cuts +cyanamid +cycli=cally +cygnus +cy=press +cy=to=plasm +dab=bles +dadais=tic +dahl +dairy +dali +dam=ages +damns +damsel +danc=ing +dan=gle +danize +dare +darken +darn +darted +dar=winizes +database +dates +daunted +davy +day=dreams +daz=zled +deaden +deaf +deal=ings +deanna +death +de=bater +de=bil=i=tates +debtor +debu=tante +de=cay +de=ceit +de=cel=er=ate +de=cent 
+de=cid=abil=ity +dec=i=mate +de=ci=sion +decks +de=clarer +de=clin=ers +de=cod=ings +de=com=po=si=tion +dec=o=ra=tive +de=creases +decre=ments +ded=i=cated +deduct +deed=ing +deep +deere +de=feats +de=fen=dant +de=fen=es=trat=ing +de=fer=ments +de=fi=cien=cies +de=fine +def=i=ni=tions +de=for=ma=tion +defy +degra=da=tion +de=ify +de=jected +de=lay=ing +deleter +de=lib=er=ated +del=i=ca=cies +de=light=ful +de=lim=it=ing +deliri=ous +de=liv=er=ies +del=phic +del=uged +de=mand +deme=ter +de=mod=u=late +demons +demon=stra=tor +de=mul=ti=plex +denebola +den=i=grates +de=nom=i=na=tors +de=not=ing +dens=est +den=tists +deny=ing +de=par=ture +de=pen=dent +de=pleted +de=ploy +de=pose +de=pos=i=tors +de=pre=ci=ated +de=pri=va=tions +de=queued +dereg=u=late +de=rive +de=scend +de=scents +de=scrip=tively +de=sert=ers +de=serv=ings +des=ig=na=tor +de=sire +des=o=late +despatched +de=spite +desta=bi=lize +de=stroyed +de=struc=tive=ness +de=tacher +de=tained +de=tec=tive +de=te=ri=o=rated +de=ter=mi=na=tion +de=ter=min=is=tic +de=trac=tor +dev=as=tate +de=vel=op=ment +de=vi=a=tion +de=vised +de=vot=edly +de=vours +dexedrine +di=ag=nose +di=ag=o=nals +dial +di=a=logue +di=a=mond +di=ar=rhea +dick=in=son +dic=ta=to=rial +did=dle +dies +di=et=rich +dif=fer=en=tials +dif=fer=ers +dif=fusely +di=gest +dig=gings +dig=its +di=gress=ing +di=lap=i=date +dili=gence +di=lu=tion +di=men=sions +dimmed +dine +din=ing +dio=genes +diph=thong +dip=per +di=rec=tion +di=rec=torate +dirt +dis=able +dis=af=fec=tion +dis=al=low=ing +dis=ap=pear=ances +dis=ap=prove +dis=as=sem=bles +dis=bands +dis=card=ing +dis=cerns +dis=ci=plines +dis=clo=sure +dis=con=nects +dis=cord +dis=cour=ag=ing +dis=cov=ery +dis=cre=tion +dis=cuss +dis=ease +dis=fig=ure +dis=grun=tle +dis=gust=ingly +dis=hon=estly +dish=wa=ter +dis=joint +disk +dis=lo=cates +dis=may=ing +dis=mis=sers +dis=obe=di=ent +dis=own +dis=patched +dis=pen=sary +dis=persed +dis=plac=ing +dis=pleas=ing +dis=po=si=tion +dis=puter +dis=qui=et=ing 
+dis=rup=tion +dis=sem=ble +dis=senter +dis=sim=i=lar=i=ties +dis=so=ci=at=ing +distaff +dis=tastes +dis=till=ing +dis=tin=guish +dis=torts +dis=tresses +dis=tribu=tiv=ity +dis=turbed +ditty +di=ver=gence +di=ver=si=fies +di=vert=ing +div=i=dend +di=vin=ing +di=vi=sors +dix=ieland +dober=man +doc=toral +doc=u=men=taries +do=dec=a=he=dra +doe +dog=house +dolan +dol=lies +domenico +domi=cile +dom=i=neer=ing +don=ahue +don=key +doo=ley +door=man +dop=ers +doric +dort=mund +doted +dou=ble=header +doubt +doubts +doves +downey +down=load=ing +down=stairs +doyle +dra=co=nian +drafty +dra=gooned +dram +drape +draughts +draw=ings +dreaded +dream=ers +dregs +dress=ing +dries +driller +drip +drive=way +droop +drop=pers +droves +drudgery +drum=mers +drunkly +du=al=ity +dubuque +ducts +dug +dull=ness +dumbly +dun=bar +dun=geons +du=pli=ca=ble +dupont +du=ra=tion +dur=ward +duster +dutch=man +dwarfed +dwelt +dye=ing +dy=namism +dysen=tery +ear +ear=marked +earnest=ness +earth +earth=quakes +eases +east=erner +easy +eaves +eben +echoed +ecol=ogy +econ=o=mize +ecuador +ed=enizes +edict +edi=tion +ed=mon=ton +ed=u=cat=ing +ed=wards +ef=fect=ing +ef=fi=cacy +ef=fort=less=ness +eggshell +egyp=tian=ize +eigen=state +eighthes +eis=ner +eject=ing +elab=o=rately +elapses +el=derly +elec=tions +elec=tri=cally +elec=tro=cute +elec=troen=cephalog=ra=phy +elec=tron=ics +el=e=men=tal +el=e=va=tion +elicited +elim=i=nat=ing +elite +ella +el=lip=soids +elmhurst +else +elu=ci=da=tion +ely +eman=ci=pate +em=bar=rass +em=beds +em=bod=ied +em=brac=ing +emer=ald +emer=i=tus +emil +emits +emo=tion=ally +em=pha=siz=ing +em=ploy=able +em=po=rium +emp=tily +em=u=la=tor +en=acted +en=camp=ing +en=chanter +en=cir=cled +en=coder +en=counter +en=cour=ag=ingly +en=cum=bered +en=dan=gers +en=demic +en=dorse +en=dows +en=dur=ingly +en=fee=ble +en=fran=chise +en=gels +en=gines +en=glish=men +en=gulf +en=join +en=joys +en=light=ened +en=livens +enor=mity +en=quirer +en=riches +en=sem=bles +en=snar=ing +en=sures 
+en=ter=prise +en=ter=tain=ment +en=ticed +en=ti=tle +en=treat +en=trepreneurs +enu=mer=ated +en=veloped +en=v=i=ron +en=vi=sioned +ephemeral +epi=cur=izes +epis=co=palian +epi=taphs +epochs +equal=ize +equates +equi=li=brate +equips +equiv=o=cally +erased +ere +ergo +er=lang +erode +er=ra=tum +errs +es=ca=lates +es=capes +es=corts +es=pe=cially +es=quires +es=sen=tially +es=tates +es=ti=mated +eter=nal +eth=er=nets +etruria +eu=le=rian +eura=sia +eu=ro=peanized +evade +eval=u=a=tive +evap=o=ra=tion +even=hand=ed=ness +events +ev=er=glades +ev=ery=thing +ev=i=dences +evinces +evolve +ex=ac=er=bated +ex=ac=tions +ex=ag=ger=a=tions +ex=am=ined +ex=as=per=ates +ex=ceeded +ex=cel=lently +ex=cep=tions +ex=change=able +ex=ci=sion +ex=cit=ingly +ex=clam=a=tory +ex=clu=sive=ness +ex=cret=ing +ex=cused +ex=e=cu=tional +ex=em=pli=fied +ex=empts +ex=er=tion +ex=haust=edly +ex=hi=bi=tions +ex=ile +ex=is=ten=tial=ist +ex=or=bi=tant +ex=panders +ex=pect +ex=pects +ex=pe=di=tious +ex=pen=di=ture +ex=pe=ri=enc=ing +ex=per=i=ments +ex=pires +ex=pla=na=tions +ex=ploit +ex=plo=rations +ex=plo=sive +ex=po=nen=ti=at=ing +ex=ports +ex=po=sure +ex=press=ibil=ity +ex=pul=sion +ex=tem=po=ra=ne=ous +ex=ten=sive +ex=ter=mi=nate +ex=tin=guished +ex=tract +ex=tra=ne=ous +ex=trap=o=la=tion +ex=tremely +ex=ult +eye=glasses +eye=sight +fa=bles +fa=cade +facile +fac=sim=ile +fac=to=ries +fac=ulty +fa=gin +fail=soft +faint=ness +fair=ing +faith=ful +fakes +fal=la=cious +fal=mouth +fal=si=fy=ing +fa=mil=iar +fam=i=lies +fa=nati=cism +fanci=ness +fan=ning +farad +farewells +farm=ers +far=rell +fas=ci=na=tion +fasted +fas=tid=i=ous +fate +fath=omed +fat=ten +faulkner +fauna +fa=vor=ing +fayette +fear=lessly +feat +feath=er=weight +fed +fee=ble=ness +feeds +feet +fe=line +fel=low=ships +fem=i=nism +fenc=ing +fer=men=ta=tion +fe=ro=ciously +fer=tile +fer=vent +fes=tiv=ity +fet=tered +fever=ish +fiat +fi=brously +fid=dled +fief +fiendish +fif=teenth +fight=ing +fiji +files +filled +film=ing +filthy 
+fi=nals +finder +fines +fin=ger=print +fin=ishes +finnish +fire=boat +fire=men +fire=wall +firm=ing +fis=cally +fishes +fis=sured +fitly +fitz=patrick +fix=a=tion +fix=ture +flagged +flak +flamer +flank=ing +flash +flask +flat=tered +flaunt=ing +flaw=lessly +fledglings +fleetly +flem=ish=ing +flew +flick=ing +flinches +flirt +floated +flood +floors +flo=ren=tine +floss=ing +flour=ished +flow=er=i=ness +fluc=tu=ate +fluffier +flu=o=resce +flut=ing +fly=ing +fo=cal +foes +fogy +fold=ers +folksy +fol=som +font +fooled +foot=ball +foot=ing +for=age +forbes +forcer +fore=arms +fore=fa=thers +for=eign +fore=see=able +fore=stalls +for=ever +forge +for=get=table +for=giv=ing +for=lornly +for=mal=ized +for=ma=tively +formi=cas +for=mu=lated +for=saken +forth=with +for=tiori +for=tu=itously +for=warder +fought +foun=da=tion +founds +four=some +foxes +frag=ile +fra=grantly +fram=ing +fran=cie +fran=coise +frank=ing +fraser +fray +freckle +fred=erico +free=ing +frees +freez=ing +frenchizes +fre=quented +fresh=ened +fresh=ness +freudi=an=ism +fric=tion +friendlier +friezes +fright=ful +frisia +frivolity +from +fronts +froth=ing +frue=hauf +fruits +fuch=sia +fu=jitsu +full +fum=bling +func=tion=ally +fun=da=men=tally +fun=gal +fun=nier +fur=long +fur=ni=ture +fur=ther=more +fuses +fu=tur=is=tic +gabled +gad=getry +gag=ing +gaines +galac=tic +galaxy +gal=lantly +gal=lon +gall=stone +gam=bled +games +gang=plank +gaped +garbed +gard=ner +gar=landed +gar=risoned +gaseous +gaspee +gas=tric +gath=ered +gauche +gaunt +gawky +gaze +gear=ing +gelatin +gemma +gen=er=al=ity +gen=er=als +generic +ge=netic +genre +gen=tler +geodesic +ge=o=log=i=cal +geo=phys=i=cal +geral=dine +ger=mane +ger=mi=nates +gestapo +get=ting +ghosted +gibral=tar +gig +gig=gle +gilds +gilt +ging=hams +gipsy +girl=ish +giver +glad=dest +glance +glar=ing +glazed +gleaner +glenda +glim=mer +glints +gloat +glo=ria +glo=ry=ing +glove +glow=ing +glynn +gnu +goats +gob=lins +god=mother +goethe +gold=enly +gold=s=tine 
+gon=dola +goode +goodyear +goren +gor=ton +got +goth=i=ciz=ing +goug=ing +gov=ern=ment +grab +grace=fully +gra=da=tions +grad=ual +graft +grained +grams +grand=fa=ther +grandpa +grant +gran=u=lates +graph=i=cal +gras=pable +grassi=est +grat=i=fi=ca=tion +gra=tu=itously +graves +grayed +grease +gre=cian=ize +greeks +green=feld +greens +greeter +grenades +greyest +grievances +grif=fith +grimes +grinds +gripped +gritty +gro=cers +grooved +gross=est +gro=ton +group +grov=els +growl=ing +grubs +grum=bling +guano +guard=edly +gu=ber=na=to=rial +guest +guide=line +guiltier +guises +gul=lah +gum=ming +gun=ner +gur=gle +gustafson +guts +guyer +gym=nas=tics +haas +ha=bit=ual +hacks +hag +hail +hairier +hale +hall=mark +halpern +halve +ham=burg=ers +ham=mer=ing +hamp=shire +hand=books +hand=i=cap +hand=ker=chiefs +hand=shake +handy +hang=man +han=nah +hansel +hap=lessly +hap=pily +harbinger +harder +hard=ships +harken +harm=ful=ness +har=mo=niously +har=ness=ing +har=ri=man +harry +har=vardize +har=veys +has=sle +hat +hate=fully +hat=tie +hauler +hausa +havoc +hawthorne +hay=wood +head +head=lands +head=room +heals +healy +hear=ings +heartily +heater +heaved +heav=i=ness +he=brides +hedge=hog +heeds +hegelian=izes +heights +heiresses +he=li=copter +hel=l=enized +hel=met +help=fully +hem +hemp +hen=drick +hen=ri=etta +her=alds +herder +here=ford +here=un=der +her=mit +hero=ically +her=ring +hert=zog +hes=pe=rus +het=eroge=nous +heuser +hexagon +hi=ber=nate +hid=den +hi=er=ar=chic +high=field +high=nesses +hikes +hill=crest +hilt +hin=dered +hin=dus=tan +hint=ing +hired +his +his=tograms +hitch +hither +hit=ting +hoarse=ness +hobby +hoe +hoists +holds +hol=landaise +hol=low=ness +holo=caust +homage +home=o=mor=phism +home=spun +hom=ing +ho=mo=sex=ual +hon=esty +hon=ey=moon=ing +hon=o=raries +hood=lum +hooker +hoosier=ize +hooves +hope=less=ness +ho=race +horn +hor=ri=ble +hor=rors +horse=shoer +hos=pi=tal=ize +hostesses +hotly +hound=ing +house=flies +house=top +hover +howled 
+hu=bert +huey +hugo +hu=man=i=ties +hum=bling +hu=mid=i=fiers +hu=mil=i=a=tion +hu=mor=ers +humpty +hung +hun=gry +hunt=ley +hurl=ing +hur=ry=ing +hus=bands +husks +hutchins +hyde +hy=giene +hy=phen=ate +hy=pothe=ses +hys=ter=i=cal +ib=sen +ici=cle +icosa=he=dron +ide=al=ize +iden=ti=cal +iden=tify +id=iosyn=crasy +idles +ig=nite +ig=nores +il=le=gal=ity +il=log=i=cal +il=lu=sions +il=lus=tra=tive +im=a=gen +imag=ine +im=brium +im=ma=te=rial +im=mensely +im=mi=grat=ing +im=mov=abil=ity +im=pacted +im=pale +im=pa=tiently +im=pedes +im=pen=e=tra=ble +im=per=fectly +im=per=ma=nent +im=per=son=ations +im=pinges +im=ple=mentable +im=pli=cants +im=plied +im=por=tant +im=poses +im=po=tence +im=prac=ti=cally +im=press=ible +im=press=ment +im=pris=on=ments +im=prove=ment +im=pro=vis=ers +im=pul=sion +in=ac=ces=si=ble +in=ad=e=quate +inane +in=audi=ble +inca +in=cas +in=ces=santly +in=ci=den=tally +in=cit=ing +in=closes +in=clu=sive=ness +in=com=pa=ra=ble +in=com=pletely +in=con=gruity +in=con=sis=tent +in=con=ve=nient +in=cor=rect=ness +in=cred=u=lous +in=cu=bate +in=cur=able +in=de=ci=sive +in=dent +in=de=scrib=able +in=dex=ing +in=di=ca=tion +in=dif=fer=ence +in=dig=na=tion +in=di=rectly +in=dis=tinct +in=di=vid=u=ally +in=doc=tri=nat=ing +in=du=bitable +in=duc=tances +in=ducts +in=dus=tri=al=ist +in=dus=try +in=el=e=gant +inertly +in=ex=act +in=ex=pli=ca=ble +in=fantry +in=fec=tion +in=fe=rior +in=fer=tile +in=fi=nite +in=fir=mary +in=flated +in=flict=ing +in=form +in=for=ma=tively +in=fre=quently +in=fu=ri=at=ing +in=ge=nious=ness +in=gra=ti=ate +in=hab=ited +in=her=ently +in=her=itress +in=hibitor +in=im=i=cal +ini=tial=ized +ini=ti=at=ing +in=jec=tion +in=jured +inker +in=let +in=ner +in=nocu=ous=ness +in=oc=u=late +in=quire +in=quis=i=tive +in=scribed +in=se=curely +in=ser=tion +in=sid=i=ous=ness +in=sin=u=ated +in=sis=tently +in=som=nia +in=spi=ra=tion +in=stal=la=tion +in=stances +in=stan=ti=a=tions +in=still +in=sti=tutes +in=struct +in=structs +in=stru=ments 
+in=su=la=tion +in=sur=ance +in=sur=rec=tion +in=te=grand +in=tel=lect +in=tel=li=gi=ble +in=ten=si=fi=ca=tion +in=ten=sively +in=ter +in=ter=cept +in=ter=changed +in=ter=com=mu=ni=cates +in=ter=course +in=ter=ested +in=ter=fered +in=ter=group +in=ter=leaved +in=ter=minable +in=ter=mod=ule +in=ter=na=tion=al=ity +in=ter=per=sonal +in=ter=posed +in=ter=pret=ing +in=ter=re=la=tions +in=ter=rupt +in=ter=sect=ing +in=ter=state +in=ter=ven=ing +in=ter=wo=ven +in=ti=ma=tion +in=tol=er=ance +in=tractabil=ity +in=traof=fice +in=trigued +in=tro=duc=tions +in=truder +in=tu=ba=tion +in=vaders +in=va=lidi=ties +in=vari=ants +in=ven=tively +in=verses +in=vert=ing +in=ves=tiga=tive +in=vet=er=ate +in=vites +in=voked +in=volves +io=ni=ans +ira +irately +irish=man +ironic +ir=ra=tional +ir=reg=u=lar +ir=re=press=ible +ir=re=versibil=ity +ir=ri=ta=ble +irv=ing +is=fa=han +is=land +iso=lated +iso=mor=phisms +is=suance +it +ital=i=cize +item=iza=tions +it=er=a=tion +ito +izves=tia +jack=ets +jacky +ja=cobus +jailer +ja=maican +janet +janus +jar=gon +jaun=ti=ness +jay +jeanne +jef=fer=so=nian +jen=nifer +jeremy +jer=oboam +jest +je=suit=iz=ing +jew=eled +jews +jin=gled +joaquin +joes +john +joiner +jok=ers +jolts +jor=dan +jose=phus +jot=ting +jour=nals +joust=ing +joy=ous +ju=daica +judge +ju=dith +ju=goslavia +julie +jump +junc=tures +ju=niper +juras +jury +jus=ti=fiers +jut=land +kad=dish +kamikazes +kant +karp +ka=tow=ice +keel=ing +keep=ers +kemp +ken=ney +ke=pler +ker=ouac +key +key=pad +khrushchevs +kidde +kid=ney +kil=i=man=jaro +kills +kilo=joule +ki=mono +kin=dling +king=pin +kin=nick=in=nic +kir=choff +kisses +kit=ing +klein +knap=sacks +kneel +knicker=bock=ers +knights +knocked +knots +knowl=edge +knuck=les +ko=dachrome +ko=rea +kraka=toa +kro=necker +kurd +la=bel=ing +la=borer +labyrinths +lac=erta +lacks +ladies +la=goon +laid=law +lamarck +lament +lamp +lanced +land=ings +lands +lange +lan=guish +laos +lapse +largely +lar=son +lash=ing +las=zlo +later +la=tin=ity 
+lat=i=tudes +laud=able +laugh=lin +laun=dered +lau=rels +laven=der +law=fully +law=suit +lay=ers +lazarus +leaded +leafed +lea=guers +le=an=der +leap=ing +leary +leath=ern +leav=ing +lec=tures +leeds +left=ists +le=gal=iza=tion +leger +leg=is=lated +le=git=i=mate +leila +lemon +lends +le=niency +lens +leonardo +les=bian +les=son +let=ter +levee +lev=elly +levin +lewdly +lex=ing=ton +li=belous +lib=er=ated +li=bido +li=cense +lick +lied +lifeboat +life=time +ligget +light=hearted +like +like=ness +lil=ian +li=man +limit +lim=its +lind +lindy +lin=early +lin=gerie +lin=ing +lin=naeus +li=oness +liq=uid +lise +lis=tened +list=ings +lit=er=al=ness +lithua=nia +lit=ter=ing +live +liv=ers +lizzie +loaf +loathing +lob=ster +lo=cally +lo=ca=tor +lock=ian +lock=wood +lodges +log=a=rithm +log=i=cally +logs +loi=ters +lon=doniza=tion +lon=ers +long=ings +look=ers +looms +loose=leaf +loos=ing +lords +lorry +lossi=est +lo=tus +louisa +lour=des +lovelace +loves +low=est +loy=ally +lucerne +luck=ier +lu=di=crous +luke +lu=mi=nously +lunch +lunged +lur=ing +lust +luther +lux=u=ri=antly +lyle +lynx +mac +mac=don=ald +maces +ma=chin=ery +mackey +macro=molecule +mad=den +mad=hya +mad=sen +mag=el=lanic +mag=ill +mag=ne=ti=z=able +mag=nify +maguire +maids +mail=man +main=frames +main=tained +majesty +maker +mal=ady +mal=colm +mal=formed +ma=li=cious=ness +mal=one +mal=ton +man=age +man=ag=ing +manda=tory +manger +man=hole +man=i=cur=ing +manila +ma=nip=u=la=tive +mann +manors +man=tissa +man=u=fac=tured +mao +maps +marched +mardis +margo +mari=nade +mar=itime +mar=ketabil=ity +mark=ings +mar=malade +mar=riott +mar=shal=ing +mar=tial +mar=tyr +mar=vels +mas=cara +mask=able +ma=sonite +mas=sa=cred +mast +mas=ter=piece +mas=tur=ba=tion +match=less +ma=te=ri=al=iz=ing +math=e=mat=i=cally +mat=ings +ma=trix +mat=tered +ma=tured +mauri=cio +max=ima +max=ims +maybe +may=oral +mc=cabe +mc=cluskey +mc=don=nell +mc=gov=ern +mc=kee +mclean +mcpher=son +meal=time +mean=ing=ful +meant 
+mea=sure=ments +me=chan=i=cally +medal +med=field +me=di=a=tions +medicine +med=i=tat=ing +medi=ums +meet=ing +mega=hertz +meis=ter +melcher +melodies +melpomene +mem=ber=ship +mem=o=randa +mem=o=rizes +menagerie +mendelizes +men=non=ite +men=tal=i=ties +men=tor +mer=ce=nar=i=ness +mer=ci=lessly +merged +mer=i=to=ri=ous +mer=rill +mesh +mes=sen=ger +messy +met=al=liza=tion +meta=phys=i=cal +me=te=oritic +me=thod=i=cally +meth=ods +metro +mews +mica +mick +mi=cro=bi=cide +mi=croe=co=nomics +mi=cron +mi=cro=pro=cess=ing +mi=cro=scope +mi=crovaxes +mid=dle=man +mid=night +mid=stream +mid=win=ter +mi=grate +mikoyan +mileage +milk +mill +mil=likan +mil=lionth +mill=stones +mil=tonized +minaret +mind=fully +min=eral +mini +min=ima +min=i=mizes +min=istries +mi=nor +min=strels +minute +mir=a=cle +miriam +mis=car=riage +mis=con=cep=tion +mis=er=ably +mis=giv=ings +mis=led +mis=plac=ing +miss=ing +mis=soula +mis=take +mistle=toe +mis=un=der=stand +mitch +mitres +mix=tures +moats +mocked +modally +mod=er=ated +mod=ern=izer +mod=icum +mod=i=fy=ing +mod=u=lar=iz=ing +mod=ule +moghul +moines +mol=davia +moles +mol=lusk +mo=men=tar=ily +monaco +mon=day +mon=go=lian +mon=keyed +mono=cotyle=don +mono=lithic +monos=table +mon=roe +mon=tague +mont=gomery +mon=u=ment +mooned +moor +moped +morass +more=house +morn +mor=pho=log=i=cal +morsels +mort=gage +mo=saic +mosque +mo=tels +moth=er=land +mo=tion=less=ness +mot=ley +mo=tor=ized +mound +moun=tain=ously +mourn=ers +mousy +mov=able +mov=ing +muck +mud=dled +muf=fin +mugs +mul=lah +mul=ti=com=puter +mul=ti=ple +mul=ti=pli=cand +mul=ti=plies +mul=ti=stage +mum=bles +mun=dane +mu=ni=tions +mur=der=ing +mur=murs +mus=covy +mush=roomed +mu=si=cians +muskrat +mus=sorgsky +mu=ta=bil=ity +mu=ta=tions +mu=ti=lat=ing +mut=ters +myce=naean +mys=te=ri=ous +mytholo=gies +na=gasaki +nair +naked=ness +names +nanook +nap=kin +nar=cotic +nar=row=est +nash +na=tal +na=tion=al=i=ties +na=tions +nat=u=ral=ist +naugh=ti=ness +navel +navona 
+ne=an=derthal +nears +neb=ula +ne=ces=si=ta=tion +neck=ties +nee=dled +needy +neg=a=tives +neg=li=gi=ble +ne=groid +neigh=bor=ing +neo=clas=sic +nero +nest=ing +nets +neu=ral +neu=tral +neva +new=bury=port +new=man +news=man +next +ni=belung +nicholls +nick=name +niel=son +night=fall +ni=hilism +nim=bler +nineties +nip=ponizes +no=bil=ity +noc=tur=nally +noel +nolan +nom=i=nee +non=con=ser=va=tive +non=de=ter=min=ism +non=govern=men=tal +non=lin=ear=ity +nonorthog=o=nal +non=seg=mented +non=ter=mi=nals +nook +nord=hoff +nor=mal=iza=tion +nor=man=iza=tions +north +north=ernly +nor=walk +nos=tradamus +no=ta=rizes +note +no=tice=able +no=ti=fies +not=ting=ham +no=vak +novices +nu=ances +nu=clide +nullary +num=ber +nu=mer=able +nu=mis=matic +nurs=ing +nu=tri=tious +nyquist +oases +obe=di=ent +ob=fus=cate +ob=jec=tively +obliged +oblit=er=at=ing +ob=scene +ob=serv=able +ob=servers +ob=so=letes +ob=struc=tion +ob=vi=ated +oc=ca=sional +oc=ci=den=tal=ize +oc=clu=sions +oc=cu=pied +oc=curs +oc=tag=o=nal +octets +oddly +odi=ous +o'dwyer +of=fended +of=fer +of=fi=cer +of=fi=ciously +oft +oil=cloth +ojibwa +old=en=burg +oleo=mar=garine +olivia +olym=pus +omi=nous=ness +om=nipresent +o'neill +on=looker +onus +opaquely +open=ings +op=er=ate +op=er=a=tor +op=pen=heimer +op=pose +op=pressed +opthalmic +op=ti=mist +op=ti=miz=ing +opts +or=anges +or=bital +or=ches=tral +or=der +or=di=nar=ily +ores +or=ga=ni=za=tion +or=gans +ori=en=tal=ized +ori=fices +orig=i=na=tion +or=leans +or=nate +orr +orville +os=cil=lates +o'shea +os=teopath +oth=ello +otto +ounces +out=burst +out=door +out=grow=ing +out=law=ing +out=live +out=per=forms +out=rages +out=stand=ing +out=vot=ing +out=wit=ting +over=board +over=crowds +over=es=ti=mates +over=hangs +over=joyed +over=load +overnighter +over=pro=duc=tion +over=run=ning +over=shad=ow=ing +over=sized +over=take +overtly +overuse +over=work=ing +owen +own=er=ship +ox=i=dized +ozzie +paci=fi=ca=tion +pack=aged +pack=ers +padding +pageant 
+pag=i=nat=ing +painful +paint=ing +pa=ja=mas +pale +pales=tine +pal=lia=tive +palo=mar +panacea +pan=demic +pan=els +panned +pan=the=ist +panty +pa=per=ers +par +pa=rades +paragon +par=al=lel +par=al=lels +pa=ram=e=ter=ize +para=mus +para=phrases +par=cel +par=doned +paren=the=ses +pares +parisian +park=ers +par=lay +par=ody +par=rots +par=si=fal +par=takes +par=tic=i=pant +par=tic=u=lar +par=ti=tioned +par=tridges +pas=sage=way +pas=sion +pass=port +pas=teur +pas=ture +patchy +patents +patho=gen=e=sis +pa=tients +pa=tri=cians +pa=trolling +pa=trons +pat=tern=ing +paula +paulus +pave=ment +pawn +payer +pay=offs +peace=fully +peaks +pearl +peat +pe=cu=liar +pedant +pe=di=a=tri=cian +peel=ing +peer=ing +peking +pem=broke +pence +pends +pen=e=tra=tion +penin=su=las +penn=syl=va=nia +pen=tagon +peo=pled +pep=pery +per=ceived +per=cents +per=chance +peren=ni=ally +per=fect=ness +per=forms +per=i=he=lion +pe=ri=od=i=cally +per=ish=able +perkins +per=me=at=ing +per=mit +per=ni=cious +per=pe=tra=tion +per=pet=u=a=tion +per=se=cut=ing +per=se=veres +per=sist +per=sonal +per=son=i=fied +per=spi=ra=tion +per=sua=sions +per=turb +pe=ruses +per=va=sive +pester +pe=ters +petri +pet=ting +phae=dra +phaser +phe=nomeno=log=i=cal +philco +philis=tinizes +philoso=phies +phoeni=cia +phon=ing +phos=pho=rus +pho=to=genic +pho=tos +phyla +physi=cist +pi +pick +pick=et=ing +pick=man +pi=co=joule +pic=tur=ing +pied=fort +pies +pig=gy=backed +pig=tail +pil=fer=age +pil=lar +pi=lots +pin=cush=ion +pin=ing +pin=na=cle +pin=scher +pi=o=neers +pipelin=ing +pi=rate +pis=tols +pitch=ing +pithi=ness +piti=less +pi=tu=itary +pix=els +place=ment +pla=gia=rist +plain=field +plain=tive=ness +planeload +plan=ets +planocon=cave +plant=ings +plas=tic=ity +plates +pla=toon +play=boy +play=ing +play=wrights +pleas=ant +pleat +ple=nary +pli=ant +plots +plows +plug=gable +plume +plun=dered +plung=ing +plu=to=nium +poc=a=hon=tas +pod +po=et=i=cal +poincare +pointy +poi=sons +po=laris +po=lice +pol=ish 
+po=liter +polka +pol=luted +poly=mer +pomera=nia +pompous=ness +ponds +pool +pop +pop=ping +pop=u=lar=ized +pop=u=lous +pores +port +por=tend=ing +por=tico +por=tray +posed +po=si=tion +posits +pos=ses=sive +pos=sums +pos=te=ri=ori +post=mas=ters +postscript +pot +po=ten=tates +po=tion +pot=tery +pounces +pourer +poverty +pow=er=ful +prac=ti=ca=ble +prac=ti=tion=ers +praise +prancer +prayer +pre=al=lo=cated +pre=car=i=ously +prece=dents +pre=ciously +pre=cip=i=ta=tion +pre=cludes +pre=con=cep=tion +pre=dat=ing +pre=de=ter=mi=na=tion +pred=i=ca=tion +pre=dic=tive +pre=dom=i=nately +pre=emp=tive +pref=ac=ing +prefers +preini=tial=izes +pre=lim=i=nary +premise +pre=oc=cu=pied +pre=pared +pre=pos=ter=ously +pre=rog=a=tives +pre=scrip=tions +pre=sen=ta=tions +pre=served +pres=i=den=tial +press=ings +pre=ston +pre=sump=tu=ous=ness +pre=tend=ing +pre=texts +pre=vail=ing +pre=vent=ing +pre=vi=ously +pricers +prides +pri=mar=ily +prim=ing +princesses +prin=ci=ples +prior +pris=on=ers +pri=va=tions +prizes +pro=bate +prob=ings +pro=ce=dure +pro=cess=ing +procla=ma=tion +pro=cre=ate +pro=curer +pro=duce +pro=duc=tive +pro=fes=sion +prof=fered +prof=itabil=ity +pro=found +pro=gram +pro=gresses +pro=hi=bi=tions +pro=jec=tions +pro=le=tariat +pro=long +promi=nent +pro=moter +promptest +pro=mul=ga=tion +pro=nounce=ment +proofs +propane +prop=erly +proph=esy +pro=por=tion=ately +pro=poser +pro=pounded +pro=rate +pros=e=cutes +prosodic +prospec=tor +prostate +pro=tect=ing +pro=tege +protes=ta=tions +pro=tons +pro=to=zoan +prouder +prove=nance +prov=i=dence +pro=vi=sion +pro=vokes +prox=i=mal +pruned +prus=sian=ize +pseu=doin=struc=tion +psy=chi=a=trist +psy=cho=log=i=cally +psy=cho=so=matic +pub +pub=licly +puck=ered +puffed +puller +pulls +pulse +pump=kin +punc=tu=ally +pun=ish=able +punt +pup=peteer +pur=chases +purges +pu=rina +pur=pler +pur=posed +purse +pur=su=ing +push=down +put=nam +puz=zle=ment +py=ongyang +pythagore=anizes +quad=ran=gle +qua=dren=nial +quag=mires 
+quak=er=ess +qual=i=fied +qualm +quan=ti=fiers +quan=tize +quar=reled +quar=ter=ing +quasar +qua=ver=ing +queerer +queried +ques=tion=able +ques=tions +quib=ble +quick=lime +qui=et=ing +quince +quit +quiv=ers +quon=set +quo=tient +ra=bin +rach=mani=noff +rack=e=teers +ra=di=ance +ra=di=a=tors +ra=dio=g=ra=phy +rae +rages +raider +rail=roaded +rain=bow +rains +rake +ral=ston +ram=i=fi=ca=tions +rams +rand +randy +rangy +rank=ings +ran=somer +rap +rapids +rap=tur=ous +ras=cally +rasp=ing +rat=for +ra=tion +ra=tio=nal=izes +rat=tler +rav=ager +ravens +rawl=ins +rays +reach +re=acted +re=ac=ti=va=tion +reader +read=justed +re=aligned +re=al=iz=able +realm +reaped +rear +re=ar=rest +rea=son=ings +re=as=signed +reawak=ened +re=bel=lions +re=boot=ing +re=buffed +re=but=ted +re=cal=i=brated +re=ca=pit=u=lates +re=ceded +re=ceives +re=cep=tive +re=cife +re=cip=ro=cat=ing +recita=tions +reck=oned +re=claim=ing +re=clin=ing +rec=og=nize +rec=ol=lect +rec=om=mend +re=com=piles +rec=on=cil=i=a=tion +re=con=nect +re=con=sti=tuted +recorder +re=cover +recre=at=ing +recta +re=cur +re=curs=ing +red +re=de=clared +re=de=fined +re=de=vel=op=ment +re=dis=played +red=ness +re=dress=ing +re=ducibly +reeds +re=elects +reen=force=ment +reestab=lish=ing +re=ex=am=in=ing +ref=er=ences +re=fer=ral +re=fine +re=flect=ing +re=flexes +re=for=ma=tory +re=for=mu=lated +re=frained +re=fresh=ment +refugee +re=futed +re=gally +re=gen=er=at=ing +reg=i=men=ta=tion +regis +re=gressed +re=gret=table +reg=u=larly +reg=u=la=tors +re=hears=ing +re=im=bursable +reined +rein=hold +re=in=stated +rein=tro=duces +re=it=er=a=tion +re=joiced +re=la=beled +re=lat=ing +rel=a=tives +re=laxes +rel=e=gate +re=lents +relic +re=liev=ing +re=lin=quish=ing +reloader +re=luc=tance +re=mains +reme=died +re=mind +rem=i=nis=cently +re=mod=els +re=motely +re=mov=ing +re=names +ren=dezvous +re=new=able +re=nounc=ing +rented +re=open +re=or=ga=nize +re=pair=man +re=pay=ing +re=peat=edly +re=pen=tance +rep=e=ti=tious 
+re=place=able +re=plays +repli=cate +re=port +repos=ing +rep=re=sentably +rep=re=sent=ing +re=prieved +re=proach +re=pro=ducibil=i=ties +re=pro=grams +re=publics +re=pulses +re=puted +re=quired +req=ui=si=tions +re=scind +re=searchers +re=sem=blances +re=sent=ment +reser=voir +res=i=dent +res=ig=na=tion +re=sis=tance +re=sis=tors +re=solver +re=sort=ing +re=spect +re=spec=tive +re=sponded +re=spon=si=ble +restarts +rest=ful +restora=tions +re=strain=ers +re=stric=tive +re=sul=tant +re=sum=ing +res=ur=rec=tors +re=tail=ing +re=tal=ia=tory +re=ten=tive=ness +retina +re=tir=ing +re=tract=ing +re=trans=mis=sion +ret=ri=bu=tion +re=triever +ret=ro=spec=tion +re=type +re=unit=ing +re=vamp=ing +rev=eler +re=vere +rever=i=fies +re=verses +re=viewer +re=viser +re=vival +re=voked +rev=o=lu=tion +re=volvers +rewind=ing +rewrit=ing +rhe=sus +rhode +rhyming +rib=bons +richard +rich=mond +rico +ride +ridiculed +ri=fle +rig=ging +right=ful=ness +rigor +rims +ring=ings +ri=or=dan +ripely +rip=pling +risk +rit=u=ally +river +rivulet +road=sters +roar=ing +rob=beries +roberta +robin=sonville +rochester +rocket +rock=well +rods +roll +ro=mance +ro=man=izes +romper +roof=ing +room=ing +root +rop=ing +rose=bush +rosetta +rot +ro=ta=tions +ro=tund +rough=ness +round=ing +roused +routes +rov=ing +row=ley +roy=alty +rub=bing +rubles +rude=ness +ruf=fian +rugged=ness +rule +ru=ma=ni=ans +rummy +run=away +runoff +rup=tur=ing +rus=sell +rus=tic +rustlers +ruth=less=ness +sab=bathize +sachs +sac=ri=fice +sacro=sanct +sad=dles +sa=fari +safes +sage=brush +said +sails +sal=able +salerno +saline +sally +salters +salu=ta=tions +sal=vages +same +sam=pling +sana=to=rium +sanc=tion=ing +sand=burg +san=dra +san=est +san=skrit +sapling +saran +sari +satchel +satires +sat=isfy +sat=ur=na=lia +saud +sav=aged +saver +sa=vored +saw=fish +sax=onize +say=ings +scala +scal=ing +scam=pers +scan=ners +scape=goat +scared +scat=ter +scenic +schantz +schelling +schemers +schmitt +scholas=tic +school=houses 
+schroeder +schuylkill +scis=sor +scoffs +scope +score=board +scorner +scotch=gard +scotts=dale +scouted +scram=bled +scrapes +scratch=ing +scream=ers +screen=ings +scrib=bled +scripts +scrump=tious +scuf=fle +sculp=tured +scythe +sea=gate +seam +seaquar=ium +search=light +sea=son=able +seat +se=ceded +sec=ondary +sec=re=tar=ial +se=cre=tive +sec=tions +se=cur=ings +sedi=tion +see +seedy +seem=ing +seer +seg=men=ta=tions +se=gundo +seizures +se=lect=man +self=ishly +sells +se=man=tics +semi=con=duc=tor +semiper=ma=nently +sen=ate +seneca +sense +sens=ing +sen=sual +sen=ti=men=tally +sep=a=rately +sept +se=quencers +se=quen=tially +serene +se=ri=al=iz=able +serif +serra +ser=vice +serv=ings +sets +set=tler +sev=en=teens +sev=er=ance +sev=ers +sex +sex=ual +shack=led +shadi=ness +shaf=fer +shak=ers +shale +shame=ful +shang=haied +shape=less +shard +shares +sharp=en=ing +shat=ter=ing +shawano +shear=ing +sheds +sheets +shel=ley +shelves +sheri=dan +shied +shiftier +shilling +shiner +shin=toizes +ship=per +shirk +shiver +shocker +shoe=horn +shooter +shop=pers +short=age +short=ens +shorts +shoul=dered +shoved +showed +shows +shrewd +shrilled +shrink=ing +shrugs +shuf=fled +shut=off +shut=tles +siberia +sicken +side=band +sides +sid=ings +sierra +sighed +sigma +sig=na=ture +sig=ni=fi=ca=tion +sikkim +silent +silken +sills +sil=ver=man +sim=ile +si=mon +sim=plic=i=ties +sim=plis=tic +sim=u=la=tion +sin=bad +sinews +singed +sin=glet +sin=gu=larly +sin=ner +sioux +sirens +sisy=phus +sit=tings +siva +six=ties +skate +skep=ti=cal +sketch=pad +skid=ding +skill=ful=ness +skims +skipped +skir=mishes +skulked +sky +sky=rock=ets +slacks +slang +slash +slaugh=ter +slavic +slavoni=cizes +sledge=ham=mer +sleep=less +sleighs +sliced +slide +slightly +slings +slips +slo=gans +slop=pi=ness +slot=ting +slower +slug=gish=ness +slums +smacked +small=time +smasher +smell +smiles +smith=so=nian +smoked +smol=dered +smooth=ing +smug +smythe +snap +snap=shots +snatched +sneaki=est +sneers 
+sniffs +snod=grass +snorkel +snow=belt +snows +snuffs +soak +soared +sobers +so=cial=ists +so=ci=o=log=i=cal +socks +so=fas +softly +so=journ +sol=dier +solenoid +solid +solids +so=los +sol=vent +somber +som=er=set +son +sonny +soothe +so=phis=ti=ca=tion +sor=did +sor=est +sor=rows +soul +sound=ness +soured +south=bound +south=land +so=vi=ets +spacer +spaded +spaniardiza=tion +spanked +spare +sparked +sparsely +spat +spawned +speak=ers +spe=cial=ists +spe=cialty +spec=i=fied +speckle +spec=ta=tors +spec=trog=ra=phy +spec=u=lates +speech=less +speeds +spellings +spent +spica +spies +spilt +spin=ner +spi=rally +spir=i=tu=als +spit=ing +spleen +splic=ing +splits +spoil=ing +sponged +spon=sor=ship +spool=ers +spores +sportswriter +spot=ter +sprague +spray=ing +sprees +springi=ness +sprint +sprouted +spurn +sput=tered +squadrons +squarer +squat=ting +squeaky +squeez=ing +squirmed +stab +sta=bi=lizes +stacked +staffing +stagers +stags +stair=cases +stale=mate +stalling +stam=mer +stam=ped=ing +stan=dard +stand=ings +stans +star +star=gate +star=ring +star=tles +state +statewide +sta=tion=mas=ter +stat=ues +statu=to=rily +staves +stead=ier +stealer +steamer +steele +steeper +steered +stem +stenog=ra=pher +step=mother +stereo=scopic +ster=il=izer +stetho=scope +stew +stick=ier +stiff=ens +stigma +stillest +stim=u=late +sting=ing +stipends +stir=rer +stitch=ing +stock=holder +stodgy +stom=acher +stood +stop=gap +stor=age +storeyed +stormi=est +stouter +strafe +straight=ened +strained +strand=ing +stran=gler +stratagem +strat=i=fies +straw=berry +streamer +street=car +strengths +stretched +strict +strife +stringed +stringy +striptease +strode +strolling +stron=tium +strug=gle +stu=art +stucco +stu=dious +stuffs +stun +stupid +sturm +styli +styx +sub=com=po=nents +sub=di=rec=tory +sub=dues +sub=graph +sub=jec=tive +sub=lime +sub=merges +sub=mode +sub=or=di=nate +sub=pro=gram +sub=schema +sub=script=ing +sub=se=quent +sub=si=dies +sub=sis=tent +sub=stan=tially +sub=sta=tion 
+sub=strate +sub=sys=tem +sub=tle=ness +sub=trac=tion +sub=units +sub=vert=ing +suc=cess=ful +suc=cinct=ness +suck=ers +sud=den +suf=fer=ance +suf=fi=ciency +suf=fo=cated +sug=ar=ings +sug=gests +suit=ably +suits +sulks +sul=tan +sum=mands +sum=ma=tion +sum=mon +sumter +sun=der +sunken +sun=shine +su=per=com=put=ers +su=per=groups +su=pe=rior +su=per=nat=u=ral +su=per=sede +su=per=vise +sup=pers +sup=ple=ment=ing +sup=port +sup=pose +sup=press=ing +surely +surge +surly +sur=pass +sur=pris=ingly +sur=round +sur=vey=ors +sus +sus=pended +sus=pi=cions +suther=land +swab +swal=low=ing +swan +swaps +swat +sweat +swedes +sweep=stakes +sweet=est +swellings +swifter +swim=suit +swipe +switch=boards +swivel +swords +sykes +sylvia +sym=bol=ize +sym=me=try +sym=pa=thy +syn=a=gogue +syn=chro=nizes +syn=di=ca=tion +syn=ony=mously +syn=the=size +syr=ian +sys=tem=at=i=cally +taber=na=cle +ta=ble=spoon=ful +tab=u=late +tacit +tac=tic +tail +taipei +tale +talker +tallchief +tal=mudiza=tions +tam=ing +tanaka +tan=gle +tan=ta=liz=ing +taos +tapestry +tar +tar=iffs +tasked +taste=fully +tat=tered +taunts +tav=erns +taxi=cabs +tay=lor +teaches +tear=ful +tea=spoon=ful +tech=nique +te=dious +teenaged +tegu=ci=galpa +tele=graph +tele=o=log=i=cally +tele=phony +tele=vise +teller +tem=per=ance +tem=pes=tu=ous +tem=po=raries +tempt=ingly +ten=dency +tenex +tense +tent +tenure +ter=mi=nat=ing +termwise +terre +ter=rify +ter=ror=ize +testable +tes=ti=fiers +tex +tex=tile +thai=land +thank=less +thaw +the=atri=cally +theme +the=ol=ogy +the=o=riza=tion +ther=a=pies +thereof +ther=mome=ter +thes=saly +thickly +thim=bles +think=ing +thirsted +this=tle +thorns +those +thou=sand +thread +threat=ens +thrift +thrived +throne +through=out +thrusters +thumbed +thun=derer +thus +tiburon +tick=les +ti=died +tier +tight=en=ers +tilde +tillich +tim=bered +time=outs +timeta=bles +ti=m=o=nizes +tin=gling +tin=kled +tint +tip=per=ary +tire=lessly +ti=tan +tit=ter +toasts +to=geth=er=ness +toi=lets 
+tol=er=a=ble +tol=er=a=tion +toma=toes +ton +tonic +tool +tooth=paste +top=most +topsy +tor=ment=ing +tor=rent +tor=tur=ing +tossed +to=tallers +touch=able +tough +tourist +tow=el=ing +towns +toys +tracked +trac=tor +trader +traf=ficked +trailed +trainer +tramp +trances +transceivers +tran=scribers +trans=feral +trans=formable +trans=gressed +tran=sis=tor=ized +tran=si=tively +trans=la=tion +trans=mit=tal +trans=par=ent +transpon=der +trans=pose +trape=zoidal +trauma +traver=sal +trays +trea=sure +treat=ing +tree=top +tremor +tres=passed +tri=an=gles +tri=bunals +tricked +tricky +trig=gered +trilled +trim=ming +tripled +tri=umphal +triv=ially +troop=ers +trot=sky +trou=bleshoots +trow=els +truck=ing +truest +trumped +trunk +trust=ingly +try +tubs +tuft +tum=bled +tun=able +tunisia +tur=bu=lent +turk=ize +turn=ing +tur=tle +tu=tankhamen +tut=tle +twenty +twiner +twirling +twitch=ing +tyler +type=writ=ers +typ=ing +tyranny +ugh +ul=cers +um=brage +un=ac=cept=ably +un=aided +unan=i=mous +unattain=abil=ity +un=aware +un=blocked +un=can=celled +un=chang=ing +un=closed +un=con=di=tional +un=con=trol=lable +un=count=able +un=de=cid=able +un=der=brush +un=der=flows +un=der=lies +un=der=mine +un=der=plays +un=der=stand=ings +un=der=tak=ings +un=der=writes +undi=rected +un=done +un=easy +un=equaled +un=event=ful +un=fair=ness +un=fit +un=for=mat=ted +un=grate=fully +un=harmed +uni=di=rec=tion=al=ity +uni=for=mity +unin=dented +un=in=ter=rupted +unions +uni=tar=ian +unity +uni=ver=si=ties +un=kind=ness +un=leashed +un=link=ing +un=lucky +un=mer=ci=ful +un=nec=es=sar=ily +un=ob=tain=able +un=paid +un=prece=dented +un=prov=able +un=rav=el=ing +un=rec=og=nized +un=re=strained +un=safely +un=s=e=lected +un=skilled +un=steady +un=syn=chro=nized +un=tie +un=to=ward +un=used +un=whole=some +un=winds +un=wrap +up=dater +up=holder +up=land +up=rightly +up=sets +up=turns +urge +uri=nates +ur=su=line +us=ages +usenix +usu=ally +uti=liza=tion +utopi=anizes +ut=ters +va=ca=tion 
+vac=u=umed +va=grantly +va=lence +valiant +valid=ness +valu=ably +valves +van=den=berg +van=ished +van=quish=ing +vari=ably +varies +vary=ing +vastly +vau=dois +vax +veer=ing +veg=e=tated +ve=hic=u=lar +ve=lasquez +vene=tian +ven=omous +ven=tri=cles +venus +ver=bal=ized +ver=dure +ver=i=fier +vern +ver=sa=tile +ver=te=brates +vested +vet=eri=nary +via +vi=bra=tions +vi=cious=ness +vic=tim=iz=ers +vic=to=ries +vi=dal +vier +view=ing +vi=gnettes +vil=i=fi=ca=tion +vil=lages +vinci +vine=yard +vi=o=la=tor +vi=o=lins +virgo +virus +vis=i=ble +vis=ited +vi=su=al=ize +vi=tally +vladimir +vo=ca=tions +voided +vo=li=tion +volt=ages +vol=un=teer=ing +voted +vouch=ing +voy=aged +vul=garly +waco +waf=fles +wag=ne=r=ian +wail=ing +waiter +waives +wak=ing +wal=green +wal=len=stein +walls +waltham +wan=dered +wan=ing +wants +ward +ware=hous=ing +warmer +warn=ing +war=ranted +war=saw +wash=burn +wasps +watch +watch=man +wa=ter=ing +wa=tery +wausau +wave=length +wax=ers +we +weak=nesses +wear +weari=somely +weath=er=ford +webs +wed=lock +weekly +wei=d=man +weights +weiss=muller +welder +welles=ley +wenches +wes=leyan +west=hamp=ton +wet +whacked +wharves +wheel +whelp +wher=ever +whims +whip=pany +whirling +whiskers +whis=tled +white=horse +whitens +whit=lock +whit=tling +whole=ness +whoop +wi=chita +widen +wid=owed +wield=ing +wilbur +wile +wilkin=son +william +willis +wilshire +wince +wind=ing +wine=head +win=ing +win=nie +win=sett +wiped +wire=tap=pers +wised +wish=ful +witches +with=drew +with=holds +wit=ness=ing +woe=fully +wom=an=hood +won=der=ful=ness +woo +wooden +wood=stock +woofer +woonsocket +words +work=books +work=man +world=li=ness +wor=rier +wor=shiper +worth=less +wound=ing +wrap=per +wreathes +wrenched +wretch +wring +writ +writ=ing +wrote +wyner +xe=roxed +yamaha +yard +yawner +years +yel=lowed +yelped +yes=ter=days +yok=na=p=ataw=pha +york=shire +young=sters +youth=ful=ness +yukon +zeal +zen +zeus +zion=ism +zoned +zoroaster diff --git 
a/third_party/rust/mapped_hyph/tests/base.word b/third_party/rust/mapped_hyph/tests/base.word new file mode 100644 index 0000000000..6d1e60849c --- /dev/null +++ b/third_party/rust/mapped_hyph/tests/base.word @@ -0,0 +1,4543 @@ +aarhus +abase +abate +abbeys +abby +abducts +aberrations +abhorrer +abilities +abjuring +ablest +abodes +abolitionist +abortion +about +abram +abridged +abruptly +absconds +absently +absolved +absorption +abstinence +abstractor +abundance +abuts +abyssinian +acapulco +accelerator +accentuated +accepted +accessibility +accidental +acclimated +accommodating +accompanying +accomplishments +accords +accountant +accretion +acculturation +accuracy +accused +aces +achieve +acid +acknowledgeable +acme +acoustics +acquiescent +acquisitive +acres +acropolis +actinometers +activators +actors +actuarial +acute +adagios +adaptation +adapts +addicting +additions +addresser +adducing +aden +adherents +adiabatically +adjoining +adjudging +adjured +adjustment +administer +administratively +admire +admissions +admixed +admonitions +adopted +adore +adrian +adsorbs +adulterer +adumbrating +advantageous +adventurers +adversely +advertises +advisees +advocacy +aerate +aerobacter +aerosols +affairs +affections +affiliating +affirmed +affliction +affords +afghans +aforethought +africanizes +afterimage +afterward +age +ager +agglutinated +aggravation +aggressive +agile +agitator +agonies +agreeably +agriculturally +aide +ailing +aims +airdrops +airfoil +airline +airplanes +airtight +akin +alamo +alas +albatross +albums +alcoholism +aldrich +alerting +alexandria +alga +algerian +algorithms +alicia +aligned +alistair +allan +alleges +allegory +allergic +alleyway +alliteration +allocator +allots +allowing +allurement +almaden +alnico +aloofness +alphabetized +alsatian +alterations +alternates +althaea +altruistically +alveolar +amalgamate +amass +amaze +amazons +ambiguous +ambled +ambushed +amend +america +americans +amide +ammonia +among +amortized +amour +amphibians 
+amplifiers +amputated +amusement +anabaptist +anagram +analogue +analyticities +anaphoric +anastomoses +anatomy +anchorite +andalusia +andover +anecdote +anesthetic +angeleno +angered +anglia +angola +angular +animated +animism +anita +annapolis +annihilated +annotation +annoy +annually +annuls +anodes +anomalously +anselm +ant +antagonizes +antedate +anthologies +anthropomorphically +anticipates +antidotes +antimony +antiquate +antisemitism +antithetical +antonio +anxious +anyway +apathy +aphasia +apiary +apocryphal +apologist +apostolic +appall +apparently +appear +appeasement +appended +appertains +applauds +applicable +applier +appointer +apportioning +appraisers +appreciation +apprehensively +approach +appropriate +approval +approximated +april +aptly +aquifer +arabians +aramco +arbitrating +arcades +archaism +archeologist +archimedes +architectures +arcing +ardently +arequipa +argos +argument +aridity +aristocrat +arithmetize +arm +armchairs +arming +armpits +arousal +arrack +arrangement +arrears +arrhenius +arrogate +arroyo +arterial +arthritis +articulately +artifact +artillerist +arts +ascendant +ascent +ascot +ashamedly +ashtray +asiatics +asking +aspersions +aspiration +ass +assassinated +assay +assemblies +asserter +assess +assiduity +assigning +assist +associate +associator +assuaged +assure +assyrianize +asteroid +astonishingly +astride +astronomically +asymmetric +asynchronously +atheism +athletes +atlas +atomization +atonement +atrophies +attaching +attain +attempt +attendants +attentionality +attenuator +attired +attracted +attributable +attune +auburn +audibly +audiometer +auditions +auger +august +aural +auscultated +austerely +austrianize +authenticator +authorities +authors +autocorrelate +autodecrements +autoindex +automaton +autopilot +autumnal +availer +avaricious +avenues +avers +avian +avionic +avoidable +avow +awakened +awards +awfulness +awry +axiological +axioms +ayes +azure +babelizes +babying +bacchus +backbend +backfill +backorder 
+backscatters +backstitch +backtracks +backyard +badger +baffle +baggage +bagrodia +bailiff +baits +bakes +balancers +baldwin +balkanization +balks +baller +balloon +ballplayer +balsam +bamboo +bandage +bandpass +bane +bangui +bank +bankrupts +bans +baptism +baptized +barbarism +barbells +bards +barest +barhop +barks +barnhard +barometers +barr +barren +barron +barter +basalt +baseless +bash +basics +basketball +bassinets +batavia +bather +bathtub +batted +batting +battlements +baudelaire +bawling +bayonet +be +beaded +beaker +beanbag +bearded +beast +beatification +beau +beautified +beavers +becker +becomingly +bedder +bedpost +bedspread +beecham +beefy +beethoven +befell +befoul +befuddles +beggary +begotten +beguiling +behaviorism +behold +being +belay +belfry +believable +belittles +belles +belligerents +bells +belong +belting +bemoans +bendable +benedictions +beneficiary +bengal +bent +bequeath +berating +beresford +beribboned +berliners +bernardine +bernoulli +bertie +besets +besmirched +bespoke +besting +bet +betrayed +bette +between +bewail +bewilderment +bianco +bibles +bicarbonate +biconvex +bidder +biennial +bigger +biharmonic +bilabial +bilk +billet +billings +bimetallism +bind +binghamton +biochemistry +biologically +biopsy +bipeds +birdbaths +birminghamize +births +bisectors +bisques +bites +bitterly +bivalves +blabbermouths +blackburn +blackfoots +blackmailed +blacks +blaine +blamers +bland +blanketers +blares +blasphemousness +blatz +bleachers +bleating +blemishes +blessings +blindfold +blinked +blissfully +blizzard +bloch +blockers +blond +bloodiest +bloom +blossoms +blowfish +bludgeons +blueprint +bluish +blunted +blurring +blushing +boarded +boaster +boathouse +boatswain +bobbsey +bodenheim +bodybuilding +bogart +bogus +boilers +boldface +bolshevist +bolton +bombastic +bonanzas +bondsman +bonham +bontempo +bookcases +bookkeepers +bookstore +booms +booster +bootle +bootstrapping +borden +borealis +born +borrowers +bosses +botanist +bother +bottler 
+botulism +bounce +bounden +bouquet +boutique +bowdlerizing +bowl +bowstring +boxing +boyfriend +braced +bradbury +bragger +braille +brainstorm +brakes +branchings +brandishing +brashly +braun +braving +braying +brazil +bread +breadwinners +breakfast +breakthrough +breastworks +breathlessly +breeding +brennan +brevet +brewery +bribers +bricklayers +bridge +bridgework +briefed +brig +brighten +brighton +brimming +brings +bristle +britisher +broaches +broadcasts +broadly +broglie +brokenness +bronchial +brooch +brookfield +broth +browbeat +brownian +bruce +brunette +brushing +brutalized +bryce +buchwald +buckler +bucky +buddies +budgeters +buff +buffetings +bugger +bugs +built +bulging +bulldoze +bullfrog +bullying +bumbling +bumptious +bundle +bungler +bunkhouse +bunted +buoys +bureaucracy +burgher +burglarproofing +burke +burn +burnings +burntness +burrowing +bursty +busch +bushwhacked +businesslike +bustards +butchered +buttercup +butternut +buttonholes +butyrate +buzzard +bye +bypassing +bystander +byzantinizes +cabinet +cache +cactus +caesarize +cager +cajole +calais +calculate +calculus +calgary +calico +callaghan +calloused +calmingly +caltech +calypso +camembert +camino +campaigning +camps +canadianize +canceled +candidacy +candler +canine +cannery +cannon +canonical +canopy +canto +canvassed +capable +capacitors +capita +capitalizers +capping +capstone +captivates +capturer +caravans +carbondale +carbonizing +cardboard +cardiology +carefully +caressing +caricature +carlsbad +carnation +caroline +carpenters +carriages +carruthers +carter +carton +carve +cascades +cashed +casings +cassette +castes +casts +catalina +catapult +catches +categorizes +cathedral +catholicisms +cattle +cauldrons +causer +cautioner +cavalierness +cavernous +cawing +cecil +celanese +celerity +cellist +celticizes +censoring +centaur +centimeter +centralized +centroid +cerebral +certainties +certifies +cezanne +chaffey +chairing +chalices +challenging +champaign +chancellor 
+changeability +channeled +chanter +chapel +chapter +characterize +chargeable +charitable +charlotte +chars +chartings +chasing +chastisers +chattel +chauffeured +cheaply +checkbook +checkout +cheekbone +cheeriness +cheeses +chemise +cherishes +cheryl +chests +cheyennes +chicanos +chides +childhood +chill +chime +chinas +chinning +chiropractor +chit +chloroplasts +choir +choose +chopping +choreograph +chou +christenson +christianizing +christoph +chronicle +chronology +chuckles +churchgoing +churn +ciceronianize +cinderella +ciphertexts +circuitously +circulating +circumnavigates +circumstanced +circuses +cities +civet +civilized +claimed +clambers +clams +clapboard +clarifications +clash +classes +classifiers +clattered +claustrophobia +cleaned +cleansed +clearer +cleaved +clemente +clerked +cliches +cliffs +climb +clincher +clink +clippers +cloaks +clockings +clogs +closeness +closing +clothing +clouding +clowns +clucks +clumsy +clutching +coaches +coalition +coastal +coating +coaxing +cobweb +cockpit +cocoon +codes +codifies +coefficient +coexist +coffer +cogitated +cogs +cohering +coils +coinciding +colder +colicky +collaborator +collared +collecting +colleges +collins +colombia +colonies +colons +colorless +columnize +combated +combinator +combings +comedic +cometary +comforting +comma +commandment +commemorative +commended +commenting +commissioners +committeemen +commonalities +commonwealth +communicated +communists +commuting +compactors +comparably +comparison +compassion +compelling +compensatory +competitions +compilers +complaint +completed +complexities +complications +complimenting +composedly +compost +comprehensibility +compression +compromisers +compulsory +computed +comradely +concatenation +concede +conceived +concentrators +conceptualized +concerted +conciseness +concoct +concretes +concurring +condemns +conditional +condoned +conduction +confectionery +conferred +confessions +confidential +configure +confining +confiscates +confocal 
+confounding +confucian +confusion +congo +congregating +congresswomen +conjoined +conjuncture +connected +connector +connors +conquered +conrail +consecrate +consenting +conservation +conserved +considered +consistent +consolers +consonants +conspirator +constant +constituent +constitutions +constructed +constructs +consultant +consumed +consumptions +contain +contaminated +contemplative +contender +contentment +context +continents +continuations +contortions +contracting +contradicting +contraptions +contribute +contrite +controllability +controversy +convened +conventionally +conversantly +conversion +convex +convict +convinces +convoys +cooked +coolers +coon +cooperations +coordinates +copeland +copings +coprocessor +coquette +cords +corinthian +corks +cornered +corns +coronary +corporately +correct +correctness +correspond +corridors +corroborative +corruption +corvallis +cosmopolitan +costs +cots +cotyledon +coughs +councilwoman +counselors +counteracting +counterfeited +counterpart +countersunk +countrywide +couplings +courser +courtesies +courtrooms +covenant +coverlet +covetousness +cower +cowl +coypu +crackers +cradles +craftsperson +cramps +crank +cranny +crater +crawford +craze +creaked +creams +creation +credence +creditor +creeks +cremates +crescents +cretin +cricket +criminal +cripple +crisscross +criticizes +croaks +crocus +cropper +crosser +crosstalk +crowd +crowning +crucifixion +cruel +cruising +crumpled +crusade +crushes +crux +cryptic +crystallize +cubans +cucumbers +cufflink +culminate +cultivable +cultural +cummings +cupboard +curb +curing +curlers +current +currying +cursory +curtly +curving +custer +customizable +cut +cuts +cyanamid +cyclically +cygnus +cypress +cytoplasm +dabbles +dadaistic +dahl +dairy +dali +damages +damns +damsel +dancing +dangle +danize +dare +darken +darn +darted +darwinizes +database +dates +daunted +davy +daydreams +dazzled +deaden +deaf +dealings +deanna +death +debater +debilitates +debtor +debutante +decay 
+deceit +decelerate +decent +decidability +decimate +decision +decks +declarer +decliners +decodings +decomposition +decorative +decreases +decrements +dedicated +deduct +deeding +deep +deere +defeats +defendant +defenestrating +deferments +deficiencies +define +definitions +deformation +defy +degradation +deify +dejected +delaying +deleter +deliberated +delicacies +delightful +delimiting +delirious +deliveries +delphic +deluged +demand +demeter +demodulate +demons +demonstrator +demultiplex +denebola +denigrates +denominators +denoting +densest +dentists +denying +departure +dependent +depleted +deploy +depose +depositors +depreciated +deprivations +dequeued +deregulate +derive +descend +descents +descriptively +deserters +deservings +designator +desire +desolate +despatched +despite +destabilize +destroyed +destructiveness +detacher +detained +detective +deteriorated +determination +deterministic +detractor +devastate +development +deviation +devised +devotedly +devours +dexedrine +diagnose +diagonals +dial +dialogue +diamond +diarrhea +dickinson +dictatorial +diddle +dies +dietrich +differentials +differers +diffusely +digest +diggings +digits +digressing +dilapidate +diligence +dilution +dimensions +dimmed +dine +dining +diogenes +diphthong +dipper +direction +directorate +dirt +disable +disaffection +disallowing +disappearances +disapprove +disassembles +disbands +discarding +discerns +disciplines +disclosure +disconnects +discord +discouraging +discovery +discretion +discuss +disease +disfigure +disgruntle +disgustingly +dishonestly +dishwater +disjoint +disk +dislocates +dismaying +dismissers +disobedient +disown +dispatched +dispensary +dispersed +displacing +displeasing +disposition +disputer +disquieting +disruption +dissemble +dissenter +dissimilarities +dissociating +distaff +distastes +distilling +distinguish +distorts +distresses +distributivity +disturbed +ditty +divergence +diversifies +diverting +dividend +divining +divisors +dixieland +doberman 
+doctoral +documentaries +dodecahedra +doe +doghouse +dolan +dollies +domenico +domicile +domineering +donahue +donkey +dooley +doorman +dopers +doric +dortmund +doted +doubleheader +doubt +doubts +doves +downey +downloading +downstairs +doyle +draconian +drafty +dragooned +dram +drape +draughts +drawings +dreaded +dreamers +dregs +dressing +dries +driller +drip +driveway +droop +droppers +droves +drudgery +drummers +drunkly +duality +dubuque +ducts +dug +dullness +dumbly +dunbar +dungeons +duplicable +dupont +duration +durward +duster +dutchman +dwarfed +dwelt +dyeing +dynamism +dysentery +ear +earmarked +earnestness +earth +earthquakes +eases +easterner +easy +eaves +eben +echoed +ecology +economize +ecuador +edenizes +edict +edition +edmonton +educating +edwards +effecting +efficacy +effortlessness +eggshell +egyptianize +eigenstate +eighthes +eisner +ejecting +elaborately +elapses +elderly +elections +electrically +electrocute +electroencephalography +electronics +elemental +elevation +elicited +eliminating +elite +ella +ellipsoids +elmhurst +else +elucidation +ely +emancipate +embarrass +embeds +embodied +embracing +emerald +emeritus +emil +emits +emotionally +emphasizing +employable +emporium +emptily +emulator +enacted +encamping +enchanter +encircled +encoder +encounter +encouragingly +encumbered +endangers +endemic +endorse +endows +enduringly +enfeeble +enfranchise +engels +engines +englishmen +engulf +enjoin +enjoys +enlightened +enlivens +enormity +enquirer +enriches +ensembles +ensnaring +ensures +enterprise +entertainment +enticed +entitle +entreat +entrepreneurs +enumerated +enveloped +environ +envisioned +ephemeral +epicurizes +episcopalian +epitaphs +epochs +equalize +equates +equilibrate +equips +equivocally +erased +ere +ergo +erlang +erode +erratum +errs +escalates +escapes +escorts +especially +esquires +essentially +estates +estimated +eternal +ethernets +etruria +eulerian +eurasia +europeanized +evade +evaluative +evaporation +evenhandedness 
+events +everglades +everything +evidences +evinces +evolve +exacerbated +exactions +exaggerations +examined +exasperates +exceeded +excellently +exceptions +exchangeable +excision +excitingly +exclamatory +exclusiveness +excreting +excused +executional +exemplified +exempts +exertion +exhaustedly +exhibitions +exile +existentialist +exorbitant +expanders +expect +expects +expeditious +expenditure +experiencing +experiments +expires +explanations +exploit +explorations +explosive +exponentiating +exports +exposure +expressibility +expulsion +extemporaneous +extensive +exterminate +extinguished +extract +extraneous +extrapolation +extremely +exult +eyeglasses +eyesight +fables +facade +facile +facsimile +factories +faculty +fagin +failsoft +faintness +fairing +faithful +fakes +fallacious +falmouth +falsifying +familiar +families +fanaticism +fanciness +fanning +farad +farewells +farmers +farrell +fascination +fasted +fastidious +fate +fathomed +fatten +faulkner +fauna +favoring +fayette +fearlessly +feat +featherweight +fed +feebleness +feeds +feet +feline +fellowships +feminism +fencing +fermentation +ferociously +fertile +fervent +festivity +fettered +feverish +fiat +fibrously +fiddled +fief +fiendish +fifteenth +fighting +fiji +files +filled +filming +filthy +finals +finder +fines +fingerprint +finishes +finnish +fireboat +firemen +firewall +firming +fiscally +fishes +fissured +fitly +fitzpatrick +fixation +fixture +flagged +flak +flamer +flanking +flash +flask +flattered +flaunting +flawlessly +fledglings +fleetly +flemishing +flew +flicking +flinches +flirt +floated +flood +floors +florentine +flossing +flourished +floweriness +fluctuate +fluffier +fluoresce +fluting +flying +focal +foes +fogy +folders +folksy +folsom +font +fooled +football +footing +forage +forbes +forcer +forearms +forefathers +foreign +foreseeable +forestalls +forever +forge +forgettable +forgiving +forlornly +formalized +formatively +formicas +formulated +forsaken +forthwith +fortiori 
+fortuitously +forwarder +fought +foundation +founds +foursome +foxes +fragile +fragrantly +framing +francie +francoise +franking +fraser +fray +freckle +frederico +freeing +frees +freezing +frenchizes +frequented +freshened +freshness +freudianism +friction +friendlier +friezes +frightful +frisia +frivolity +from +fronts +frothing +fruehauf +fruits +fuchsia +fujitsu +full +fumbling +functionally +fundamentally +fungal +funnier +furlong +furniture +furthermore +fuses +futuristic +gabled +gadgetry +gaging +gaines +galactic +galaxy +gallantly +gallon +gallstone +gambled +games +gangplank +gaped +garbed +gardner +garlanded +garrisoned +gaseous +gaspee +gastric +gathered +gauche +gaunt +gawky +gaze +gearing +gelatin +gemma +generality +generals +generic +genetic +genre +gentler +geodesic +geological +geophysical +geraldine +germane +germinates +gestapo +getting +ghosted +gibraltar +gig +giggle +gilds +gilt +ginghams +gipsy +girlish +giver +gladdest +glance +glaring +glazed +gleaner +glenda +glimmer +glints +gloat +gloria +glorying +glove +glowing +glynn +gnu +goats +goblins +godmother +goethe +goldenly +goldstine +gondola +goode +goodyear +goren +gorton +got +gothicizing +gouging +government +grab +gracefully +gradations +gradual +graft +grained +grams +grandfather +grandpa +grant +granulates +graphical +graspable +grassiest +gratification +gratuitously +graves +grayed +grease +grecianize +greeks +greenfeld +greens +greeter +grenades +greyest +grievances +griffith +grimes +grinds +gripped +gritty +grocers +grooved +grossest +groton +group +grovels +growling +grubs +grumbling +guano +guardedly +gubernatorial +guest +guideline +guiltier +guises +gullah +gumming +gunner +gurgle +gustafson +guts +guyer +gymnastics +haas +habitual +hacks +hag +hail +hairier +hale +hallmark +halpern +halve +hamburgers +hammering +hampshire +handbooks +handicap +handkerchiefs +handshake +handy +hangman +hannah +hansel +haplessly +happily +harbinger +harder +hardships +harken +harmfulness 
+harmoniously +harnessing +harriman +harry +harvardize +harveys +hassle +hat +hatefully +hattie +hauler +hausa +havoc +hawthorne +haywood +head +headlands +headroom +heals +healy +hearings +heartily +heater +heaved +heaviness +hebrides +hedgehog +heeds +hegelianizes +heights +heiresses +helicopter +hellenized +helmet +helpfully +hem +hemp +hendrick +henrietta +heralds +herder +hereford +hereunder +hermit +heroically +herring +hertzog +hesperus +heterogenous +heuser +hexagon +hibernate +hidden +hierarchic +highfield +highnesses +hikes +hillcrest +hilt +hindered +hindustan +hinting +hired +his +histograms +hitch +hither +hitting +hoarseness +hobby +hoe +hoists +holds +hollandaise +hollowness +holocaust +homage +homeomorphism +homespun +homing +homosexual +honesty +honeymooning +honoraries +hoodlum +hooker +hoosierize +hooves +hopelessness +horace +horn +horrible +horrors +horseshoer +hospitalize +hostesses +hotly +hounding +houseflies +housetop +hover +howled +hubert +huey +hugo +humanities +humbling +humidifiers +humiliation +humorers +humpty +hung +hungry +huntley +hurling +hurrying +husbands +husks +hutchins +hyde +hygiene +hyphenate +hypotheses +hysterical +ibsen +icicle +icosahedron +idealize +identical +identify +idiosyncrasy +idles +ignite +ignores +illegality +illogical +illusions +illustrative +imagen +imagine +imbrium +immaterial +immensely +immigrating +immovability +impacted +impale +impatiently +impedes +impenetrable +imperfectly +impermanent +impersonations +impinges +implementable +implicants +implied +important +imposes +impotence +impractically +impressible +impressment +imprisonments +improvement +improvisers +impulsion +inaccessible +inadequate +inane +inaudible +inca +incas +incessantly +incidentally +inciting +incloses +inclusiveness +incomparable +incompletely +incongruity +inconsistent +inconvenient +incorrectness +incredulous +incubate +incurable +indecisive +indent +indescribable +indexing +indication +indifference +indignation +indirectly 
+indistinct +individually +indoctrinating +indubitable +inductances +inducts +industrialist +industry +inelegant +inertly +inexact +inexplicable +infantry +infection +inferior +infertile +infinite +infirmary +inflated +inflicting +inform +informatively +infrequently +infuriating +ingeniousness +ingratiate +inhabited +inherently +inheritress +inhibitor +inimical +initialized +initiating +injection +injured +inker +inlet +inner +innocuousness +inoculate +inquire +inquisitive +inscribed +insecurely +insertion +insidiousness +insinuated +insistently +insomnia +inspiration +installation +instances +instantiations +instill +institutes +instruct +instructs +instruments +insulation +insurance +insurrection +integrand +intellect +intelligible +intensification +intensively +inter +intercept +interchanged +intercommunicates +intercourse +interested +interfered +intergroup +interleaved +interminable +intermodule +internationality +interpersonal +interposed +interpreting +interrelations +interrupt +intersecting +interstate +intervening +interwoven +intimation +intolerance +intractability +intraoffice +intrigued +introductions +intruder +intubation +invaders +invalidities +invariants +inventively +inverses +inverting +investigative +inveterate +invites +invoked +involves +ionians +ira +irately +irishman +ironic +irrational +irregular +irrepressible +irreversibility +irritable +irving +isfahan +island +isolated +isomorphisms +issuance +it +italicize +itemizations +iteration +ito +izvestia +jackets +jacky +jacobus +jailer +jamaican +janet +janus +jargon +jauntiness +jay +jeanne +jeffersonian +jennifer +jeremy +jeroboam +jest +jesuitizing +jeweled +jews +jingled +joaquin +joes +john +joiner +jokers +jolts +jordan +josephus +jotting +journals +jousting +joyous +judaica +judge +judith +jugoslavia +julie +jump +junctures +juniper +juras +jury +justifiers +jutland +kaddish +kamikazes +kant +karp +katowice +keeling +keepers +kemp +kenney +kepler +kerouac +key +keypad +khrushchevs +kidde 
+kidney +kilimanjaro +kills +kilojoule +kimono +kindling +kingpin +kinnickinnic +kirchoff +kisses +kiting +klein +knapsacks +kneel +knickerbockers +knights +knocked +knots +knowledge +knuckles +kodachrome +korea +krakatoa +kronecker +kurd +labeling +laborer +labyrinths +lacerta +lacks +ladies +lagoon +laidlaw +lamarck +lament +lamp +lanced +landings +lands +lange +languish +laos +lapse +largely +larson +lashing +laszlo +later +latinity +latitudes +laudable +laughlin +laundered +laurels +lavender +lawfully +lawsuit +layers +lazarus +leaded +leafed +leaguers +leander +leaping +leary +leathern +leaving +lectures +leeds +leftists +legalization +leger +legislated +legitimate +leila +lemon +lends +leniency +lens +leonardo +lesbian +lesson +letter +levee +levelly +levin +lewdly +lexington +libelous +liberated +libido +license +lick +lied +lifeboat +lifetime +ligget +lighthearted +like +likeness +lilian +liman +limit +limits +lind +lindy +linearly +lingerie +lining +linnaeus +lioness +liquid +lise +listened +listings +literalness +lithuania +littering +live +livers +lizzie +loaf +loathing +lobster +locally +locator +lockian +lockwood +lodges +logarithm +logically +logs +loiters +londonization +loners +longings +lookers +looms +looseleaf +loosing +lords +lorry +lossiest +lotus +louisa +lourdes +lovelace +loves +lowest +loyally +lucerne +luckier +ludicrous +luke +luminously +lunch +lunged +luring +lust +luther +luxuriantly +lyle +lynx +mac +macdonald +maces +machinery +mackey +macromolecule +madden +madhya +madsen +magellanic +magill +magnetizable +magnify +maguire +maids +mailman +mainframes +maintained +majesty +maker +malady +malcolm +malformed +maliciousness +malone +malton +manage +managing +mandatory +manger +manhole +manicuring +manila +manipulative +mann +manors +mantissa +manufactured +mao +maps +marched +mardis +margo +marinade +maritime +marketability +markings +marmalade +marriott +marshaling +martial +martyr +marvels +mascara +maskable +masonite +massacred +mast 
+masterpiece +masturbation +matchless +materializing +mathematically +matings +matrix +mattered +matured +mauricio +maxima +maxims +maybe +mayoral +mccabe +mccluskey +mcdonnell +mcgovern +mckee +mclean +mcpherson +mealtime +meaningful +meant +measurements +mechanically +medal +medfield +mediations +medicine +meditating +mediums +meeting +megahertz +meister +melcher +melodies +melpomene +membership +memoranda +memorizes +menagerie +mendelizes +mennonite +mentalities +mentor +mercenariness +mercilessly +merged +meritorious +merrill +mesh +messenger +messy +metallization +metaphysical +meteoritic +methodically +methods +metro +mews +mica +mick +microbicide +microeconomics +micron +microprocessing +microscope +microvaxes +middleman +midnight +midstream +midwinter +migrate +mikoyan +mileage +milk +mill +millikan +millionth +millstones +miltonized +minaret +mindfully +mineral +mini +minima +minimizes +ministries +minor +minstrels +minute +miracle +miriam +miscarriage +misconception +miserably +misgivings +misled +misplacing +missing +missoula +mistake +mistletoe +misunderstand +mitch +mitres +mixtures +moats +mocked +modally +moderated +modernizer +modicum +modifying +modularizing +module +moghul +moines +moldavia +moles +mollusk +momentarily +monaco +monday +mongolian +monkeyed +monocotyledon +monolithic +monostable +monroe +montague +montgomery +monument +mooned +moor +moped +morass +morehouse +morn +morphological +morsels +mortgage +mosaic +mosque +motels +motherland +motionlessness +motley +motorized +mound +mountainously +mourners +mousy +movable +moving +muck +muddled +muffin +mugs +mullah +multicomputer +multiple +multiplicand +multiplies +multistage +mumbles +mundane +munitions +murdering +murmurs +muscovy +mushroomed +musicians +muskrat +mussorgsky +mutability +mutations +mutilating +mutters +mycenaean +mysterious +mythologies +nagasaki +nair +nakedness +names +nanook +napkin +narcotic +narrowest +nash +natal +nationalities +nations +naturalist +naughtiness 
+navel +navona +neanderthal +nears +nebula +necessitation +neckties +needled +needy +negatives +negligible +negroid +neighboring +neoclassic +nero +nesting +nets +neural +neutral +neva +newburyport +newman +newsman +next +nibelung +nicholls +nickname +nielson +nightfall +nihilism +nimbler +nineties +nipponizes +nobility +nocturnally +noel +nolan +nominee +nonconservative +nondeterminism +nongovernmental +nonlinearity +nonorthogonal +nonsegmented +nonterminals +nook +nordhoff +normalization +normanizations +north +northernly +norwalk +nostradamus +notarizes +note +noticeable +notifies +nottingham +novak +novices +nuances +nuclide +nullary +number +numerable +numismatic +nursing +nutritious +nyquist +oases +obedient +obfuscate +objectively +obliged +obliterating +obscene +observable +observers +obsoletes +obstruction +obviated +occasional +occidentalize +occlusions +occupied +occurs +octagonal +octets +oddly +odious +o'dwyer +offended +offer +officer +officiously +oft +oilcloth +ojibwa +oldenburg +oleomargarine +olivia +olympus +ominousness +omnipresent +o'neill +onlooker +onus +opaquely +openings +operate +operator +oppenheimer +oppose +oppressed +opthalmic +optimist +optimizing +opts +oranges +orbital +orchestral +order +ordinarily +ores +organization +organs +orientalized +orifices +origination +orleans +ornate +orr +orville +oscillates +o'shea +osteopath +othello +otto +ounces +outburst +outdoor +outgrowing +outlawing +outlive +outperforms +outrages +outstanding +outvoting +outwitting +overboard +overcrowds +overestimates +overhangs +overjoyed +overload +overnighter +overproduction +overrunning +overshadowing +oversized +overtake +overtly +overuse +overworking +owen +ownership +oxidized +ozzie +pacification +packaged +packers +padding +pageant +paginating +painful +painting +pajamas +pale +palestine +palliative +palomar +panacea +pandemic +panels +panned +pantheist +panty +paperers +par +parades +paragon +parallel +parallels +parameterize +paramus +paraphrases 
+parcel +pardoned +parentheses +pares +parisian +parkers +parlay +parody +parrots +parsifal +partakes +participant +particular +partitioned +partridges +passageway +passion +passport +pasteur +pasture +patchy +patents +pathogenesis +patients +patricians +patrolling +patrons +patterning +paula +paulus +pavement +pawn +payer +payoffs +peacefully +peaks +pearl +peat +peculiar +pedant +pediatrician +peeling +peering +peking +pembroke +pence +pends +penetration +peninsulas +pennsylvania +pentagon +peopled +peppery +perceived +percents +perchance +perennially +perfectness +performs +perihelion +periodically +perishable +perkins +permeating +permit +pernicious +perpetration +perpetuation +persecuting +perseveres +persist +personal +personified +perspiration +persuasions +perturb +peruses +pervasive +pester +peters +petri +petting +phaedra +phaser +phenomenological +philco +philistinizes +philosophies +phoenicia +phoning +phosphorus +photogenic +photos +phyla +physicist +pi +pick +picketing +pickman +picojoule +picturing +piedfort +pies +piggybacked +pigtail +pilferage +pillar +pilots +pincushion +pining +pinnacle +pinscher +pioneers +pipelining +pirate +pistols +pitching +pithiness +pitiless +pituitary +pixels +placement +plagiarist +plainfield +plaintiveness +planeload +planets +planoconcave +plantings +plasticity +plates +platoon +playboy +playing +playwrights +pleasant +pleat +plenary +pliant +plots +plows +pluggable +plume +plundered +plunging +plutonium +pocahontas +pod +poetical +poincare +pointy +poisons +polaris +police +polish +politer +polka +polluted +polymer +pomerania +pompousness +ponds +pool +pop +popping +popularized +populous +pores +port +portending +portico +portray +posed +position +posits +possessive +possums +posteriori +postmasters +postscript +pot +potentates +potion +pottery +pounces +pourer +poverty +powerful +practicable +practitioners +praise +prancer +prayer +preallocated +precariously +precedents +preciously +precipitation +precludes 
+preconception +predating +predetermination +predication +predictive +predominately +preemptive +prefacing +prefers +preinitializes +preliminary +premise +preoccupied +prepared +preposterously +prerogatives +prescriptions +presentations +preserved +presidential +pressings +preston +presumptuousness +pretending +pretexts +prevailing +preventing +previously +pricers +prides +primarily +priming +princesses +principles +prior +prisoners +privations +prizes +probate +probings +procedure +processing +proclamation +procreate +procurer +produce +productive +profession +proffered +profitability +profound +program +progresses +prohibitions +projections +proletariat +prolong +prominent +promoter +promptest +promulgation +pronouncement +proofs +propane +properly +prophesy +proportionately +proposer +propounded +prorate +prosecutes +prosodic +prospector +prostate +protecting +protege +protestations +protons +protozoan +prouder +provenance +providence +provision +provokes +proximal +pruned +prussianize +pseudoinstruction +psychiatrist +psychologically +psychosomatic +pub +publicly +puckered +puffed +puller +pulls +pulse +pumpkin +punctually +punishable +punt +puppeteer +purchases +purges +purina +purpler +purposed +purse +pursuing +pushdown +putnam +puzzlement +pyongyang +pythagoreanizes +quadrangle +quadrennial +quagmires +quakeress +qualified +qualm +quantifiers +quantize +quarreled +quartering +quasar +quavering +queerer +queried +questionable +questions +quibble +quicklime +quieting +quince +quit +quivers +quonset +quotient +rabin +rachmaninoff +racketeers +radiance +radiators +radiography +rae +rages +raider +railroaded +rainbow +rains +rake +ralston +ramifications +rams +rand +randy +rangy +rankings +ransomer +rap +rapids +rapturous +rascally +rasping +ratfor +ration +rationalizes +rattler +ravager +ravens +rawlins +rays +reach +reacted +reactivation +reader +readjusted +realigned +realizable +realm +reaped +rear +rearrest +reasonings +reassigned +reawakened +rebellions 
+rebooting +rebuffed +rebutted +recalibrated +recapitulates +receded +receives +receptive +recife +reciprocating +recitations +reckoned +reclaiming +reclining +recognize +recollect +recommend +recompiles +reconciliation +reconnect +reconstituted +recorder +recover +recreating +recta +recur +recursing +red +redeclared +redefined +redevelopment +redisplayed +redness +redressing +reducibly +reeds +reelects +reenforcement +reestablishing +reexamining +references +referral +refine +reflecting +reflexes +reformatory +reformulated +refrained +refreshment +refugee +refuted +regally +regenerating +regimentation +regis +regressed +regrettable +regularly +regulators +rehearsing +reimbursable +reined +reinhold +reinstated +reintroduces +reiteration +rejoiced +relabeled +relating +relatives +relaxes +relegate +relents +relic +relieving +relinquishing +reloader +reluctance +remains +remedied +remind +reminiscently +remodels +remotely +removing +renames +rendezvous +renewable +renouncing +rented +reopen +reorganize +repairman +repaying +repeatedly +repentance +repetitious +replaceable +replays +replicate +report +reposing +representably +representing +reprieved +reproach +reproducibilities +reprograms +republics +repulses +reputed +required +requisitions +rescind +researchers +resemblances +resentment +reservoir +resident +resignation +resistance +resistors +resolver +resorting +respect +respective +responded +responsible +restarts +restful +restorations +restrainers +restrictive +resultant +resuming +resurrectors +retailing +retaliatory +retentiveness +retina +retiring +retracting +retransmission +retribution +retriever +retrospection +retype +reuniting +revamping +reveler +revere +reverifies +reverses +reviewer +reviser +revival +revoked +revolution +revolvers +rewinding +rewriting +rhesus +rhode +rhyming +ribbons +richard +richmond +rico +ride +ridiculed +rifle +rigging +rightfulness +rigor +rims +ringings +riordan +ripely +rippling +risk +ritually +river +rivulet +roadsters 
+roaring +robberies +roberta +robinsonville +rochester +rocket +rockwell +rods +roll +romance +romanizes +romper +roofing +rooming +root +roping +rosebush +rosetta +rot +rotations +rotund +roughness +rounding +roused +routes +roving +rowley +royalty +rubbing +rubles +rudeness +ruffian +ruggedness +rule +rumanians +rummy +runaway +runoff +rupturing +russell +rustic +rustlers +ruthlessness +sabbathize +sachs +sacrifice +sacrosanct +saddles +safari +safes +sagebrush +said +sails +salable +salerno +saline +sally +salters +salutations +salvages +same +sampling +sanatorium +sanctioning +sandburg +sandra +sanest +sanskrit +sapling +saran +sari +satchel +satires +satisfy +saturnalia +saud +savaged +saver +savored +sawfish +saxonize +sayings +scala +scaling +scampers +scanners +scapegoat +scared +scatter +scenic +schantz +schelling +schemers +schmitt +scholastic +schoolhouses +schroeder +schuylkill +scissor +scoffs +scope +scoreboard +scorner +scotchgard +scottsdale +scouted +scrambled +scrapes +scratching +screamers +screenings +scribbled +scripts +scrumptious +scuffle +sculptured +scythe +seagate +seam +seaquarium +searchlight +seasonable +seat +seceded +secondary +secretarial +secretive +sections +securings +sedition +see +seedy +seeming +seer +segmentations +segundo +seizures +selectman +selfishly +sells +semantics +semiconductor +semipermanently +senate +seneca +sense +sensing +sensual +sentimentally +separately +sept +sequencers +sequentially +serene +serializable +serif +serra +service +servings +sets +settler +seventeens +severance +severs +sex +sexual +shackled +shadiness +shaffer +shakers +shale +shameful +shanghaied +shapeless +shard +shares +sharpening +shattering +shawano +shearing +sheds +sheets +shelley +shelves +sheridan +shied +shiftier +shilling +shiner +shintoizes +shipper +shirk +shiver +shocker +shoehorn +shooter +shoppers +shortage +shortens +shorts +shouldered +shoved +showed +shows +shrewd +shrilled +shrinking +shrugs +shuffled +shutoff +shuttles 
+siberia +sicken +sideband +sides +sidings +sierra +sighed +sigma +signature +signification +sikkim +silent +silken +sills +silverman +simile +simon +simplicities +simplistic +simulation +sinbad +sinews +singed +singlet +singularly +sinner +sioux +sirens +sisyphus +sittings +siva +sixties +skate +skeptical +sketchpad +skidding +skillfulness +skims +skipped +skirmishes +skulked +sky +skyrockets +slacks +slang +slash +slaughter +slavic +slavonicizes +sledgehammer +sleepless +sleighs +sliced +slide +slightly +slings +slips +slogans +sloppiness +slotting +slower +sluggishness +slums +smacked +smalltime +smasher +smell +smiles +smithsonian +smoked +smoldered +smoothing +smug +smythe +snap +snapshots +snatched +sneakiest +sneers +sniffs +snodgrass +snorkel +snowbelt +snows +snuffs +soak +soared +sobers +socialists +sociological +socks +sofas +softly +sojourn +soldier +solenoid +solid +solids +solos +solvent +somber +somerset +son +sonny +soothe +sophistication +sordid +sorest +sorrows +soul +soundness +soured +southbound +southland +soviets +spacer +spaded +spaniardization +spanked +spare +sparked +sparsely +spat +spawned +speakers +specialists +specialty +specified +speckle +spectators +spectrography +speculates +speechless +speeds +spellings +spent +spica +spies +spilt +spinner +spirally +spirituals +spiting +spleen +splicing +splits +spoiling +sponged +sponsorship +spoolers +spores +sportswriter +spotter +sprague +spraying +sprees +springiness +sprint +sprouted +spurn +sputtered +squadrons +squarer +squatting +squeaky +squeezing +squirmed +stab +stabilizes +stacked +staffing +stagers +stags +staircases +stalemate +stalling +stammer +stampeding +standard +standings +stans +star +stargate +starring +startles +state +statewide +stationmaster +statues +statutorily +staves +steadier +stealer +steamer +steele +steeper +steered +stem +stenographer +stepmother +stereoscopic +sterilizer +stethoscope +stew +stickier +stiffens +stigma +stillest +stimulate +stinging +stipends 
+stirrer +stitching +stockholder +stodgy +stomacher +stood +stopgap +storage +storeyed +stormiest +stouter +strafe +straightened +strained +stranding +strangler +stratagem +stratifies +strawberry +streamer +streetcar +strengths +stretched +strict +strife +stringed +stringy +striptease +strode +strolling +strontium +struggle +stuart +stucco +studious +stuffs +stun +stupid +sturm +styli +styx +subcomponents +subdirectory +subdues +subgraph +subjective +sublime +submerges +submode +subordinate +subprogram +subschema +subscripting +subsequent +subsidies +subsistent +substantially +substation +substrate +subsystem +subtleness +subtraction +subunits +subverting +successful +succinctness +suckers +sudden +sufferance +sufficiency +suffocated +sugarings +suggests +suitably +suits +sulks +sultan +summands +summation +summon +sumter +sunder +sunken +sunshine +supercomputers +supergroups +superior +supernatural +supersede +supervise +suppers +supplementing +support +suppose +suppressing +surely +surge +surly +surpass +surprisingly +surround +surveyors +sus +suspended +suspicions +sutherland +swab +swallowing +swan +swaps +swat +sweat +swedes +sweepstakes +sweetest +swellings +swifter +swimsuit +swipe +switchboards +swivel +swords +sykes +sylvia +symbolize +symmetry +sympathy +synagogue +synchronizes +syndication +synonymously +synthesize +syrian +systematically +tabernacle +tablespoonful +tabulate +tacit +tactic +tail +taipei +tale +talker +tallchief +talmudizations +taming +tanaka +tangle +tantalizing +taos +tapestry +tar +tariffs +tasked +tastefully +tattered +taunts +taverns +taxicabs +taylor +teaches +tearful +teaspoonful +technique +tedious +teenaged +tegucigalpa +telegraph +teleologically +telephony +televise +teller +temperance +tempestuous +temporaries +temptingly +tendency +tenex +tense +tent +tenure +terminating +termwise +terre +terrify +terrorize +testable +testifiers +tex +textile +thailand +thankless +thaw +theatrically +theme +theology +theorization +therapies 
+thereof +thermometer +thessaly +thickly +thimbles +thinking +thirsted +thistle +thorns +those +thousand +thread +threatens +thrift +thrived +throne +throughout +thrusters +thumbed +thunderer +thus +tiburon +tickles +tidied +tier +tighteners +tilde +tillich +timbered +timeouts +timetables +timonizes +tingling +tinkled +tint +tipperary +tirelessly +titan +titter +toasts +togetherness +toilets +tolerable +toleration +tomatoes +ton +tonic +tool +toothpaste +topmost +topsy +tormenting +torrent +torturing +tossed +totallers +touchable +tough +tourist +toweling +towns +toys +tracked +tractor +trader +trafficked +trailed +trainer +tramp +trances +transceivers +transcribers +transferal +transformable +transgressed +transistorized +transitively +translation +transmittal +transparent +transponder +transpose +trapezoidal +trauma +traversal +trays +treasure +treating +treetop +tremor +trespassed +triangles +tribunals +tricked +tricky +triggered +trilled +trimming +tripled +triumphal +trivially +troopers +trotsky +troubleshoots +trowels +trucking +truest +trumped +trunk +trustingly +try +tubs +tuft +tumbled +tunable +tunisia +turbulent +turkize +turning +turtle +tutankhamen +tuttle +twenty +twiner +twirling +twitching +tyler +typewriters +typing +tyranny +ugh +ulcers +umbrage +unacceptably +unaided +unanimous +unattainability +unaware +unblocked +uncancelled +unchanging +unclosed +unconditional +uncontrollable +uncountable +undecidable +underbrush +underflows +underlies +undermine +underplays +understandings +undertakings +underwrites +undirected +undone +uneasy +unequaled +uneventful +unfairness +unfit +unformatted +ungratefully +unharmed +unidirectionality +uniformity +unindented +uninterrupted +unions +unitarian +unity +universities +unkindness +unleashed +unlinking +unlucky +unmerciful +unnecessarily +unobtainable +unpaid +unprecedented +unprovable +unraveling +unrecognized +unrestrained +unsafely +unselected +unskilled +unsteady +unsynchronized +untie +untoward +unused 
+unwholesome +unwinds +unwrap +updater +upholder +upland +uprightly +upsets +upturns +urge +urinates +ursuline +usages +usenix +usually +utilization +utopianizes +utters +vacation +vacuumed +vagrantly +valence +valiant +validness +valuably +valves +vandenberg +vanished +vanquishing +variably +varies +varying +vastly +vaudois +vax +veering +vegetated +vehicular +velasquez +venetian +venomous +ventricles +venus +verbalized +verdure +verifier +vern +versatile +vertebrates +vested +veterinary +via +vibrations +viciousness +victimizers +victories +vidal +vier +viewing +vignettes +vilification +villages +vinci +vineyard +violator +violins +virgo +virus +visible +visited +visualize +vitally +vladimir +vocations +voided +volition +voltages +volunteering +voted +vouching +voyaged +vulgarly +waco +waffles +wagnerian +wailing +waiter +waives +waking +walgreen +wallenstein +walls +waltham +wandered +waning +wants +ward +warehousing +warmer +warning +warranted +warsaw +washburn +wasps +watch +watchman +watering +watery +wausau +wavelength +waxers +we +weaknesses +wear +wearisomely +weatherford +webs +wedlock +weekly +weidman +weights +weissmuller +welder +wellesley +wenches +wesleyan +westhampton +wet +whacked +wharves +wheel +whelp +wherever +whims +whippany +whirling +whiskers +whistled +whitehorse +whitens +whitlock +whittling +wholeness +whoop +wichita +widen +widowed +wielding +wilbur +wile +wilkinson +william +willis +wilshire +wince +winding +winehead +wining +winnie +winsett +wiped +wiretappers +wised +wishful +witches +withdrew +withholds +witnessing +woefully +womanhood +wonderfulness +woo +wooden +woodstock +woofer +woonsocket +words +workbooks +workman +worldliness +worrier +worshiper +worthless +wounding +wrapper +wreathes +wrenched +wretch +wring +writ +writing +wrote +wyner +xeroxed +yamaha +yard +yawner +years +yellowed +yelped +yesterdays +yoknapatawpha +yorkshire +youngsters +youthfulness +yukon +zeal +zen +zeus +zionism +zoned +zoroaster diff --git 
a/third_party/rust/mapped_hyph/tests/compound.hyf b/third_party/rust/mapped_hyph/tests/compound.hyf Binary files differnew file mode 100644 index 0000000000..ec447151a4 --- /dev/null +++ b/third_party/rust/mapped_hyph/tests/compound.hyf diff --git a/third_party/rust/mapped_hyph/tests/compound4.hyf b/third_party/rust/mapped_hyph/tests/compound4.hyf Binary files differnew file mode 100644 index 0000000000..a5c66944db --- /dev/null +++ b/third_party/rust/mapped_hyph/tests/compound4.hyf diff --git a/third_party/rust/mapped_hyph/tests/compound5.hyf b/third_party/rust/mapped_hyph/tests/compound5.hyf Binary files differnew file mode 100644 index 0000000000..22be46cc20 --- /dev/null +++ b/third_party/rust/mapped_hyph/tests/compound5.hyf diff --git a/third_party/rust/mapped_hyph/tests/compound6.hyf b/third_party/rust/mapped_hyph/tests/compound6.hyf Binary files differnew file mode 100644 index 0000000000..7e369d9474 --- /dev/null +++ b/third_party/rust/mapped_hyph/tests/compound6.hyf diff --git a/third_party/rust/mapped_hyph/tests/hyphen.hyf b/third_party/rust/mapped_hyph/tests/hyphen.hyf Binary files differnew file mode 100644 index 0000000000..3d587b79b5 --- /dev/null +++ b/third_party/rust/mapped_hyph/tests/hyphen.hyf diff --git a/third_party/rust/mapped_hyph/tests/lhmin.hyf b/third_party/rust/mapped_hyph/tests/lhmin.hyf Binary files differnew file mode 100644 index 0000000000..775b96bbf4 --- /dev/null +++ b/third_party/rust/mapped_hyph/tests/lhmin.hyf diff --git a/third_party/rust/mapped_hyph/tests/num.hyf b/third_party/rust/mapped_hyph/tests/num.hyf Binary files differnew file mode 100644 index 0000000000..c1edc3a241 --- /dev/null +++ b/third_party/rust/mapped_hyph/tests/num.hyf diff --git a/third_party/rust/mapped_hyph/tests/rhmin.hyf b/third_party/rust/mapped_hyph/tests/rhmin.hyf Binary files differnew file mode 100644 index 0000000000..9fd01c3f53 --- /dev/null +++ b/third_party/rust/mapped_hyph/tests/rhmin.hyf diff --git 
a/third_party/rust/mapped_hyph/tests/settings2.hyf b/third_party/rust/mapped_hyph/tests/settings2.hyf Binary files differnew file mode 100644 index 0000000000..398e9a8d68 --- /dev/null +++ b/third_party/rust/mapped_hyph/tests/settings2.hyf diff --git a/third_party/rust/mapped_hyph/tests/settings3.hyf b/third_party/rust/mapped_hyph/tests/settings3.hyf Binary files differnew file mode 100644 index 0000000000..d06b18c5d9 --- /dev/null +++ b/third_party/rust/mapped_hyph/tests/settings3.hyf diff --git a/third_party/rust/mapped_hyph/tests/test.rs b/third_party/rust/mapped_hyph/tests/test.rs new file mode 100644 index 0000000000..95eae86f67 --- /dev/null +++ b/third_party/rust/mapped_hyph/tests/test.rs @@ -0,0 +1,169 @@ +// Any copyright to the test code below is dedicated to the Public Domain. +// http://creativecommons.org/publicdomain/zero/1.0/ + +use mapped_hyph::Hyphenator; + +#[test] +fn basic_tests() { + let dic_path = "hyph_en_US.hyf"; + let dic = match unsafe { mapped_hyph::load_file(dic_path) } { + Some(dic) => dic, + _ => panic!("failed to load dictionary {}", dic_path), + }; + let hyph = Hyphenator::new(&*dic); + assert_eq!(hyph.hyphenate_word("haha", '-'), "haha"); + assert_eq!(hyph.hyphenate_word("hahaha", '-'), "ha-haha"); + assert_eq!(hyph.hyphenate_word("photo", '-'), "photo"); + assert_eq!(hyph.hyphenate_word("photograph", '-'), "pho-to-graph"); + assert_eq!(hyph.hyphenate_word("photographer", '-'), "pho-tog-ra-pher"); + assert_eq!(hyph.hyphenate_word("photographic", '-'), "pho-to-graphic"); + assert_eq!(hyph.hyphenate_word("photographical", '-'), "pho-to-graph-i-cal"); + assert_eq!(hyph.hyphenate_word("photographically", '-'), "pho-to-graph-i-cally"); + assert_eq!(hyph.hyphenate_word("supercalifragilisticexpialidocious", '-'), "su-per-cal-ifrag-ilis-tic-ex-pi-ali-do-cious"); +} + +// Testcases adapted from tests included with libhyphen. +// (Using only the UTF-8 dictionaries/tests, and omitting those that require +// the extended hyphenation algorithm.) 
+ +#[test] +fn base() { + let dic_path = "tests/base.hyf"; + let dic = match unsafe { mapped_hyph::load_file(dic_path) } { + Some(dic) => dic, + _ => panic!("failed to load dictionary {}", dic_path), + }; + let hyph = Hyphenator::new(&*dic); + use std::fs::File; + use std::io::{BufRead,BufReader}; + let words: Vec<String> = { + let file = File::open("tests/base.word").unwrap(); + BufReader::new(file).lines().map(|l| l.unwrap()).collect() + }; + let hyphs: Vec<String> = { + let file = File::open("tests/base.hyph").unwrap(); + BufReader::new(file).lines().map(|l| l.unwrap()).collect() + }; + for i in 0 .. words.len() { + assert_eq!(hyph.hyphenate_word(&words[i], '='), hyphs[i]); + } +} + +#[test] +fn compound() { + let dic_path = "tests/compound.hyf"; + let dic = match unsafe { mapped_hyph::load_file(dic_path) } { + Some(dic) => dic, + _ => panic!("failed to load dictionary {}", dic_path), + }; + let hyph = Hyphenator::new(&*dic); + assert_eq!(hyph.hyphenate_word("motorcycle", '-'), "mo-tor-cy-cle"); +} + +#[test] +fn compound4() { + let dic_path = "tests/compound4.hyf"; + let dic = match unsafe { mapped_hyph::load_file(dic_path) } { + Some(dic) => dic, + _ => panic!("failed to load dictionary {}", dic_path), + }; + let hyph = Hyphenator::new(&*dic); + assert_eq!(hyph.hyphenate_word("motorcycle", '-'), "motor-cycle"); +} + +#[test] +fn compound5() { + let dic_path = "tests/compound5.hyf"; + let dic = match unsafe { mapped_hyph::load_file(dic_path) } { + Some(dic) => dic, + _ => panic!("failed to load dictionary {}", dic_path), + }; + let hyph = Hyphenator::new(&*dic); + assert_eq!(hyph.hyphenate_word("postea", '-'), "post-e-a"); +} + +#[test] +fn compound6() { + let dic_path = "tests/compound6.hyf"; + let dic = match unsafe { mapped_hyph::load_file(dic_path) } { + Some(dic) => dic, + _ => panic!("failed to load dictionary {}", dic_path), + }; + let hyph = Hyphenator::new(&*dic); + assert_eq!(hyph.hyphenate_word("meaque", '-'), "me-a-que"); +} + +#[test] +fn 
settings2() { + let dic_path = "tests/settings2.hyf"; + let dic = match unsafe { mapped_hyph::load_file(dic_path) } { + Some(dic) => dic, + _ => panic!("failed to load dictionary {}", dic_path), + }; + let hyph = Hyphenator::new(&*dic); + assert_eq!(hyph.hyphenate_word("őőőőőőő", '='), "ő=ő=ő=ő=ő=ő=ő"); +} + +#[test] +fn settings3() { + let dic_path = "tests/settings3.hyf"; + let dic = match unsafe { mapped_hyph::load_file(dic_path) } { + Some(dic) => dic, + _ => panic!("failed to load dictionary {}", dic_path), + }; + let hyph = Hyphenator::new(&*dic); + assert_eq!(hyph.hyphenate_word("őőőőőőő", '='), "őő=ő=ő=ő=őő"); +} + +#[test] +fn hyphen() { + let dic_path = "tests/hyphen.hyf"; + let dic = match unsafe { mapped_hyph::load_file(dic_path) } { + Some(dic) => dic, + _ => panic!("failed to load dictionary {}", dic_path), + }; + let hyph = Hyphenator::new(&*dic); + assert_eq!(hyph.hyphenate_word("foobar'foobar-foobar’foobar", '='), "foobar'foobar-foobar’foobar"); +} + +#[test] +fn lhmin() { + let dic_path = "tests/lhmin.hyf"; + let dic = match unsafe { mapped_hyph::load_file(dic_path) } { + Some(dic) => dic, + _ => panic!("failed to load dictionary {}", dic_path), + }; + let hyph = Hyphenator::new(&*dic); + assert_eq!(hyph.hyphenate_word("miért", '='), "mi=ért"); +} + +#[test] +fn rhmin() { + let dic_path = "tests/rhmin.hyf"; + let dic = match unsafe { mapped_hyph::load_file(dic_path) } { + Some(dic) => dic, + _ => panic!("failed to load dictionary {}", dic_path), + }; + let hyph = Hyphenator::new(&*dic); + assert_eq!(hyph.hyphenate_word("övéit", '='), "övéit"); + assert_eq!(hyph.hyphenate_word("అంగడిధర", '='), "అం=గ=డిధర"); +} + +#[test] +fn num() { + let dic_path = "tests/num.hyf"; + let dic = match unsafe { mapped_hyph::load_file(dic_path) } { + Some(dic) => dic, + _ => panic!("failed to load dictionary {}", dic_path), + }; + let hyph = Hyphenator::new(&*dic); + assert_eq!(hyph.hyphenate_word("foobar", '='), "foobar"); + 
assert_eq!(hyph.hyphenate_word("foobarfoobar", '='), "foobar=foobar"); + assert_eq!(hyph.hyphenate_word("barfoobarfoo", '='), "barfoo=barfoo"); + assert_eq!(hyph.hyphenate_word("123foobarfoobar", '='), "123foobar=foobar"); + assert_eq!(hyph.hyphenate_word("foobarfoobar123", '='), "foobar=foobar123"); + assert_eq!(hyph.hyphenate_word("123foobarfoobar123", '='), "123foobar=foobar123"); + assert_eq!(hyph.hyphenate_word("123barfoobarfoo", '='), "123barfoo=barfoo"); + assert_eq!(hyph.hyphenate_word("barfoobarfoo123", '='), "barfoo=barfoo123"); + assert_eq!(hyph.hyphenate_word("123barfoobarfoo123", '='), "123barfoo=barfoo123"); +} |