diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
commit | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
tree | 173a775858bd501c378080a10dca74132f05bc50 /vendor/elasticlunr-rs | |
parent | Initial commit. (diff) | |
download | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip |
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/elasticlunr-rs')
73 files changed, 17498 insertions, 0 deletions
diff --git a/vendor/elasticlunr-rs/.cargo-checksum.json b/vendor/elasticlunr-rs/.cargo-checksum.json new file mode 100644 index 000000000..b10341746 --- /dev/null +++ b/vendor/elasticlunr-rs/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"CHANGELOG.md":"dc17b933669842b55d03dac401b6f5bc7995c0b033ff1fabe374a6d27caf5ea3","Cargo.lock":"e3da4ffc5df9ce719a677dcd5e73105c95aa71edbbd109ce0d328bd361463087","Cargo.toml":"f7a7cad77986b823efd9c69c13b9f4692711b36c983b1d38126e4cb78e59afd5","LICENSE-APACHE":"769f80b5bcb42ed0af4e4d2fd74e1ac9bf843cb80c5a29219d1ef3544428a6bb","LICENSE-JS":"32c897851d6b8d0e1942394f55355e393c349658a77844546379f7635da39f5e","LICENSE-MIT":"246e91affd36ef7425ae546b820f1280664aec9aea7e740d17f8a5062763d8a6","LICENSE-WORDS":"3d12ac363522fc1afc87797d65d6dc15673018d667dff0be65dae3e6f0b15ceb","README.md":"8ada95d74964dfc82b57d944447d1e818d3f0de49e881b040cf2f6e6ac8df0a1","benches/bench.rs":"0b4927892fa892198bb9a08f90f0f85aa83ac1956e75c801d4925885bd15758e","examples/export_json.rs":"0ba84b03040705bd92ff92eb3a3d4638313faad1fc2033bc0aef3ef9c91d2c76","src/config.rs":"3146ea17eae2900ec3088414fc4aa89fdcfe28fe275bdfd3ce52974180f68b9b","src/document_store.rs":"3777ff7213a972fcf248224d029f852f976373a9cc1142b506a4ed15a27808a5","src/inverted_index.rs":"5f6a6c9051915ae03058cb8e85eb168ba48f6a2d5fdd1e98c667fb9b5e9ac47a","src/lang/ar.rs":"ad9c6e2e528c679b3203b8357e3537a089a934912885bba17020194ca3cc08e8","src/lang/common.rs":"3d26b838ff5feadc9eea2a5cf4a42fe9d574182d9f4cd75f07e3a5b022fd08ca","src/lang/da.rs":"a5ca4390f23698f207cb18eee99680ef223cb5a559c1ce747c319999ba900917","src/lang/de.rs":"ec59dbe1859b8a3ef78c60f2ec04424f5f7135cb099cdd08ddb559a0f33466b2","src/lang/du.rs":"8a5743fb57b507c4a931325d58415a486607809044a5fb3ef85e2cb399d03d22","src/lang/en.rs":"a9ebdbe69767a07a9ee92d81fb4dd754185d535fed559cf3185a53a812f53488","src/lang/es.rs":"3ce5e2544e5cdb378633755188bf63f5f0243cbd4656e63f2ada1c73e93949ec","src/lang/fi.rs":"18f1351ab40ffe17c8c351a12ebb77bbed0382bfae39494c9a1e43598dfb
6ff7","src/lang/fr.rs":"116209adf22b2b10748b98837757b8dc0b8450cbe53c4480376f85ea82a62a8b","src/lang/it.rs":"e3c81b49b81129caa8158c2de28279fc8cb106146753d66276bdc2d7de1596c4","src/lang/ja.rs":"74dfca687651dd3693c5cd05c6c463d6e3c54b05516f4a83006f48f6eadc5dc1","src/lang/mod.rs":"1f2f52c7410ca7f3bbfa0f8a90d34b7110c8dc8d6c322f17ef924a06a89788b9","src/lang/no.rs":"d79bfc8f4461718240a2e6c97b519d5a4b6faaf4301342a3259e758afcec591b","src/lang/pt.rs":"529018271b392caa549a27da25f1d314d513e634ec9b3776b0763bbea0ae1a2c","src/lang/ro.rs":"7114819f904f73b5f6b810784bf0123df995fe0aa3cf80d46586ef9c553e846f","src/lang/ru.rs":"d2df35aa13e8c5e3b9b949d61e12c4201a64897d9bb55b64b6f63ca5f7a5d695","src/lang/sv.rs":"92f93a0ad5e7e0996e92f5e7eadb1f702a430356ec8876ad8b5259dedd16d449","src/lang/tr.rs":"b8f263f40a0f69a597ed78528f97769cec82b99c8e2ea28e725f8ac3437fceda","src/lang/zh.rs":"e53e759bc57c47dbda3c6297939e453c9df2ebc433a0cc0a86ede17cc6ad2bf5","src/lib.rs":"97946df282c4292dfc878604848fb55a0c859db85433f4d38254cef0e446d3d8","src/pipeline.rs":"d9fa86014c3cf73d60f9ae176f31addd9c3ea2ee28ed734ece066182c781e01e","tests/data/ar.in.txt":"df497c09371f5c43438e24adb4b48190f91fdc2d8295ac41f59db70c79c5ef9c","tests/data/ar.out.txt":"a19e4a2e86377d83c3acc21abaf38a2d940f6669cbf5b6ed20c964000862027b","tests/data/da.in.txt":"a442d72ca6f52c13e32e8f85103ae9fb9e4780687d9966a30bca3123851e2721","tests/data/da.out.txt":"3534e1b35613992c3466954c5166924335d8b271a5b0ccb03ec134e4dc373e90","tests/data/de.in.txt":"c0e73aed65159717b463dcc685d1c61eb01e1633df006689d8c85bff617bf886","tests/data/de.out.txt":"67aee82047df6ecabab9969185bbd1fa33c560cac4bf801115412c5ffd45f13d","tests/data/du.in.txt":"83e6ba0a2b1f21564af4f85b06bd9c5f0aff387395eb7742ba8204e2618b89a2","tests/data/du.out.txt":"d8def1c8f74193424357ac748b0fdbee5ca0d0f600258d46aac56d509e0135be","tests/data/en.in.txt":"70d401118c572c1f4eca49812e33ffa432949bb6fa6a16be4299c3b75d17fa44","tests/data/en.out.txt":"fbfabfa8b3145fc9846b6aba3273e4cf6b92d7fcc8e4c557dba92321814af41a"
,"tests/data/es.in.txt":"2b0c164f73f829c4631272e02714ca2cd65ed58651e366782e03935668947beb","tests/data/es.out.txt":"dc0371b025b68c811af33d92a026d9e04db3f9205eb65733ab127b38f67320d1","tests/data/fi.in.txt":"6e8cd338b0ec33640c17e6b4ff1b23075b096ed1a21a3c268600e962db261f56","tests/data/fi.out.txt":"4fa9d886ae34463927868fab1ed7912ce95ce57484622bdf2626e627e11730c5","tests/data/fr.in.txt":"3f15507a9dc484f89bc855a63b8a9190b52863c69d1ba22e035853d9ac3bf8a4","tests/data/fr.out.txt":"017a9ca5351d033761cfd068a64818aebf3ebe1379b644da3ab5b99db68f96e5","tests/data/it.in.txt":"083ac8f27965a4f3d5196b61db1ed16eb2cb06062072e2f0e2c403e60aa5bd39","tests/data/it.out.txt":"98aec5db66bcf49a42a3cca2ef4c65c7b00f50c8807fc0930a96ad5a038df2cd","tests/data/ja.in.txt":"0f7fb44fe828e2b336a59cb9812a52941f11615c1dac00eb3ddf8a1e0379f5ec","tests/data/ja.out.txt":"bb76f3288a395f45fc548c0b0369025ac8d3314e5464715c18fdaa85c0a41206","tests/data/no.in.txt":"f8b08e200503d7da64c616d6fefd91c0ed1205781c853cee28d72fbf12c64dc5","tests/data/no.out.txt":"6d4e88befdf405840cf7bc9d1cd911e449a7776749bedf89c0a06ed5c5851327","tests/data/pt.in.txt":"03c3f254f94c6af9970a0cf385d9ef4eae571add817d73632ab9b2c425bc566a","tests/data/pt.out.txt":"f16faa097f2e89d325c3ad4904c28731a889bfe06d6149aea85f914271dfeec5","tests/data/ro.in.txt":"9cd314ad1a9226826c4ba7bcc54d17576c04770983b8c11e1a0954c10f31d35c","tests/data/ro.out.txt":"c6e985fd89afbf998c898c409b829ac966caaab5ea99eca056fed84d59dcf320","tests/data/ru.in.txt":"6aec901d9551242763be93fc531fd7b6e874e305dc77a41e381ea2becfe2aa77","tests/data/ru.out.txt":"ea88681ded3d3fad858808fa14f55944ff5cdabf880d88ee6f4c829636ca4895","tests/data/sv.in.txt":"1c17bbf83e12945ca5215cd08f5ce00b7389fe9337653ff85cb623c44ca1f89e","tests/data/sv.out.txt":"2c7f7c09fa8736ae479bfdae25844a542aa50d327c3b39901a366011b776681e","tests/data/tr.in.txt":"2231a1577866fe6754b2d9f63597bcbe25154ad8f5308757597653dece5ff0a7","tests/data/tr.out.txt":"99d981f997f129336dab61b3530b28051342c5525944384003049925d792f5e5","tests/d
ata/zh.in.txt":"7e81897c36884745bfd7ead3a75ac6b6f78910e57985755f0628e5b492c0ace5","tests/data/zh.out.txt":"c893d2b0308b64f2f86f4b1ad6e6cfb0662f810ee6879529740d91b5c2fa1385","tests/searchindex_fixture_en.json":"7c912cd61e4b4891b5b4eb10fa2cfd38f78c43900432bc0dd5d847abe36c7439","tests/searchindex_fixture_ja.json":"5d1b87b93bc4dae04d810a4606dbaa8306033bffc7d67fc9714e4308c34803bb","tests/test-index.rs":"fd693de29a994953bec77d45676e81469da9a5ffe104ab3d1f5d9bd8f485a398","tests/test-pipeline.rs":"6f98b2df09e528e8fa984333f661f554779f1cd062564da346561eb595aa8942"},"package":"e6dae5cac90640734ee881bc5f21b6e5123f4e5235e52428db114abffc2391d6"}
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/CHANGELOG.md b/vendor/elasticlunr-rs/CHANGELOG.md new file mode 100644 index 000000000..e7b79ba33 --- /dev/null +++ b/vendor/elasticlunr-rs/CHANGELOG.md @@ -0,0 +1,33 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [3.0.0] - 2022-06-01 +### Added + - Language support for Arabic ([#40](https://github.com/mattico/elasticlunr-rs/pull/40])). + - Add the `Language` trait to make it easier to implement languages outside the crate. + - Add `IndexBuilder::add_field_with_tokenizer` to specify the tokenizer for a field. + +### Changed + - Update to 2018 edition, and bump MSRV to 1.54.0. + - Change benchmarks to use Criterion. + - Remove dependency on lazy_static. + - Update dependencies. + - Use Unicode character classes for trimmer. + - `IndexBuilder` functions which add fields will now panic if the same field is added multiple times. + - Fix `IndexBuilder` not respecting field insertion order. + +### Removed + - Remove the `default` feature. You now need to opt-in to the `languages` feature. + - Remove the deprecated function `Pipeline::for_language`. + - Remove the `pipeline::tokenize*` functions, which are now implemented as part of the `Language` trait. + - Remove `Index::add_doc_with_tokenizer(s)`, replaced by `IndexBuilder::add_field_with_tokenizer`. + - Remove the `Language` enum. Use the `Language` trait implementations in the `lang` modules, and the free functions `lang::from_name`, `lang::from_code`, and `lang::languages`. 
+ + +[Unreleased]: https://github.com/mattico/elasticlunr-rs/compare/v3.0.0...HEAD +[3.0.0]: https://github.com/mattico/elasticlunr-rs/compare/v2.3.14...v3.0.0 diff --git a/vendor/elasticlunr-rs/Cargo.lock b/vendor/elasticlunr-rs/Cargo.lock new file mode 100644 index 000000000..f0cc20a74 --- /dev/null +++ b/vendor/elasticlunr-rs/Cargo.lock @@ -0,0 +1,1475 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08f9b8508dccb7687a1d6c4ce66b2b0ecef467c94667de27d8d7fe1f8d2a9cdc" + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "base64" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = 
"bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "build_const" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ae4235e6dac0694637c763029ecea1a2ec9e4e06ec2729bd21ba4d9c863eb7" + +[[package]] +name = "bumpalo" +version = "3.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3" + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "cast" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a" +dependencies = [ + "rustc_version", +] + +[[package]] +name = "cc" +version = "1.0.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" + +[[package]] +name = "cedarwood" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa312498f9f41452998d984d3deb84c84f86aeb8a2499d7505bb8106d78d147d" +dependencies = [ + "smallvec", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chunked_transfer" +version = "1.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "fff857943da45f546682664a79488be82e69e43c1a7a2307679ab9afb3a66d2e" + +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "bitflags", + "textwrap 0.11.0", + "unicode-width", +] + +[[package]] +name = "clap" +version = "3.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2dbdf4bdacb33466e854ce889eee8dfd5729abf7ccd7664d0a2d60cd384440b" +dependencies = [ + "atty", + "bitflags", + "clap_derive", + "clap_lex", + "indexmap", + "lazy_static", + "strsim", + "termcolor", + "textwrap 0.15.0", +] + +[[package]] +name = "clap_derive" +version = "3.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25320346e922cffe59c0bbc5410c8d8784509efb321488971081313cb1e1a33c" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a37c35f1112dad5e6e0b1adaff798507497a18fceeb30cceb3bae7d1427b9213" +dependencies = [ + "os_str_bytes", +] + +[[package]] +name = "crc" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb" +dependencies = [ + "build_const", +] + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "criterion" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1604dafd25fba2fe2d5895a9da139f8dc9b319a5fe5354ca137cbbce4e178d10" +dependencies = [ + "atty", + "cast", + "clap 
2.34.0", + "criterion-plot", + "csv", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_cbor", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aaa7bd5fb665c6864b5f963dd9097905c54125909c7aa94c9e18507cdbe6c53" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1145cf131a2c6ba0615079ab6a638f7e1973ac9c2634fcbeaaad6114246efe8c" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "lazy_static", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf124c720b7686e3c2663cf54062ab0f68a88af2fb6a030e87e30bf721fcb38" +dependencies = [ + "cfg-if", + "lazy_static", +] + +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa 0.4.8", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "elasticlunr-rs" +version = "3.0.0" +dependencies = [ + "criterion", + "jieba-rs", + "lindera", + "lindera-core", + "maplit", + "regex", + "rust-stemmers", + "serde", + "serde_derive", + "serde_json", +] + +[[package]] +name = "encoding" +version = "0.2.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec" +dependencies = [ + "encoding-index-japanese", + "encoding-index-korean", + "encoding-index-simpchinese", + "encoding-index-singlebyte", + "encoding-index-tradchinese", +] + +[[package]] +name = "encoding-index-japanese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91" +dependencies = [ + "encoding_index_tests", +] + +[[package]] +name = "encoding-index-korean" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81" +dependencies = [ + "encoding_index_tests", +] + +[[package]] +name = "encoding-index-simpchinese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7" +dependencies = [ + "encoding_index_tests", +] + +[[package]] +name = "encoding-index-singlebyte" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a" +dependencies = [ + "encoding_index_tests", +] + +[[package]] +name = 
"encoding-index-tradchinese" +version = "1.20141219.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18" +dependencies = [ + "encoding_index_tests", +] + +[[package]] +name = "encoding_index_tests" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569" + +[[package]] +name = "env_logger" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3" +dependencies = [ + "atty", + "humantime", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "filetime" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0408e2626025178a6a7f7ffc05a25bc47103229f19c113755de7bf63816290c" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "winapi", +] + +[[package]] +name = "flate2" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "form_urlencoded" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" +dependencies = [ + "matches", + "percent-encoding", +] + +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + +[[package]] +name = "getrandom" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" +dependencies = [ + "cfg-if", + "libc", + 
"wasi", +] + +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" + +[[package]] +name = "heck" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "idna" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8" +dependencies = [ + "matches", + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6012d540c5baa3589337a98ce73408de9b5a25ec9fc2c6fd6be8f0d39e0ca5a" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "itertools" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" +dependencies = [ + "either", +] + +[[package]] +name = 
"itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "itoa" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" + +[[package]] +name = "jieba-rs" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c7e12f50325401dde50c29ca32cff44bae20873135b39f4e19ecf305226dd80" +dependencies = [ + "cedarwood", + "fxhash", + "hashbrown", + "lazy_static", + "phf", + "phf_codegen", + "regex", +] + +[[package]] +name = "js-sys" +version = "0.3.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "671a26f820db17c2a2750743f1dd03bafd15b98c9f30c7c2628c024c05d73397" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" + +[[package]] +name = "lindera" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d1c5db4b1d12637aa316dc1adb215f78fe79025080af750942516c5ff17d1a0" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "encoding", + "lindera-cc-cedict-builder", + "lindera-core", + "lindera-dictionary", + "lindera-ipadic", + "lindera-ipadic-builder", + "lindera-ko-dic-builder", + "lindera-unidic-builder", + "serde", + "serde_json", + "thiserror", +] + +[[package]] +name = "lindera-cc-cedict-builder" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"73a3509fb497340571d49feddb57e1db2ce5248c4d449f2548d0ee8cb745eb1e" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "clap 3.1.18", + "csv", + "encoding", + "env_logger", + "glob", + "lindera-core", + "lindera-decompress", + "log", + "yada", +] + +[[package]] +name = "lindera-core" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d20d1b2c085393aed58625d741beca69410e1143fc35bc67ebc35c9885f9f74" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "encoding", + "log", + "serde", + "thiserror", + "yada", +] + +[[package]] +name = "lindera-decompress" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b96b8050cded13927a99bcb8cbb0987f89fc8f35429fc153b4bc05ddc7a53a44" +dependencies = [ + "anyhow", + "lzma-rs", + "serde", +] + +[[package]] +name = "lindera-dictionary" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5abe3dddc22303402957edb4472ab0c996e0d93b3b00643de3bee8b28c2f9297" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "lindera-core", +] + +[[package]] +name = "lindera-ipadic" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8f4c111f6ad9eb9e015d02061af2ed36fc0255f29359294415c7c2f1ea5b5b6" +dependencies = [ + "bincode", + "byteorder", + "encoding", + "flate2", + "lindera-core", + "lindera-ipadic-builder", + "once_cell", + "tar", + "ureq", +] + +[[package]] +name = "lindera-ipadic-builder" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2b9893f22a4a7511ac70ff7d96cda9b8d7259b7d7121784183c73bc593ce6e7" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "clap 3.1.18", + "encoding", + "env_logger", + "glob", + "lindera-core", + "lindera-decompress", + "log", + "serde", + "yada", +] + +[[package]] +name = "lindera-ko-dic-builder" +version = "0.13.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "14282600ebfe7ab6fd4f3042143024ff9d74c09d58fd983d0c587839cf940d4a" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "clap 3.1.18", + "csv", + "encoding", + "env_logger", + "glob", + "lindera-core", + "lindera-decompress", + "log", + "yada", +] + +[[package]] +name = "lindera-unidic-builder" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b20825d46c95854e47c532c3e548dfec07c8f187c1ed89383cb6c35790338088" +dependencies = [ + "anyhow", + "bincode", + "byteorder", + "clap 3.1.18", + "csv", + "encoding", + "env_logger", + "glob", + "lindera-core", + "lindera-decompress", + "log", + "yada", +] + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "lzma-rs" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aba8ecb0450dfabce4ad72085eed0a75dffe8f21f7ada05638564ea9db2d7fb1" +dependencies = [ + "byteorder", + "crc", +] + +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + +[[package]] +name = "matches" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ 
+ "autocfg", +] + +[[package]] +name = "miniz_oxide" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f5c75688da582b8ffc1f1799e9db273f32133c49e048f614d22ec3256773ccc" +dependencies = [ + "adler", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" + +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + +[[package]] +name = "os_str_bytes" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa" + +[[package]] +name = "percent-encoding" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" + +[[package]] +name = "phf" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +dependencies = [ + 
"phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "plotters" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a3fd9ec30b9749ce28cd91f255d569591cdf937fe280c312143e3c4bad6f2a" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d88417318da0eaf0fdcdb51a0ee6c3bed624333bff8f946733049380be67ac1c" + +[[package]] +name = "plotters-svg" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521fa9638fa597e1dc53e9412a4f9cefb01187ee1f7413076f9e6749e2885ba9" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rayon" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "num_cpus", +] + +[[package]] +name = "redox_syscall" +version = "0.2.13" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + +[[package]] +name = "regex-syntax" +version = "0.6.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" + +[[package]] +name = "ring" +version = "0.16.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +dependencies = [ + "cc", + "libc", + "once_cell", + "spin", + "untrusted", + "web-sys", + "winapi", +] + +[[package]] +name = "rust-stemmers" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" +dependencies = [ + "serde", + "serde_derive", +] + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "rustls" +version = "0.20.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aab8ee6c7097ed6057f43c187a62418d0c05a4bd5f18b3571db50ee0f9ce033" +dependencies = [ + "log", + "ring", + "sct", + "webpki", +] + +[[package]] +name = "ryu" +version = "1.0.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "sct" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "semver" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cb243bdfdb5936c8dc3c45762a19d12ab4550cdc753bc247637d4ec35a040fd" + +[[package]] +name = "serde" +version = "1.0.137" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_cbor" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +dependencies = [ + "half", + "serde", +] + +[[package]] +name = "serde_derive" +version = "1.0.137" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c" +dependencies = [ + "itoa 1.0.2", + "ryu", + 
"serde", +] + +[[package]] +name = "siphasher" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" + +[[package]] +name = "smallvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" + +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbaf6116ab8924f39d52792136fb74fd60a80194cf1b1c6ffa6453eef1c3f942" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tar" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b55807c0344e1e6c04d7c965f5289c39a8d94ae23ed5c0b57aabac549f871c6" +dependencies = [ + "filetime", + "libc", + "xattr", +] + +[[package]] +name = "termcolor" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "textwrap" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" + 
+[[package]] +name = "thiserror" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" + +[[package]] +name = "unicode-bidi" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" + +[[package]] +name = "unicode-ident" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" + +[[package]] +name = "unicode-normalization" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + +[[package]] +name = "untrusted" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" + +[[package]] +name = "ureq" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9399fa2f927a3d327187cbd201480cee55bee6ac5d3c77dd27f0c6814cff16d5" +dependencies = [ + "base64", + "chunked_transfer", + "log", + "once_cell", + "rustls", + "url", + "webpki", + "webpki-roots", +] + +[[package]] +name = "url" +version = "2.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c" +dependencies = [ + "form_urlencoded", + "idna", + "matches", + "percent-encoding", +] + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + +[[package]] +name = "wasm-bindgen" +version = "0.2.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27370197c907c55e3f1a9fbe26f44e937fe6451368324e009cba39e139dc08ad" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"53e04185bfa3a779273da532f5025e33398409573f348985af9a1cbf3774d3f4" +dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17cae7ff784d7e83a2fe7611cfe766ecf034111b49deb850a3dc7699c08251f5" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99ec0dc7a4756fffc231aab1b9f2f578d23cd391390ab27f952ae0c9b3ece20b" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d554b7f530dee5964d9a9468d95c1f8b8acae4f282807e7d27d4b03099a46744" + +[[package]] +name = "web-sys" +version = "0.3.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b17e741662c70c8bd24ac5c5b18de314a2c26c32bf8346ee1e6f53de919c283" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f095d78192e208183081cc07bc5515ef55216397af48b873e5edcd72637fa1bd" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "webpki-roots" +version = "0.22.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d8de8415c823c8abd270ad483c6feeac771fad964890779f9a8cb24fbbc1bf" +dependencies = [ + "webpki", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + 
"winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "xattr" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1526bbe5aaeb5eb06885f4d987bcdfa5e23187055de9b83fe00156a821fabc" +dependencies = [ + "libc", +] + +[[package]] +name = "yada" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6d12cb7a57bbf2ab670ed9545bae3648048547f9039279a89ce000208e585c1" diff --git a/vendor/elasticlunr-rs/Cargo.toml b/vendor/elasticlunr-rs/Cargo.toml new file mode 100644 index 000000000..10e0d8411 --- /dev/null +++ b/vendor/elasticlunr-rs/Cargo.toml @@ -0,0 +1,86 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. 
If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +edition = "2018" +rust-version = "1.54" +name = "elasticlunr-rs" +version = "3.0.0" +authors = ["Matt Ickstadt <mattico8@gmail.com>"] +exclude = ["tests/lunr-fixture-gen/**", "js/**"] +description = "A partial port of elasticlunr.js to Rust for generating static document search indexes" +documentation = "https://docs.rs/elasticlunr-rs" +readme = "README.md" +keywords = ["search", "index", "indexing", "lunr", "elasticlunr"] +license = "MIT/Apache-2.0" +repository = "https://github.com/mattico/elasticlunr-rs" + +[lib] +name = "elasticlunr" + +[[bench]] +name = "bench" +harness = false +[dependencies.jieba-rs] +version = "0.6" +optional = true + +[dependencies.lindera] +version = "0.13" +features = ["ipadic"] +optional = true + +[dependencies.lindera-core] +version = "0.13" +optional = true + +[dependencies.regex] +version = "1" + +[dependencies.rust-stemmers] +version = "1.2.0" +optional = true + +[dependencies.serde] +version = "1" + +[dependencies.serde_derive] +version = "1.0.34" + +[dependencies.serde_json] +version = "1" +[dev-dependencies.criterion] +version = "0.3.0" + +[dev-dependencies.maplit] +version = "1" + +[features] +ar = [] +da = ["rust-stemmers"] +de = ["rust-stemmers"] +du = ["rust-stemmers"] +es = ["rust-stemmers"] +fi = ["rust-stemmers"] +fr = ["rust-stemmers"] +it = ["rust-stemmers"] +ja = ["lindera", "lindera-core"] +languages = ["ar", "da", "de", "du", "es", "fi", "fr", "it", "ja", "no", "pt", "ro", "ru", "sv", "tr", "zh"] +no = ["rust-stemmers"] +pt = ["rust-stemmers"] +ro = ["rust-stemmers"] +ru = ["rust-stemmers"] +sv = ["rust-stemmers"] +tr = ["rust-stemmers"] +zh = ["jieba-rs"] +[badges.maintenance] +status = "passively-maintained" diff --git a/vendor/elasticlunr-rs/LICENSE-APACHE b/vendor/elasticlunr-rs/LICENSE-APACHE new file mode 100644 index 000000000..f8e5e5ea0 --- /dev/null +++ 
b/vendor/elasticlunr-rs/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/LICENSE-JS b/vendor/elasticlunr-rs/LICENSE-JS new file mode 100644 index 000000000..accb4e986 --- /dev/null +++ b/vendor/elasticlunr-rs/LICENSE-JS @@ -0,0 +1,21 @@ +Portions of this library's code is ported from elasticlunr.js +Used under the terms of the MIT license. + +Copyright (C) 2017 by Wei Song + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/LICENSE-MIT b/vendor/elasticlunr-rs/LICENSE-MIT new file mode 100644 index 000000000..8916d12e9 --- /dev/null +++ b/vendor/elasticlunr-rs/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2017 Matthew Ickstadt + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE.
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/LICENSE-WORDS b/vendor/elasticlunr-rs/LICENSE-WORDS new file mode 100644 index 000000000..c9125eccc --- /dev/null +++ b/vendor/elasticlunr-rs/LICENSE-WORDS @@ -0,0 +1,23 @@ +Word lists originally from https://github.com/brenes/stopwords-filter +Used under the terms of the MIT license. + +Copyright (c) 2012 David J. Brenes + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/README.md b/vendor/elasticlunr-rs/README.md new file mode 100644 index 000000000..bd7d534e9 --- /dev/null +++ b/vendor/elasticlunr-rs/README.md @@ -0,0 +1,58 @@ +# elasticlunr-rs + +![Build Status](https://github.com/mattico/elasticlunr-rs/workflows/CI/badge.svg) +[![Documentation](https://docs.rs/elasticlunr-rs/badge.svg)](https://docs.rs/elasticlunr-rs) +[![Crates.io](https://img.shields.io/crates/v/elasticlunr-rs.svg)](https://crates.io/crates/elasticlunr-rs) +![Maintenance](https://img.shields.io/badge/Maintenance-Passive-yellow) + +A partial port of [elasticlunr.js][eljs] to Rust. Intended to be used for +generating compatible search indices. + +## Example + +```Rust +use std::fs::File; +use std::io::Write; +use elasticlunr::Index; + +let mut index = Index::new(&["title", "body"]); +index.add_doc("1", &["This is a title", "This is body text!"]); +// Add more documents... +let mut file = File::create("out.json").unwrap(); +file.write_all(index.to_json_pretty().as_bytes()); +``` + +## Minimum Supported Rust Version + +1.54.0 + +## Languages + +This library includes optional support for non-English languages, see the features in `Cargo.toml`. Like in the JavaScript +version, the language support is designed to be compatible with the [lunr-languages plugins][lunr-languages]. Some +languages use a modified version, which is included in the `js` directory of the repository. + +## License + +This repository is offered under the terms of the + +- Apache License, Version 2.0, (LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0) +- MIT license (LICENSE-MIT or http://opensource.org/licenses/MIT) + +at your option. + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in the work by you, as defined in the Apache-2.0 license, shall be +dual licensed as above, without any additional terms or conditions. 
+ +Includes code ported from [elasticlunr.js][eljs] Copyright (C) 2017 by Wei Song, +used under license. See LICENSE-JS for details. + +Includes stop word lists ported from [stopwords-filter][swft] Copyright (C) 2012 +David J. Brenes, used under license. See LICENSE-WORDS for details. + +Bundled JavaScript code in the repository (not included in the Cargo package) may have other licenses. + +[lunr-languages]: https://github.com/MihaiValentin/lunr-languages +[eljs]: https://github.com/weixsong/elasticlunr.js +[swft]: https://github.com/brenes/stopwords-filter
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/benches/bench.rs b/vendor/elasticlunr-rs/benches/bench.rs new file mode 100644 index 000000000..03fa05b42 --- /dev/null +++ b/vendor/elasticlunr-rs/benches/bench.rs @@ -0,0 +1,21 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use elasticlunr::Index; + +fn bench_main(c: &mut Criterion) { + // BTreeMap<String, IndexItem>: 3,165,389 ns/iter (+/- 420,869) + // BTreeMap<char, IndexItem>: 2,920,902 ns/iter (+/- 118,729) + c.bench_function("create_index", |b| { + let text = include_str!("../tests/data/en.in.txt"); + let sections: Vec<_> = text.split("\n\n").collect(); + b.iter(|| { + let mut index = Index::new(&["section"]); + for (i, section) in sections.iter().enumerate() { + index.add_doc(&format!("section_{}", i), &[section]); + } + black_box(index.to_json()); + }) + }); +} + +criterion_group!(benches, bench_main); +criterion_main!(benches); diff --git a/vendor/elasticlunr-rs/examples/export_json.rs b/vendor/elasticlunr-rs/examples/export_json.rs new file mode 100644 index 000000000..3e70a4de0 --- /dev/null +++ b/vendor/elasticlunr-rs/examples/export_json.rs @@ -0,0 +1,23 @@ +use elasticlunr::Index; +use std::fs::File; +use std::io::Write; + +fn main() { + let mut index = Index::new(&["title", "body"]); + index.add_doc( + "1", + &[ + "This Week in Rust 207", + "Hello and welcome to another issue of This Week in Rust!", + ], + ); + index.add_doc( + "2", + &[ + "This Week in Rust 206", + "Hello and welcome to another issue of This Week in Rust!", + ], + ); + let mut file = File::create("examples/out.json").unwrap(); + file.write_all(index.to_json_pretty().as_bytes()).unwrap(); +} diff --git a/vendor/elasticlunr-rs/src/config.rs b/vendor/elasticlunr-rs/src/config.rs new file mode 100644 index 000000000..304bcb227 --- /dev/null +++ b/vendor/elasticlunr-rs/src/config.rs @@ -0,0 +1,128 @@ +//! These types are not used for generating `Index`es. They are provided to help with +//! 
creating compatible JSON structures for configuring the JavaScript search +//! function. +//! +//! *Reference:* +//! <http://elasticlunr.com/docs/configuration.js.html> + +use std::collections::BTreeMap; + +/// Used to set the search configuration for a specific field. +/// When `expand` or `bool` is `None`, elasticlunr.js will use the value from +/// the global configuration. The `boost` field, if present, +/// increases the importance of this field when ordering search results. +#[derive(Serialize, Deserialize, Default, Debug, Copy, Clone, Eq, PartialEq)] +pub struct SearchOptionsField { + #[serde(skip_serializing_if = "Option::is_none")] + pub boost: Option<u8>, + #[serde(skip_serializing_if = "Option::is_none")] + pub bool: Option<SearchBool>, + #[serde(skip_serializing_if = "Option::is_none")] + pub expand: Option<bool>, +} + +/// Sets which boolean model is used for searching with +/// multiple terms. Defaults to `Or`. +/// +/// - *AND* requires every search term to be present in results +/// - *OR* accepts results which have at least one term +/// +#[derive(Serialize, Deserialize, Debug, Copy, Clone, Eq, PartialEq)] +#[serde(rename_all = "SCREAMING_SNAKE_CASE")] +pub enum SearchBool { + Or, + And, +} + +impl Default for SearchBool { + fn default() -> Self { + SearchBool::Or + } +} + +/// The search configuration map which is passed to the +/// elasticlunr.js `Index.search()` function. 
+/// +/// |Key |Default| +/// |--------|-------| +/// |`bool` |`OR` | +/// |`expand`|`false`| +#[derive(Serialize, Deserialize, Default, Debug, Clone, Eq, PartialEq)] +pub struct SearchOptions { + pub bool: SearchBool, + pub expand: bool, + pub fields: BTreeMap<String, SearchOptionsField>, +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json; + + #[test] + fn test_normal_config() { + let options = SearchOptions { + fields: btreemap![ + "title".into() => SearchOptionsField { + boost: Some(5), + ..Default::default() + }, + "body".into() => SearchOptionsField { + boost: Some(1), + ..Default::default() + }, + ], + ..Default::default() + }; + let stringed = serde_json::to_string(&options).unwrap(); + + assert_eq!( + stringed, + r#"{"bool":"OR","expand":false,"fields":{"body":{"boost":1},"title":{"boost":5}}}"# + ); + } + + #[test] + fn test_complex_config() { + let options = SearchOptions { + fields: btreemap! { + "title".into() => SearchOptionsField { + expand: Some(true), + ..Default::default() + }, + "body".into() => SearchOptionsField { + bool: Some(SearchBool::Or), + ..Default::default() + }, + "breadcrumbs".into() => SearchOptionsField { + bool: Some(SearchBool::default()), + boost: Some(200), + ..Default::default() + }, + }, + expand: false, + bool: SearchBool::And, + }; + let stringed = serde_json::to_string_pretty(&options).unwrap(); + + assert_eq!( + stringed, + r#"{ + "bool": "AND", + "expand": false, + "fields": { + "body": { + "bool": "OR" + }, + "breadcrumbs": { + "boost": 200, + "bool": "OR" + }, + "title": { + "expand": true + } + } +}"# + ); + } +} diff --git a/vendor/elasticlunr-rs/src/document_store.rs b/vendor/elasticlunr-rs/src/document_store.rs new file mode 100644 index 000000000..5b745d2ee --- /dev/null +++ b/vendor/elasticlunr-rs/src/document_store.rs @@ -0,0 +1,330 @@ +//! Implements an elasticlunr.js document store. Most users do not need to use this module directly. 
+ +use std::collections::BTreeMap; + +/// The document store saves the complete text of each item saved to the index, if enabled. +/// Most users do not need to use this type directly. +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(rename_all = "camelCase")] +pub struct DocumentStore { + pub save: bool, + pub docs: BTreeMap<String, BTreeMap<String, String>>, + pub doc_info: BTreeMap<String, BTreeMap<String, usize>>, + // Redundant with docs.len(), but needed for serialization + pub length: usize, +} + +impl DocumentStore { + pub fn new(save: bool) -> Self { + DocumentStore { + save, + docs: BTreeMap::new(), + doc_info: BTreeMap::new(), + length: 0, + } + } + + pub fn len(&self) -> usize { + self.docs.len() + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn is_stored(&self) -> bool { + self.save + } + + pub fn has_doc(&self, doc_ref: &str) -> bool { + self.docs.contains_key(doc_ref) + } + + pub fn add_doc(&mut self, doc_ref: &str, doc: BTreeMap<String, String>) { + if !self.has_doc(doc_ref) { + self.length += 1; + } + + self.docs.insert( + doc_ref.into(), + if self.save { doc } else { BTreeMap::new() }, + ); + } + + pub fn get_doc(&self, doc_ref: &str) -> Option<BTreeMap<String, String>> { + self.docs.get(doc_ref).cloned() + } + + pub fn remove_doc(&mut self, doc_ref: &str) { + if self.has_doc(doc_ref) { + self.length -= 1; + } + + self.docs.remove(doc_ref); + } + + pub fn add_field_length(&mut self, doc_ref: &str, field: &str, length: usize) { + self.doc_info + .entry(doc_ref.into()) + .or_insert_with(BTreeMap::new) + .insert(field.into(), length); + } + + pub fn get_field_length(&self, doc_ref: &str, field: &str) -> usize { + if self.has_doc(doc_ref) { + self.doc_info + .get(doc_ref) + .and_then(|e| e.get(field)) + .cloned() + .unwrap_or(0) + } else { + 0 + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn add_doc_tokens() { + let mut store = DocumentStore::new(true); + let doc = btreemap! 
{ "title".into() => "eggs bread".into() }; + + store.add_doc("1", doc.clone()); + assert_eq!(store.get_doc("1").unwrap(), doc); + } + + #[test] + fn create_doc_no_store() { + let mut store = DocumentStore::new(false); + let doc = btreemap! { "title".into() => "eggs bread".into() }; + + store.add_doc("1", doc); + assert_eq!(store.len(), 1); + assert_eq!(store.is_stored(), false); + assert_eq!(store.has_doc("1"), true); + } + + #[test] + fn add_doc_no_store() { + let mut store = DocumentStore::new(false); + let doc1 = btreemap! { "title".into() => "eggs bread".into() }; + let doc2 = btreemap! { "title".into() => "hello world".into() }; + + store.add_doc("1", doc1); + store.add_doc("2", doc2); + assert_eq!(store.len(), 2); + assert_eq!(store.is_stored(), false); + assert_eq!(store.has_doc("1"), true); + assert_eq!(store.has_doc("2"), true); + } + + #[test] + fn is_stored_true() { + let store = DocumentStore::new(true); + assert_eq!(store.is_stored(), true); + } + + #[test] + fn is_stored_false() { + let store = DocumentStore::new(false); + assert_eq!(store.is_stored(), false); + } + + #[test] + fn get_doc_no_store() { + let mut store = DocumentStore::new(false); + let doc1 = btreemap! { "title".into() => "eggs bread".into() }; + let doc2 = btreemap! { "title".into() => "hello world".into() }; + + store.add_doc("1", doc1); + store.add_doc("2", doc2); + assert_eq!(store.len(), 2); + assert_eq!(store.is_stored(), false); + assert_eq!(store.get_doc("1").unwrap(), BTreeMap::new()); + assert_eq!(store.get_doc("2").unwrap(), BTreeMap::new()); + } + + #[test] + fn get_nonexistant_doc_no_store() { + let mut store = DocumentStore::new(false); + let doc1 = btreemap! { "title".into() => "eggs bread".into() }; + let doc2 = btreemap! 
{ "title".into() => "hello world".into() }; + + store.add_doc("1", doc1); + store.add_doc("2", doc2); + assert_eq!(store.len(), 2); + assert_eq!(store.is_stored(), false); + assert_eq!(store.get_doc("6"), None); + assert_eq!(store.get_doc("2").unwrap(), BTreeMap::new()); + } + + #[test] + fn remove_doc_no_store() { + let mut store = DocumentStore::new(false); + let doc1 = btreemap! { "title".into() => "eggs bread".into() }; + let doc2 = btreemap! { "title".into() => "hello world".into() }; + + store.add_doc("1", doc1); + store.add_doc("2", doc2); + store.remove_doc("1"); + assert_eq!(store.len(), 1); + assert_eq!(store.is_stored(), false); + assert_eq!(store.get_doc("2").unwrap(), BTreeMap::new()); + assert_eq!(store.get_doc("1"), None); + } + + #[test] + fn remove_nonexistant_doc() { + let mut store = DocumentStore::new(false); + let doc1 = btreemap! { "title".into() => "eggs bread".into() }; + let doc2 = btreemap! { "title".into() => "hello world".into() }; + + store.add_doc("1", doc1); + store.add_doc("2", doc2); + store.remove_doc("8"); + assert_eq!(store.len(), 2); + assert_eq!(store.is_stored(), false); + assert_eq!(store.get_doc("2").unwrap(), BTreeMap::new()); + assert_eq!(store.get_doc("1").unwrap(), BTreeMap::new()); + } + + #[test] + fn get_num_docs() { + let mut store = DocumentStore::new(true); + + assert_eq!(store.len(), 0); + store.add_doc("1", btreemap! { "title".into() => "eggs bread".into() }); + assert_eq!(store.len(), 1); + } + + #[test] + fn get_doc() { + let mut store = DocumentStore::new(true); + + assert_eq!(store.len(), 0); + store.add_doc("1", btreemap! { "title".into() => "eggs bread".into() }); + assert_eq!( + store.get_doc("1").unwrap(), + btreemap! { "title".into() => "eggs bread".into() } + ); + } + + #[test] + fn get_doc_many_fields() { + let mut store = DocumentStore::new(true); + + assert_eq!(store.len(), 0); + store.add_doc( + "1", + btreemap! { + "title".into() => "eggs bread".into() + }, + ); + store.add_doc( + "2", + btreemap! 
{ + "title".into() => "boo bar".into() + }, + ); + store.add_doc( + "3", + btreemap! { + "title".into() => "oracle".into(), + "body".into() => "Oracle is demonspawn".into() + }, + ); + assert_eq!( + store.get_doc("3").unwrap(), + btreemap! { + "title".into() => "oracle".into(), + "body".into() => "Oracle is demonspawn".into() + } + ); + assert_eq!(store.len(), 3); + } + + #[test] + fn get_nonexistant_doc() { + let mut store = DocumentStore::new(true); + + assert_eq!(store.len(), 0); + store.add_doc( + "1", + btreemap! { + "title".into() => "eggs bread".into() + }, + ); + store.add_doc( + "2", + btreemap! { + "title".into() => "boo bar".into() + }, + ); + store.add_doc( + "3", + btreemap! { + "title".into() => "oracle".into(), + "body".into() => "Oracle is demonspawn".into() + }, + ); + assert_eq!(store.get_doc("4"), None); + assert_eq!(store.get_doc("0"), None); + assert_eq!(store.len(), 3); + } + + #[test] + fn check_store_has_key() { + let mut store = DocumentStore::new(true); + + assert!(!store.has_doc("foo")); + store.add_doc("foo", btreemap! { "title".into() => "eggs bread".into() }); + assert!(store.has_doc("foo")); + } + + #[test] + fn remove_doc() { + let mut store = DocumentStore::new(true); + + store.add_doc("foo", btreemap! { "title".into() => "eggs bread".into() }); + assert!(store.has_doc("foo")); + assert_eq!(store.len(), 1); + store.remove_doc("foo"); + assert!(!store.has_doc("foo")); + assert_eq!(store.len(), 0); + } + + #[test] + fn remove_nonexistant_store() { + let mut store = DocumentStore::new(true); + + store.add_doc("foo", btreemap! { "title".into() => "eggs bread".into() }); + assert!(store.has_doc("foo")); + assert_eq!(store.len(), 1); + store.remove_doc("bar"); + assert!(store.has_doc("foo")); + assert_eq!(store.len(), 1); + } + + #[test] + fn add_field_len() { + let mut store = DocumentStore::new(true); + + store.add_doc("foo", btreemap! 
{ "title".into() => "eggs bread".into() }); + store.add_field_length("foo", "title", 2); + assert_eq!(store.get_field_length("foo", "title"), 2); + } + + #[test] + fn add_field_length_multiple() { + let mut store = DocumentStore::new(true); + + store.add_doc("foo", btreemap! { "title".into() => "eggs bread".into() }); + store.add_field_length("foo", "title", 2); + store.add_field_length("foo", "body", 10); + assert_eq!(store.get_field_length("foo", "title"), 2); + assert_eq!(store.get_field_length("foo", "body"), 10); + } +} diff --git a/vendor/elasticlunr-rs/src/inverted_index.rs b/vendor/elasticlunr-rs/src/inverted_index.rs new file mode 100644 index 000000000..be4c4c677 --- /dev/null +++ b/vendor/elasticlunr-rs/src/inverted_index.rs @@ -0,0 +1,379 @@ +//! Implements an elasticlunr.js inverted index. Most users do not need to use this module directly. + +use std::collections::BTreeMap; + +#[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq)] +struct TermFrequency { + #[serde(rename = "tf")] + pub term_freq: f64, +} + +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Default)] +struct IndexItem { + pub docs: BTreeMap<String, TermFrequency>, + #[serde(rename = "df")] + pub doc_freq: i64, + #[serde(flatten, serialize_with = "IndexItem::serialize")] + pub children: BTreeMap<char, IndexItem>, +} + +impl IndexItem { + fn new() -> Self { + Default::default() + } + + fn serialize<S>(map: &BTreeMap<char, IndexItem>, ser: S) -> Result<S::Ok, S::Error> + where + S: ::serde::Serializer, + { + use serde::ser::SerializeMap; + + let mut ser_map = ser.serialize_map(Some(map.len()))?; + let mut buf = [0u8; 4]; + for (key, value) in map { + let key = key.encode_utf8(&mut buf); + ser_map.serialize_entry(key, value)?; + } + ser_map.end() + } + + fn add_token(&mut self, doc_ref: &str, token: &str, term_freq: f64) { + let mut iter = token.chars(); + if let Some(character) = iter.next() { + let mut item = self + .children + .entry(character) + 
.or_insert_with(IndexItem::new); + + for character in iter { + let tmp = item; + item = tmp.children.entry(character).or_insert_with(IndexItem::new); + } + + if !item.docs.contains_key(doc_ref) { + item.doc_freq += 1; + } + item.docs + .insert(doc_ref.into(), TermFrequency { term_freq }); + } + } + + fn get_node(&self, token: &str) -> Option<&IndexItem> { + let mut root = self; + for ch in token.chars() { + if let Some(item) = root.children.get(&ch) { + root = item; + } else { + return None; + } + } + + Some(root) + } + + fn remove_token(&mut self, doc_ref: &str, token: &str) { + let mut iter = token.char_indices(); + if let Some((_, ch)) = iter.next() { + if let Some(item) = self.children.get_mut(&ch) { + if let Some((idx, _)) = iter.next() { + item.remove_token(doc_ref, &token[idx..]); + } else if item.docs.contains_key(doc_ref) { + item.docs.remove(doc_ref); + item.doc_freq -= 1; + } + } + } + } +} + +/// Implements an elasticlunr.js inverted index. Most users do not need to use this type directly. 
+#[derive(Serialize, Deserialize, Debug, PartialEq, Default)] +pub struct InvertedIndex { + root: IndexItem, +} + +impl InvertedIndex { + pub fn new() -> Self { + Default::default() + } + + pub fn add_token(&mut self, doc_ref: &str, token: &str, term_freq: f64) { + self.root.add_token(doc_ref, token, term_freq) + } + + pub fn has_token(&self, token: &str) -> bool { + self.root.get_node(token).map_or(false, |_| true) + } + + pub fn remove_token(&mut self, doc_ref: &str, token: &str) { + self.root.remove_token(doc_ref, token) + } + + pub fn get_docs(&self, token: &str) -> Option<BTreeMap<String, f64>> { + self.root.get_node(token).map(|node| { + node.docs + .iter() + .map(|(k, &v)| (k.clone(), v.term_freq)) + .collect() + }) + } + + pub fn get_term_frequency(&self, doc_ref: &str, token: &str) -> f64 { + self.root + .get_node(token) + .and_then(|node| node.docs.get(doc_ref)) + .map_or(0., |docs| docs.term_freq) + } + + pub fn get_doc_frequency(&self, token: &str) -> i64 { + self.root.get_node(token).map_or(0, |node| node.doc_freq) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn adding_token() { + let mut inverted_index = InvertedIndex::new(); + let token = "foo"; + + inverted_index.add_token("123", token, 1.); + assert_eq!(inverted_index.get_doc_frequency("foo"), 1); + assert_eq!(inverted_index.get_term_frequency("123", "foo"), 1.); + } + + #[test] + fn has_token() { + let mut inverted_index = InvertedIndex::new(); + let token = "foo"; + + inverted_index.add_token("123", token, 1.); + assert!(inverted_index.has_token(token)); + assert!(inverted_index.has_token("fo")); + assert!(inverted_index.has_token("f")); + + assert!(!inverted_index.has_token("bar")); + assert!(!inverted_index.has_token("foo ")); + assert!(!inverted_index.has_token("foo ")) + } + + #[test] + fn adding_another_document_to_the_token() { + let mut inverted_index = InvertedIndex::new(); + let token = "foo"; + + inverted_index.add_token("123", token, 1.); + 
inverted_index.add_token("456", token, 1.); + + assert_eq!(inverted_index.get_term_frequency("123", "foo"), 1.); + assert_eq!(inverted_index.get_term_frequency("456", "foo"), 1.); + assert_eq!(inverted_index.get_doc_frequency("foo"), 2); + } + + #[test] + fn df_of_nonexistant_token() { + let mut inverted_index = InvertedIndex::new(); + let token = "foo"; + + inverted_index.add_token("123", token, 1.); + inverted_index.add_token("456", token, 1.); + + assert_eq!(inverted_index.get_doc_frequency("foo"), 2); + assert_eq!(inverted_index.get_doc_frequency("fox"), 0); + } + + #[test] + fn adding_existing_doc() { + let mut inverted_index = InvertedIndex::new(); + let token = "foo"; + + inverted_index.add_token("123", token, 1.); + inverted_index.add_token("456", token, 1.); + inverted_index.add_token("456", token, 100.); + + assert_eq!(inverted_index.get_term_frequency("456", "foo"), 100.); + assert_eq!(inverted_index.get_doc_frequency("foo"), 2); + } + + #[test] + fn checking_token_exists_in() { + let mut inverted_index = InvertedIndex::new(); + let token = "foo"; + + inverted_index.add_token("123", token, 1.); + + assert!(inverted_index.has_token(token)); + } + + #[test] + fn checking_if_a_token_does_not_exist() { + let mut inverted_index = InvertedIndex::new(); + let token = "foo"; + + inverted_index.add_token("123", token, 1.); + assert!(!inverted_index.has_token("fooo")); + assert!(!inverted_index.has_token("bar")); + assert!(!inverted_index.has_token("fof")); + } + + #[test] + fn retrieving_items() { + let mut inverted_index = InvertedIndex::new(); + let token = "foo"; + + inverted_index.add_token("123", token, 1.); + assert_eq!( + inverted_index.get_docs(token).unwrap(), + btreemap! { + "123".into() => 1. + } + ); + + assert_eq!(inverted_index.get_docs(""), Some(BTreeMap::new())); + + inverted_index.add_token("234", "boo", 100.); + inverted_index.add_token("345", "too", 101.); + + assert_eq!( + inverted_index.get_docs(token).unwrap(), + btreemap! 
{ + "123".into() => 1. + } + ); + + inverted_index.add_token("234", token, 100.); + inverted_index.add_token("345", token, 101.); + + assert_eq!( + inverted_index.get_docs(token).unwrap(), + btreemap! { + "123".into() => 1., + "234".into() => 100., + "345".into() => 101., + } + ); + } + + #[test] + fn retrieving_nonexistant_items() { + let inverted_index = InvertedIndex::new(); + + assert_eq!(inverted_index.get_docs("foo"), None); + assert_eq!(inverted_index.get_docs("fox"), None); + } + + #[test] + fn df_of_items() { + let mut inverted_index = InvertedIndex::new(); + + inverted_index.add_token("123", "foo", 1.); + inverted_index.add_token("456", "foo", 1.); + inverted_index.add_token("789", "bar", 1.); + + assert_eq!(inverted_index.get_doc_frequency("foo"), 2); + assert_eq!(inverted_index.get_doc_frequency("bar"), 1); + assert_eq!(inverted_index.get_doc_frequency("baz"), 0); + assert_eq!(inverted_index.get_doc_frequency("ba"), 0); + assert_eq!(inverted_index.get_doc_frequency("b"), 0); + assert_eq!(inverted_index.get_doc_frequency("fo"), 0); + assert_eq!(inverted_index.get_doc_frequency("f"), 0); + } + + #[test] + fn removing_document_from_token() { + let mut inverted_index = InvertedIndex::new(); + assert_eq!(inverted_index.get_docs("foo"), None); + + inverted_index.add_token("123", "foo", 1.); + assert_eq!( + inverted_index.get_docs("foo").unwrap(), + btreemap! { + "123".into() => 1., + } + ); + + inverted_index.remove_token("123", "foo"); + assert_eq!(inverted_index.get_docs("foo"), Some(BTreeMap::new())); + assert_eq!(inverted_index.get_doc_frequency("foo"), 0); + assert_eq!(inverted_index.has_token("foo"), true); + } + + #[test] + fn removing_nonexistant_document() { + let mut inverted_index = InvertedIndex::new(); + + inverted_index.add_token("123", "foo", 1.); + inverted_index.add_token("567", "bar", 1.); + inverted_index.remove_token("foo", "456"); + + assert_eq!( + inverted_index.get_docs("foo").unwrap(), + btreemap! { + "123".into() => 1. 
+ } + ); + assert_eq!(inverted_index.get_doc_frequency("foo"), 1); + } + + #[test] + fn removing_documet_nonexistant_key() { + let mut inverted_index = InvertedIndex::new(); + + inverted_index.remove_token("123", "foo"); + assert!(!inverted_index.has_token("foo")); + assert_eq!(inverted_index.get_doc_frequency("foo"), 0); + } + + #[test] + fn get_term_frequency() { + let mut inverted_index = InvertedIndex::new(); + let token = "foo"; + + inverted_index.add_token("123", token, 2.); + inverted_index.add_token("456", token, 3.); + + assert_eq!(inverted_index.get_term_frequency("123", token), 2.); + assert_eq!(inverted_index.get_term_frequency("456", token), 3.); + assert_eq!(inverted_index.get_term_frequency("789", token), 0.); + } + + #[test] + fn get_term_frequency_nonexistant_token() { + let mut inverted_index = InvertedIndex::new(); + let token = "foo"; + + inverted_index.add_token("123", token, 2.); + inverted_index.add_token("456", token, 3.); + + assert_eq!(inverted_index.get_term_frequency("123", "ken"), 0.); + assert_eq!(inverted_index.get_term_frequency("456", "ken"), 0.); + } + + #[test] + fn get_term_frequency_nonexistant_docref() { + let mut inverted_index = InvertedIndex::new(); + let token = "foo"; + + inverted_index.add_token("123", token, 2.); + inverted_index.add_token("456", token, 3.); + + assert_eq!(inverted_index.get_term_frequency(token, "12"), 0.); + assert_eq!(inverted_index.get_term_frequency(token, "23"), 0.); + assert_eq!(inverted_index.get_term_frequency(token, "45"), 0.); + } + + #[test] + fn get_term_frequency_nonexistant_token_and_docref() { + let mut inverted_index = InvertedIndex::new(); + let token = "foo"; + + inverted_index.add_token("123", token, 2.); + inverted_index.add_token("456", token, 3.); + + assert_eq!(inverted_index.get_term_frequency("token", "1"), 0.); + assert_eq!(inverted_index.get_term_frequency("abc", "2"), 0.); + assert_eq!(inverted_index.get_term_frequency("fo", "123"), 0.); + } +} diff --git 
a/vendor/elasticlunr-rs/src/lang/ar.rs b/vendor/elasticlunr-rs/src/lang/ar.rs new file mode 100644 index 000000000..d0a640edf --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/ar.rs @@ -0,0 +1,66 @@ +use super::Language; +use crate::pipeline::{Pipeline, PipelineFn}; +use regex::Regex; + +/// Arabic Language +/// +/// Designed to be compatible with the included JavaScript implementation. See `js/lunr.ar.js`. +pub struct Arabic {} + +impl Arabic { + pub fn new() -> Self { + Self {} + } +} + +impl Language for Arabic { + fn name(&self) -> String { + "Arabic".into() + } + fn code(&self) -> String { + "ar".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + super::tokenize_whitespace(text) + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![Box::new(Stemmer::new())], + } + } +} + +struct Stemmer { + diacritics: Regex, + alefs: Regex, +} + +impl Stemmer { + pub fn new() -> Self { + let diacritics = Regex::new("[\u{0640}\u{064b}-\u{065b}]").unwrap(); + let alefs = Regex::new("[\u{0622}\u{0623}\u{0625}\u{0671}\u{0649}]").unwrap(); + Self { diacritics, alefs } + } +} + +impl PipelineFn for Stemmer { + fn name(&self) -> String { + "stemmer-ar".into() + } + + fn filter(&self, token: String) -> Option<String> { + // remove diacritics and elongating character + let result = self.diacritics.replace(&token, ""); + // replace all variations of alef (آأإٱى) to a plain alef (ا) + let result = self.alefs.replace(&result, "\u{0627}"); + if result.is_empty() { + None + } else if result == token { + Some(token) + } else { + Some(result.into()) + } + } +} diff --git a/vendor/elasticlunr-rs/src/lang/common.rs b/vendor/elasticlunr-rs/src/lang/common.rs new file mode 100644 index 000000000..5616f0138 --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/common.rs @@ -0,0 +1,97 @@ +use crate::pipeline::PipelineFn; +use regex::Regex; +use std::collections::HashSet; + +#[derive(Clone)] +pub struct StopWordFilter { + name: String, + stop_words: HashSet<String>, 
+} + +impl StopWordFilter { + pub fn new(name: &str, stop_words: &[&str]) -> Self { + Self { + name: name.into(), + stop_words: stop_words.iter().map(|s| s.to_string()).collect(), + } + } +} + +impl PipelineFn for StopWordFilter { + fn name(&self) -> String { + self.name.clone() + } + + fn filter(&self, token: String) -> Option<String> { + if self.stop_words.contains(&token) { + None + } else { + Some(token) + } + } +} + +#[derive(Clone)] +pub struct RegexTrimmer { + name: String, + trimmer: Regex, +} + +impl RegexTrimmer { + pub fn new(name: &str, word_chars: &str) -> Self { + let name = name.into(); + let trimmer = Regex::new(&format!("^[^{0}]+|[^{0}]+$", word_chars)).unwrap(); + Self { name, trimmer } + } +} + +impl PipelineFn for RegexTrimmer { + fn name(&self) -> String { + self.name.clone() + } + + fn filter(&self, token: String) -> Option<String> { + let result = self.trimmer.replace_all(&token, ""); + if result.is_empty() { + None + } else if result == token { + Some(token) + } else { + Some(result.into()) + } + } +} + +#[cfg(feature = "rust-stemmers")] +pub struct RustStemmer { + name: String, + stemmer: rust_stemmers::Stemmer, +} + +#[cfg(feature = "rust-stemmers")] +impl RustStemmer { + pub fn new(name: &str, algo: rust_stemmers::Algorithm) -> Self { + Self { + name: name.into(), + stemmer: rust_stemmers::Stemmer::create(algo), + } + } +} + +#[cfg(feature = "rust-stemmers")] +impl PipelineFn for RustStemmer { + fn name(&self) -> String { + self.name.clone() + } + + fn filter(&self, token: String) -> Option<String> { + let result = self.stemmer.stem(&token); + if result.is_empty() { + None + } else if result == token { + Some(token) + } else { + Some(result.into()) + } + } +} diff --git a/vendor/elasticlunr-rs/src/lang/da.rs b/vendor/elasticlunr-rs/src/lang/da.rs new file mode 100644 index 000000000..ab3b7dffe --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/da.rs @@ -0,0 +1,49 @@ +use super::{ + common::{RustStemmer, StopWordFilter, RegexTrimmer}, + 
Language, +}; +use crate::pipeline::Pipeline; +use rust_stemmers::Algorithm; + +#[derive(Clone)] +pub struct Danish {} + +impl Danish { + pub fn new() -> Self { + Self {} + } +} + +impl Language for Danish { + fn name(&self) -> String { + "Danish".into() + } + fn code(&self) -> String { + "da".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + super::tokenize_whitespace(text) + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(RegexTrimmer::new("trimmer-da", r"\p{Latin}")), + Box::new(StopWordFilter::new("stopWordFilter-da", STOP_WORDS)), + Box::new(RustStemmer::new("stemmer-da", Algorithm::Danish)), + ], + } + } +} + +const STOP_WORDS: &[&str] = &[ + "", "ad", "af", "alle", "alt", "anden", "at", "blev", "blive", "bliver", "da", "de", "dem", + "den", "denne", "der", "deres", "det", "dette", "dig", "din", "disse", "dog", "du", "efter", + "eller", "en", "end", "er", "et", "for", "fra", "ham", "han", "hans", "har", "havde", "have", + "hende", "hendes", "her", "hos", "hun", "hvad", "hvis", "hvor", "i", "ikke", "ind", "jeg", + "jer", "jo", "kunne", "man", "mange", "med", "meget", "men", "mig", "min", "mine", "mit", + "mod", "ned", "noget", "nogle", "nu", "når", "og", "også", "om", "op", "os", "over", "på", + "selv", "sig", "sin", "sine", "sit", "skal", "skulle", "som", "sådan", "thi", "til", "ud", + "under", "var", "vi", "vil", "ville", "vor", "være", "været", +]; diff --git a/vendor/elasticlunr-rs/src/lang/de.rs b/vendor/elasticlunr-rs/src/lang/de.rs new file mode 100644 index 000000000..244685ae9 --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/de.rs @@ -0,0 +1,273 @@ +use super::{ + common::{RustStemmer, StopWordFilter, RegexTrimmer}, + Language, +}; +use crate::pipeline::Pipeline; +use rust_stemmers::Algorithm; + +#[derive(Clone)] +pub struct German {} + +impl German { + pub fn new() -> Self { + Self {} + } +} + +impl Language for German { + fn name(&self) -> String { + "German".into() + } + fn code(&self) -> String { 
+ "de".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + super::tokenize_whitespace(text) + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(RegexTrimmer::new("trimmer-de", r"\p{Latin}")), + Box::new(StopWordFilter::new("stopWordFilter-de", STOP_WORDS)), + Box::new(RustStemmer::new("stemmer-de", Algorithm::German)), + ], + } + } +} + +const STOP_WORDS: &[&str] = &[ + "", + "aber", + "alle", + "allem", + "allen", + "aller", + "alles", + "als", + "also", + "am", + "an", + "ander", + "andere", + "anderem", + "anderen", + "anderer", + "anderes", + "anderm", + "andern", + "anderr", + "anders", + "auch", + "auf", + "aus", + "bei", + "bin", + "bis", + "bist", + "da", + "damit", + "dann", + "das", + "dasselbe", + "dazu", + "daß", + "dein", + "deine", + "deinem", + "deinen", + "deiner", + "deines", + "dem", + "demselben", + "den", + "denn", + "denselben", + "der", + "derer", + "derselbe", + "derselben", + "des", + "desselben", + "dessen", + "dich", + "die", + "dies", + "diese", + "dieselbe", + "dieselben", + "diesem", + "diesen", + "dieser", + "dieses", + "dir", + "doch", + "dort", + "du", + "durch", + "ein", + "eine", + "einem", + "einen", + "einer", + "eines", + "einig", + "einige", + "einigem", + "einigen", + "einiger", + "einiges", + "einmal", + "er", + "es", + "etwas", + "euch", + "euer", + "eure", + "eurem", + "euren", + "eurer", + "eures", + "für", + "gegen", + "gewesen", + "hab", + "habe", + "haben", + "hat", + "hatte", + "hatten", + "hier", + "hin", + "hinter", + "ich", + "ihm", + "ihn", + "ihnen", + "ihr", + "ihre", + "ihrem", + "ihren", + "ihrer", + "ihres", + "im", + "in", + "indem", + "ins", + "ist", + "jede", + "jedem", + "jeden", + "jeder", + "jedes", + "jene", + "jenem", + "jenen", + "jener", + "jenes", + "jetzt", + "kann", + "kein", + "keine", + "keinem", + "keinen", + "keiner", + "keines", + "können", + "könnte", + "machen", + "man", + "manche", + "manchem", + "manchen", + "mancher", + "manches", + "mein", + 
"meine", + "meinem", + "meinen", + "meiner", + "meines", + "mich", + "mir", + "mit", + "muss", + "musste", + "nach", + "nicht", + "nichts", + "noch", + "nun", + "nur", + "ob", + "oder", + "ohne", + "sehr", + "sein", + "seine", + "seinem", + "seinen", + "seiner", + "seines", + "selbst", + "sich", + "sie", + "sind", + "so", + "solche", + "solchem", + "solchen", + "solcher", + "solches", + "soll", + "sollte", + "sondern", + "sonst", + "um", + "und", + "uns", + "unse", + "unsem", + "unsen", + "unser", + "unses", + "unter", + "viel", + "vom", + "von", + "vor", + "war", + "waren", + "warst", + "was", + "weg", + "weil", + "weiter", + "welche", + "welchem", + "welchen", + "welcher", + "welches", + "wenn", + "werde", + "werden", + "wie", + "wieder", + "will", + "wir", + "wird", + "wirst", + "wo", + "wollen", + "wollte", + "während", + "würde", + "würden", + "zu", + "zum", + "zur", + "zwar", + "zwischen", + "über", +]; diff --git a/vendor/elasticlunr-rs/src/lang/du.rs b/vendor/elasticlunr-rs/src/lang/du.rs new file mode 100644 index 000000000..73a6d3cf7 --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/du.rs @@ -0,0 +1,50 @@ +use super::{ + common::{RustStemmer, StopWordFilter, RegexTrimmer}, + Language, +}; +use crate::pipeline::Pipeline; +use rust_stemmers::Algorithm; + +#[derive(Clone)] +pub struct Dutch {} + +impl Dutch { + pub fn new() -> Self { + Self {} + } +} + +impl Language for Dutch { + fn name(&self) -> String { + "Dutch".into() + } + fn code(&self) -> String { + "du".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + super::tokenize_whitespace(text) + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(RegexTrimmer::new("trimmer-du", r"\p{Latin}")), + Box::new(StopWordFilter::new("stopWordFilter-du", STOP_WORDS)), + Box::new(RustStemmer::new("stemmer-du", Algorithm::Dutch)), + ], + } + } +} + +const STOP_WORDS: &[&str] = &[ + "", "aan", "al", "alles", "als", "altijd", "andere", "ben", "bij", "daar", "dan", "dat", 
"de", + "der", "deze", "die", "dit", "doch", "doen", "door", "dus", "een", "eens", "en", "er", "ge", + "geen", "geweest", "haar", "had", "heb", "hebben", "heeft", "hem", "het", "hier", "hij", "hoe", + "hun", "iemand", "iets", "ik", "in", "is", "ja", "je", "kan", "kon", "kunnen", "maar", "me", + "meer", "men", "met", "mij", "mijn", "moet", "na", "naar", "niet", "niets", "nog", "nu", "of", + "om", "omdat", "onder", "ons", "ook", "op", "over", "reeds", "te", "tegen", "toch", "toen", + "tot", "u", "uit", "uw", "van", "veel", "voor", "want", "waren", "was", "wat", "werd", "wezen", + "wie", "wil", "worden", "wordt", "zal", "ze", "zelf", "zich", "zij", "zijn", "zo", "zonder", + "zou", +]; diff --git a/vendor/elasticlunr-rs/src/lang/en.rs b/vendor/elasticlunr-rs/src/lang/en.rs new file mode 100644 index 000000000..f133ed7c9 --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/en.rs @@ -0,0 +1,458 @@ +use super::{common::StopWordFilter, Language}; +use crate::pipeline::{FnWrapper, Pipeline, PipelineFn}; +use regex::Regex; + +const WORDS: &[&str] = &[ + "", "a", "able", "about", "across", "after", "all", "almost", "also", "am", "among", "an", + "and", "any", "are", "as", "at", "be", "because", "been", "but", "by", "can", "cannot", + "could", "dear", "did", "do", "does", "either", "else", "ever", "every", "for", "from", "get", + "got", "had", "has", "have", "he", "her", "hers", "him", "his", "how", "however", "i", "if", + "in", "into", "is", "it", "its", "just", "least", "let", "like", "likely", "may", "me", + "might", "most", "must", "my", "neither", "no", "nor", "not", "of", "off", "often", "on", + "only", "or", "other", "our", "own", "rather", "said", "say", "says", "she", "should", "since", + "so", "some", "than", "that", "the", "their", "them", "then", "there", "these", "they", "this", + "tis", "to", "too", "twas", "us", "wants", "was", "we", "were", "what", "when", "where", + "which", "while", "who", "whom", "why", "will", "with", "would", "yet", "you", "your", +]; + 
+#[derive(Clone)] +pub struct English { + stemmer: Stemmer, +} + +impl English { + pub fn new() -> Self { + let stemmer = Stemmer::new(); + Self { stemmer } + } +} + +impl Language for English { + fn name(&self) -> String { + "English".into() + } + fn code(&self) -> String { + "en".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + super::tokenize_whitespace(text) + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(FnWrapper("trimmer".into(), trimmer)), + Box::new(StopWordFilter::new("stopWordFilter", WORDS)), + Box::new(self.stemmer.clone()), + ], + } + } +} + +fn trimmer(token: String) -> Option<String> { + Some( + token + .trim_matches(|c: char| !c.is_digit(36) && c != '_') + .into(), + ) +} + +static STEP_2: &[(&str, &str)] = &[ + ("ational", "ate"), + ("tional", "tion"), + ("enci", "ence"), + ("anci", "ance"), + ("izer", "ize"), + ("bli", "ble"), + ("alli", "al"), + ("entli", "ent"), + ("eli", "e"), + ("ousli", "ous"), + ("ization", "ize"), + ("ation", "ate"), + ("ator", "ate"), + ("alism", "al"), + ("iveness", "ive"), + ("fulness", "ful"), + ("ousness", "ous"), + ("aliti", "al"), + ("iviti", "ive"), + ("biliti", "ble"), + ("logi", "log"), +]; + +static STEP_3: &[(&str, &str)] = &[ + ("icate", "ic"), + ("ative", ""), + ("alize", "al"), + ("iciti", "ic"), + ("ical", "ic"), + ("ful", ""), + ("ness", ""), +]; + +// This is a direct port of the stemmer from elasticlunr.js +// It's not very efficient and very not-rusty, but it +// generates identical output. 
+ +#[derive(Clone)] +struct Stemmer { + re_mgr0: Regex, + re_mgr1: Regex, + re_meq1: Regex, + re_s_v: Regex, + + re_1a: Regex, + re2_1a: Regex, + re_1b: Regex, + re2_1b: Regex, + re2_1b_2: Regex, + re3_1b_2: Regex, + re4_1b_2: Regex, + + re_1c: Regex, + re_2: Regex, + + re_3: Regex, + + re_4: Regex, + re2_4: Regex, + + re_5: Regex, + re3_5: Regex, +} + +impl PipelineFn for Stemmer { + fn name(&self) -> String { + "stemmer".into() + } + + fn filter(&self, token: String) -> Option<String> { + Some(self.stem(token)) + } +} + +// vowel +macro_rules! V { + () => { + "[aeiouy]" + }; +} + +// consonant sequence +macro_rules! CS { + () => { + "[^aeiou][^aeiouy]*" + }; +} + +// vowel sequence +macro_rules! VS { + () => { + "[aeiouy][aeiou]*" + }; +} + +#[inline] +fn concat_string(strs: &[&str]) -> String { + strs.iter().cloned().collect() +} + +impl Stemmer { + fn new() -> Self { + let mgr0 = concat!("^(", CS!(), ")?", VS!(), CS!()); + let meq1 = concat!("^(", CS!(), ")?", VS!(), CS!(), "(", VS!(), ")?$"); + let mgr1 = concat!("^(", CS!(), ")?", VS!(), CS!(), VS!(), CS!()); + let s_v = concat!("^(", CS!(), ")?", V!()); + + let re_mgr0 = Regex::new(mgr0).unwrap(); + let re_mgr1 = Regex::new(mgr1).unwrap(); + let re_meq1 = Regex::new(meq1).unwrap(); + let re_s_v = Regex::new(s_v).unwrap(); + + let re_1a = Regex::new("^(.+?)(ss|i)es$").unwrap(); + let re2_1a = Regex::new("^(.+?)([^s])s$").unwrap(); + let re_1b = Regex::new("^(.+?)eed$").unwrap(); + let re2_1b = Regex::new("^(.+?)(ed|ing)$").unwrap(); + let re2_1b_2 = Regex::new("(at|bl|iz)$").unwrap(); + let re3_1b_2 = Regex::new("([^aeiouylsz]{2})$").unwrap(); + let re4_1b_2 = Regex::new(concat!("^", CS!(), V!(), "[^aeiouwxy]$")).unwrap(); + + let re_1c = Regex::new("^(.+?[^aeiou])y$").unwrap(); + let re_2 = Regex::new( + "^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|\ + ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$", + ) + .unwrap(); + + let re_3 = 
Regex::new("^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$").unwrap(); + + let re_4 = Regex::new( + "^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$", + ) + .unwrap(); + let re2_4 = Regex::new("^(.+?)(s|t)(ion)$").unwrap(); + + let re_5 = Regex::new("^(.+?)e$").unwrap(); + let re3_5 = Regex::new(concat!("^", CS!(), V!(), "[^aeiouwxy]$")).unwrap(); + + Stemmer { + re_mgr0, + re_mgr1, + re_meq1, + re_s_v, + re_1a, + re2_1a, + re_1b, + re2_1b, + re2_1b_2, + re3_1b_2, + re4_1b_2, + re_1c, + re_2, + re_3, + re_4, + re2_4, + re_5, + re3_5, + } + } + + /// Implements the Porter stemming algorithm + pub fn stem(&self, mut w: String) -> String { + if w.len() < 3 { + return w; + } + + let starts_with_y = w.as_bytes()[0] == b'y'; + if starts_with_y { + w.remove(0); + w.insert(0, 'Y'); + } + + // TODO: There's probably a better way to handle the + // borrowchecker than cloning w a million times + + // Step 1a + if let Some(caps) = self.re_1a.captures(&w.clone()) { + w = concat_string(&[&caps[1], &caps[2]]); + } + if let Some(caps) = self.re2_1a.captures(&w.clone()) { + w = concat_string(&[&caps[1], &caps[2]]); + } + + // Step 1b + if let Some(caps) = self.re_1b.captures(&w.clone()) { + let stem = &caps[1]; + if self.re_mgr0.is_match(stem) { + w.pop(); + } + } else if let Some(caps) = self.re2_1b.captures(&w.clone()) { + let stem = &caps[1]; + if self.re_s_v.is_match(stem) { + w = stem.into(); + + let mut re3_1b_2_matched = false; + + if self.re2_1b_2.is_match(&w) { + w.push('e'); + } else if let Some(m) = self.re3_1b_2.find(&w.clone()) { + let mut suffix = m.as_str().chars(); + // Make sure the two characters are the same since we can't use backreferences + if suffix.next() == suffix.next() { + re3_1b_2_matched = true; + w.pop(); + } + } + + // re4_1b_2 still runs if re3_1b_2 matches but + // the matched chcaracters are not the same + if !re3_1b_2_matched && self.re4_1b_2.is_match(&w) { + w.push('e'); + } + } + } + + // Step 1c - replace 
suffix y or Y by i if preceded by a non-vowel which is not the first + // letter of the word (so cry -> cri, by -> by, say -> say) + if let Some(caps) = self.re_1c.captures(&w.clone()) { + let stem = &caps[1]; + w = concat_string(&[stem, "i"]); + } + + // Step 2 + if let Some(caps) = self.re_2.captures(&w.clone()) { + let stem = &caps[1]; + let suffix = &caps[2]; + if self.re_mgr0.is_match(stem) { + w = concat_string(&[stem, STEP_2.iter().find(|&&(k, _)| k == suffix).unwrap().1]); + } + } + + // Step 3 + if let Some(caps) = self.re_3.captures(&w.clone()) { + let stem = &caps[1]; + let suffix = &caps[2]; + if self.re_mgr0.is_match(stem) { + w = concat_string(&[stem, STEP_3.iter().find(|&&(k, _)| k == suffix).unwrap().1]); + } + } + + // Step 4 + if let Some(caps) = self.re_4.captures(&w.clone()) { + let stem = &caps[1]; + if self.re_mgr1.is_match(stem) { + w = stem.into(); + } + } else if let Some(caps) = self.re2_4.captures(&w.clone()) { + let stem = concat_string(&[&caps[1], &caps[2]]); + if self.re_mgr1.is_match(&stem) { + w = stem; + } + } + + // Step 5 + if let Some(caps) = self.re_5.captures(&w.clone()) { + let stem = &caps[1]; + if self.re_mgr1.is_match(stem) + || (self.re_meq1.is_match(stem) && !(self.re3_5.is_match(stem))) + { + w = stem.into(); + } + } + + if w.ends_with("ll") && self.re_mgr1.is_match(&w) { + w.pop(); + } + + // replace the original 'y' + if starts_with_y { + w.remove(0); + w.insert(0, 'y'); + } + + w + } +} + +#[cfg(test)] +mod tests { + use super::*; + + macro_rules! 
pipeline_eq { + ($func:expr, $input:expr, $output:expr) => { + assert_eq!(&$func($input.to_string()).unwrap(), $output); + }; + } + + #[test] + fn latin_characters() { + pipeline_eq!(trimmer, "hello", "hello"); + } + + #[test] + fn removing_punctuation() { + pipeline_eq!(trimmer, "hello.", "hello"); + pipeline_eq!(trimmer, "it's", "it's"); + pipeline_eq!(trimmer, "james'", "james"); + pipeline_eq!(trimmer, "stop!", "stop"); + pipeline_eq!(trimmer, "first,", "first"); + pipeline_eq!(trimmer, "", ""); + pipeline_eq!(trimmer, "[tag]", "tag"); + pipeline_eq!(trimmer, "[[[tag]]]", "tag"); + pipeline_eq!(trimmer, "[[!@#@!hello]]]}}}", "hello"); + pipeline_eq!(trimmer, "~!@@@hello***()()()]]", "hello"); + } + + #[test] + fn test_stemmer() { + let cases = [ + ("consign", "consign"), + ("consigned", "consign"), + ("consigning", "consign"), + ("consignment", "consign"), + ("consist", "consist"), + ("consisted", "consist"), + ("consistency", "consist"), + ("consistent", "consist"), + ("consistently", "consist"), + ("consisting", "consist"), + ("consists", "consist"), + ("consolation", "consol"), + ("consolations", "consol"), + ("consolatory", "consolatori"), + ("console", "consol"), + ("consoled", "consol"), + ("consoles", "consol"), + ("consolidate", "consolid"), + ("consolidated", "consolid"), + ("consolidating", "consolid"), + ("consoling", "consol"), + ("consols", "consol"), + ("consonant", "conson"), + ("consort", "consort"), + ("consorted", "consort"), + ("consorting", "consort"), + ("conspicuous", "conspicu"), + ("conspicuously", "conspicu"), + ("conspiracy", "conspiraci"), + ("conspirator", "conspir"), + ("conspirators", "conspir"), + ("conspire", "conspir"), + ("conspired", "conspir"), + ("conspiring", "conspir"), + ("constable", "constabl"), + ("constables", "constabl"), + ("constance", "constanc"), + ("constancy", "constanc"), + ("constant", "constant"), + ("knack", "knack"), + ("knackeries", "knackeri"), + ("knacks", "knack"), + ("knag", "knag"), + ("knave", 
"knave"), + ("knaves", "knave"), + ("knavish", "knavish"), + ("kneaded", "knead"), + ("kneading", "knead"), + ("knee", "knee"), + ("kneel", "kneel"), + ("kneeled", "kneel"), + ("kneeling", "kneel"), + ("kneels", "kneel"), + ("knees", "knee"), + ("knell", "knell"), + ("knelt", "knelt"), + ("knew", "knew"), + ("knick", "knick"), + ("knif", "knif"), + ("knife", "knife"), + ("knight", "knight"), + ("knights", "knight"), + ("knit", "knit"), + ("knits", "knit"), + ("knitted", "knit"), + ("knitting", "knit"), + ("knives", "knive"), + ("knob", "knob"), + ("knobs", "knob"), + ("knock", "knock"), + ("knocked", "knock"), + ("knocker", "knocker"), + ("knockers", "knocker"), + ("knocking", "knock"), + ("knocks", "knock"), + ("knopp", "knopp"), + ("knot", "knot"), + ("knots", "knot"), + ("lay", "lay"), + ("try", "tri"), + ]; + + let stemmer = Stemmer::new(); + for &(input, output) in cases.iter() { + assert_eq!(&stemmer.stem(input.into()), output); + } + } +} diff --git a/vendor/elasticlunr-rs/src/lang/es.rs b/vendor/elasticlunr-rs/src/lang/es.rs new file mode 100644 index 000000000..b6c4b5bcf --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/es.rs @@ -0,0 +1,350 @@ +use super::{ + common::{RustStemmer, StopWordFilter, RegexTrimmer}, + Language, +}; +use crate::pipeline::Pipeline; +use rust_stemmers::Algorithm; + +#[derive(Clone)] +pub struct Spanish {} + +impl Spanish { + pub fn new() -> Self { + Self {} + } +} + +impl Language for Spanish { + fn name(&self) -> String { + "Spanish".into() + } + fn code(&self) -> String { + "es".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + super::tokenize_whitespace(text) + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(RegexTrimmer::new("trimmer-es", r"\p{Latin}")), + Box::new(StopWordFilter::new("stopWordFilter-es", STOP_WORDS)), + Box::new(RustStemmer::new("stemmer-es", Algorithm::Spanish)), + ], + } + } +} + +const STOP_WORDS: &[&str] = &[ + "", + "a", + "al", + "algo", + "algunas", + 
"algunos", + "ante", + "antes", + "como", + "con", + "contra", + "cual", + "cuando", + "de", + "del", + "desde", + "donde", + "durante", + "e", + "el", + "ella", + "ellas", + "ellos", + "en", + "entre", + "era", + "erais", + "eran", + "eras", + "eres", + "es", + "esa", + "esas", + "ese", + "eso", + "esos", + "esta", + "estaba", + "estabais", + "estaban", + "estabas", + "estad", + "estada", + "estadas", + "estado", + "estados", + "estamos", + "estando", + "estar", + "estaremos", + "estará", + "estarán", + "estarás", + "estaré", + "estaréis", + "estaría", + "estaríais", + "estaríamos", + "estarían", + "estarías", + "estas", + "este", + "estemos", + "esto", + "estos", + "estoy", + "estuve", + "estuviera", + "estuvierais", + "estuvieran", + "estuvieras", + "estuvieron", + "estuviese", + "estuvieseis", + "estuviesen", + "estuvieses", + "estuvimos", + "estuviste", + "estuvisteis", + "estuviéramos", + "estuviésemos", + "estuvo", + "está", + "estábamos", + "estáis", + "están", + "estás", + "esté", + "estéis", + "estén", + "estés", + "fue", + "fuera", + "fuerais", + "fueran", + "fueras", + "fueron", + "fuese", + "fueseis", + "fuesen", + "fueses", + "fui", + "fuimos", + "fuiste", + "fuisteis", + "fuéramos", + "fuésemos", + "ha", + "habida", + "habidas", + "habido", + "habidos", + "habiendo", + "habremos", + "habrá", + "habrán", + "habrás", + "habré", + "habréis", + "habría", + "habríais", + "habríamos", + "habrían", + "habrías", + "habéis", + "había", + "habíais", + "habíamos", + "habían", + "habías", + "han", + "has", + "hasta", + "hay", + "haya", + "hayamos", + "hayan", + "hayas", + "hayáis", + "he", + "hemos", + "hube", + "hubiera", + "hubierais", + "hubieran", + "hubieras", + "hubieron", + "hubiese", + "hubieseis", + "hubiesen", + "hubieses", + "hubimos", + "hubiste", + "hubisteis", + "hubiéramos", + "hubiésemos", + "hubo", + "la", + "las", + "le", + "les", + "lo", + "los", + "me", + "mi", + "mis", + "mucho", + "muchos", + "muy", + "más", + "mí", + "mía", + "mías", + 
"mío", + "míos", + "nada", + "ni", + "no", + "nos", + "nosotras", + "nosotros", + "nuestra", + "nuestras", + "nuestro", + "nuestros", + "o", + "os", + "otra", + "otras", + "otro", + "otros", + "para", + "pero", + "poco", + "por", + "porque", + "que", + "quien", + "quienes", + "qué", + "se", + "sea", + "seamos", + "sean", + "seas", + "seremos", + "será", + "serán", + "serás", + "seré", + "seréis", + "sería", + "seríais", + "seríamos", + "serían", + "serías", + "seáis", + "sido", + "siendo", + "sin", + "sobre", + "sois", + "somos", + "son", + "soy", + "su", + "sus", + "suya", + "suyas", + "suyo", + "suyos", + "sí", + "también", + "tanto", + "te", + "tendremos", + "tendrá", + "tendrán", + "tendrás", + "tendré", + "tendréis", + "tendría", + "tendríais", + "tendríamos", + "tendrían", + "tendrías", + "tened", + "tenemos", + "tenga", + "tengamos", + "tengan", + "tengas", + "tengo", + "tengáis", + "tenida", + "tenidas", + "tenido", + "tenidos", + "teniendo", + "tenéis", + "tenía", + "teníais", + "teníamos", + "tenían", + "tenías", + "ti", + "tiene", + "tienen", + "tienes", + "todo", + "todos", + "tu", + "tus", + "tuve", + "tuviera", + "tuvierais", + "tuvieran", + "tuvieras", + "tuvieron", + "tuviese", + "tuvieseis", + "tuviesen", + "tuvieses", + "tuvimos", + "tuviste", + "tuvisteis", + "tuviéramos", + "tuviésemos", + "tuvo", + "tuya", + "tuyas", + "tuyo", + "tuyos", + "tú", + "un", + "una", + "uno", + "unos", + "vosotras", + "vosotros", + "vuestra", + "vuestras", + "vuestro", + "vuestros", + "y", + "ya", + "yo", + "él", + "éramos", +]; diff --git a/vendor/elasticlunr-rs/src/lang/fi.rs b/vendor/elasticlunr-rs/src/lang/fi.rs new file mode 100644 index 000000000..91cfaa571 --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/fi.rs @@ -0,0 +1,277 @@ +use super::{ + common::{RustStemmer, StopWordFilter, RegexTrimmer}, + Language, +}; +use crate::pipeline::Pipeline; +use rust_stemmers::Algorithm; + +#[derive(Clone)] +pub struct Finnish {} + +impl Finnish { + pub fn new() -> Self { 
+ Self {} + } +} + +impl Language for Finnish { + fn name(&self) -> String { + "Finnish".into() + } + fn code(&self) -> String { + "fi".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + super::tokenize_whitespace(text) + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(RegexTrimmer::new("trimmer-fi", r"\p{Latin}")), + Box::new(StopWordFilter::new("stopWordFilter-fi", STOP_WORDS)), + Box::new(RustStemmer::new("stemmer-fi", Algorithm::Finnish)), + ], + } + } +} + +const STOP_WORDS: &[&str] = &[ + "", + "ei", + "eivät", + "emme", + "en", + "et", + "ette", + "että", + "he", + "heidän", + "heidät", + "heihin", + "heille", + "heillä", + "heiltä", + "heissä", + "heistä", + "heitä", + "hän", + "häneen", + "hänelle", + "hänellä", + "häneltä", + "hänen", + "hänessä", + "hänestä", + "hänet", + "häntä", + "itse", + "ja", + "johon", + "joiden", + "joihin", + "joiksi", + "joilla", + "joille", + "joilta", + "joina", + "joissa", + "joista", + "joita", + "joka", + "joksi", + "jolla", + "jolle", + "jolta", + "jona", + "jonka", + "jos", + "jossa", + "josta", + "jota", + "jotka", + "kanssa", + "keiden", + "keihin", + "keiksi", + "keille", + "keillä", + "keiltä", + "keinä", + "keissä", + "keistä", + "keitä", + "keneen", + "keneksi", + "kenelle", + "kenellä", + "keneltä", + "kenen", + "kenenä", + "kenessä", + "kenestä", + "kenet", + "ketkä", + "ketkä", + "ketä", + "koska", + "kuin", + "kuka", + "kun", + "me", + "meidän", + "meidät", + "meihin", + "meille", + "meillä", + "meiltä", + "meissä", + "meistä", + "meitä", + "mihin", + "miksi", + "mikä", + "mille", + "millä", + "miltä", + "minkä", + "minkä", + "minua", + "minulla", + "minulle", + "minulta", + "minun", + "minussa", + "minusta", + "minut", + "minuun", + "minä", + "minä", + "missä", + "mistä", + "mitkä", + "mitä", + "mukaan", + "mutta", + "ne", + "niiden", + "niihin", + "niiksi", + "niille", + "niillä", + "niiltä", + "niin", + "niin", + "niinä", + "niissä", + "niistä", + "niitä", + 
"noiden", + "noihin", + "noiksi", + "noilla", + "noille", + "noilta", + "noin", + "noina", + "noissa", + "noista", + "noita", + "nuo", + "nyt", + "näiden", + "näihin", + "näiksi", + "näille", + "näillä", + "näiltä", + "näinä", + "näissä", + "näistä", + "näitä", + "nämä", + "ole", + "olemme", + "olen", + "olet", + "olette", + "oli", + "olimme", + "olin", + "olisi", + "olisimme", + "olisin", + "olisit", + "olisitte", + "olisivat", + "olit", + "olitte", + "olivat", + "olla", + "olleet", + "ollut", + "on", + "ovat", + "poikki", + "se", + "sekä", + "sen", + "siihen", + "siinä", + "siitä", + "siksi", + "sille", + "sillä", + "sillä", + "siltä", + "sinua", + "sinulla", + "sinulle", + "sinulta", + "sinun", + "sinussa", + "sinusta", + "sinut", + "sinuun", + "sinä", + "sinä", + "sitä", + "tai", + "te", + "teidän", + "teidät", + "teihin", + "teille", + "teillä", + "teiltä", + "teissä", + "teistä", + "teitä", + "tuo", + "tuohon", + "tuoksi", + "tuolla", + "tuolle", + "tuolta", + "tuon", + "tuona", + "tuossa", + "tuosta", + "tuota", + "tähän", + "täksi", + "tälle", + "tällä", + "tältä", + "tämä", + "tämän", + "tänä", + "tässä", + "tästä", + "tätä", + "vaan", + "vai", + "vaikka", + "yli", +]; diff --git a/vendor/elasticlunr-rs/src/lang/fr.rs b/vendor/elasticlunr-rs/src/lang/fr.rs new file mode 100644 index 000000000..ec41f307a --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/fr.rs @@ -0,0 +1,56 @@ +use super::{ + common::{RustStemmer, StopWordFilter, RegexTrimmer}, + Language, +}; +use crate::pipeline::Pipeline; +use rust_stemmers::Algorithm; + +#[derive(Clone)] +pub struct French {} + +impl French { + pub fn new() -> Self { + Self {} + } +} + +impl Language for French { + fn name(&self) -> String { + "French".into() + } + fn code(&self) -> String { + "fr".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + super::tokenize_whitespace(text) + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(RegexTrimmer::new("trimmer-fr", 
r"\p{Latin}")), + Box::new(StopWordFilter::new("stopWordFilter-fr", STOP_WORDS)), + Box::new(RustStemmer::new("stemmer-fr", Algorithm::French)), + ], + } + } +} + +const STOP_WORDS: &[&str] = &[ + "", "ai", "aie", "aient", "aies", "ait", "as", "au", "aura", "aurai", "auraient", "aurais", + "aurait", "auras", "aurez", "auriez", "aurions", "aurons", "auront", "aux", "avaient", "avais", + "avait", "avec", "avez", "aviez", "avions", "avons", "ayant", "ayez", "ayons", "c", "ce", + "ceci", "celà", "ces", "cet", "cette", "d", "dans", "de", "des", "du", "elle", "en", "es", + "est", "et", "eu", "eue", "eues", "eurent", "eus", "eusse", "eussent", "eusses", "eussiez", + "eussions", "eut", "eux", "eûmes", "eût", "eûtes", "furent", "fus", "fusse", "fussent", + "fusses", "fussiez", "fussions", "fut", "fûmes", "fût", "fûtes", "ici", "il", "ils", "j", "je", + "l", "la", "le", "les", "leur", "leurs", "lui", "m", "ma", "mais", "me", "mes", "moi", "mon", + "même", "n", "ne", "nos", "notre", "nous", "on", "ont", "ou", "par", "pas", "pour", "qu", + "que", "quel", "quelle", "quelles", "quels", "qui", "s", "sa", "sans", "se", "sera", "serai", + "seraient", "serais", "serait", "seras", "serez", "seriez", "serions", "serons", "seront", + "ses", "soi", "soient", "sois", "soit", "sommes", "son", "sont", "soyez", "soyons", "suis", + "sur", "t", "ta", "te", "tes", "toi", "ton", "tu", "un", "une", "vos", "votre", "vous", "y", + "à", "étaient", "étais", "était", "étant", "étiez", "étions", "été", "étée", "étées", "étés", + "êtes", +]; diff --git a/vendor/elasticlunr-rs/src/lang/it.rs b/vendor/elasticlunr-rs/src/lang/it.rs new file mode 100644 index 000000000..78d7e4454 --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/it.rs @@ -0,0 +1,321 @@ +use super::{ + common::{RustStemmer, StopWordFilter, RegexTrimmer}, + Language, +}; +use crate::pipeline::Pipeline; +use rust_stemmers::Algorithm; + +#[derive(Clone)] +pub struct Italian {} + +impl Italian { + pub fn new() -> Self { + Self {} + } +} + 
+impl Language for Italian { + fn name(&self) -> String { + "Italian".into() + } + fn code(&self) -> String { + "it".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + super::tokenize_whitespace(text) + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(RegexTrimmer::new("trimmer-it", r"\p{Latin}")), + Box::new(StopWordFilter::new("stopWordFilter-it", STOP_WORDS)), + Box::new(RustStemmer::new("stemmer-it", Algorithm::Italian)), + ], + } + } +} + +const STOP_WORDS: &[&str] = &[ + "", + "a", + "abbia", + "abbiamo", + "abbiano", + "abbiate", + "ad", + "agl", + "agli", + "ai", + "al", + "all", + "alla", + "alle", + "allo", + "anche", + "avemmo", + "avendo", + "avesse", + "avessero", + "avessi", + "avessimo", + "aveste", + "avesti", + "avete", + "aveva", + "avevamo", + "avevano", + "avevate", + "avevi", + "avevo", + "avrai", + "avranno", + "avrebbe", + "avrebbero", + "avrei", + "avremmo", + "avremo", + "avreste", + "avresti", + "avrete", + "avrà", + "avrò", + "avuta", + "avute", + "avuti", + "avuto", + "c", + "che", + "chi", + "ci", + "coi", + "col", + "come", + "con", + "contro", + "cui", + "da", + "dagl", + "dagli", + "dai", + "dal", + "dall", + "dalla", + "dalle", + "dallo", + "degl", + "degli", + "dei", + "del", + "dell", + "della", + "delle", + "dello", + "di", + "dov", + "dove", + "e", + "ebbe", + "ebbero", + "ebbi", + "ed", + "era", + "erano", + "eravamo", + "eravate", + "eri", + "ero", + "essendo", + "faccia", + "facciamo", + "facciano", + "facciate", + "faccio", + "facemmo", + "facendo", + "facesse", + "facessero", + "facessi", + "facessimo", + "faceste", + "facesti", + "faceva", + "facevamo", + "facevano", + "facevate", + "facevi", + "facevo", + "fai", + "fanno", + "farai", + "faranno", + "farebbe", + "farebbero", + "farei", + "faremmo", + "faremo", + "fareste", + "faresti", + "farete", + "farà", + "farò", + "fece", + "fecero", + "feci", + "fosse", + "fossero", + "fossi", + "fossimo", + "foste", + "fosti", + "fu", 
+ "fui", + "fummo", + "furono", + "gli", + "ha", + "hai", + "hanno", + "ho", + "i", + "il", + "in", + "io", + "l", + "la", + "le", + "lei", + "li", + "lo", + "loro", + "lui", + "ma", + "mi", + "mia", + "mie", + "miei", + "mio", + "ne", + "negl", + "negli", + "nei", + "nel", + "nell", + "nella", + "nelle", + "nello", + "noi", + "non", + "nostra", + "nostre", + "nostri", + "nostro", + "o", + "per", + "perché", + "più", + "quale", + "quanta", + "quante", + "quanti", + "quanto", + "quella", + "quelle", + "quelli", + "quello", + "questa", + "queste", + "questi", + "questo", + "sarai", + "saranno", + "sarebbe", + "sarebbero", + "sarei", + "saremmo", + "saremo", + "sareste", + "saresti", + "sarete", + "sarà", + "sarò", + "se", + "sei", + "si", + "sia", + "siamo", + "siano", + "siate", + "siete", + "sono", + "sta", + "stai", + "stando", + "stanno", + "starai", + "staranno", + "starebbe", + "starebbero", + "starei", + "staremmo", + "staremo", + "stareste", + "staresti", + "starete", + "starà", + "starò", + "stava", + "stavamo", + "stavano", + "stavate", + "stavi", + "stavo", + "stemmo", + "stesse", + "stessero", + "stessi", + "stessimo", + "steste", + "stesti", + "stette", + "stettero", + "stetti", + "stia", + "stiamo", + "stiano", + "stiate", + "sto", + "su", + "sua", + "sue", + "sugl", + "sugli", + "sui", + "sul", + "sull", + "sulla", + "sulle", + "sullo", + "suo", + "suoi", + "ti", + "tra", + "tu", + "tua", + "tue", + "tuo", + "tuoi", + "tutti", + "tutto", + "un", + "una", + "uno", + "vi", + "voi", + "vostra", + "vostre", + "vostri", + "vostro", + "è", +]; diff --git a/vendor/elasticlunr-rs/src/lang/ja.rs b/vendor/elasticlunr-rs/src/lang/ja.rs new file mode 100644 index 000000000..e38fcde9f --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/ja.rs @@ -0,0 +1,76 @@ +use super::{common::RegexTrimmer, Language}; +use crate::pipeline::{FnWrapper, Pipeline}; +use lindera::tokenizer::{Tokenizer, TokenizerConfig}; +use lindera_core::viterbi::Mode; + +#[derive(Clone)] +pub struct 
Japanese { + tokenizer: Tokenizer, +} + +impl Japanese { + pub fn new() -> Self { + let config = TokenizerConfig { + mode: Mode::Decompose(Default::default()), + ..Default::default() + }; + Self::with_config(config) + } + + pub fn with_config(config: TokenizerConfig) -> Self { + // NB: unwrap() is okay since the errors are only related to user-supplied dictionaries. + let tokenizer = Tokenizer::with_config(config).unwrap(); + Self { tokenizer } + } +} + +impl Language for Japanese { + fn name(&self) -> String { + "Japanese".into() + } + fn code(&self) -> String { + "ja".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + self.tokenizer + .tokenize(text) + .unwrap() + .into_iter() + .filter_map(|tok| match tok.detail.get(0).map(|d| d.as_str()) { + Some("助詞") | Some("助動詞") | Some("記号") | Some("UNK") => None, + _ => Some(tok.text.to_string()), + }) + .collect() + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(RegexTrimmer::new("trimmer-ja", WORD_CHARS)), + Box::new(FnWrapper("stemmer-ja".into(), stemmer)), + ], + } + } +} + +const WORD_CHARS: &str = r"0-9A-Za-z\p{Hiragana}\p{Katakana}\p{Unified_Ideograph}"; + +fn stemmer(token: String) -> Option<String> { + Some(token) +} + +#[cfg(test)] +mod tests { + use crate::pipeline::PipelineFn; + use super::*; + + #[test] + fn test_trimmer() { + let trimmer = RegexTrimmer::new("trimmer-ja".into(), WORD_CHARS); + assert_eq!( + trimmer.filter(" こんにちは、世界!".to_string()), + Some("こんにちは、世界".to_string()) + ); + } +} diff --git a/vendor/elasticlunr-rs/src/lang/mod.rs b/vendor/elasticlunr-rs/src/lang/mod.rs new file mode 100644 index 000000000..81966e1b2 --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/mod.rs @@ -0,0 +1,138 @@ +//! Intended to be compatible with <https://github.com/MihaiValentin/lunr-languages>. Each supported +//! language has a trimmer, a stop word filter, and a stemmer. Most users will not need to use +//! these modules directly. 
pub mod common;

use crate::Pipeline;

/// Describes one supported language: how it is identified, how raw text is split into tokens,
/// and which [`Pipeline`] post-processes those tokens before they are indexed.
pub trait Language {
    /// The name of the language in English
    fn name(&self) -> String;

    /// The ISO 639-1 language code of the language
    fn code(&self) -> String;

    /// Separates the input text into individual tokens. In most languages a token is a word, separated by whitespace.
    fn tokenize(&self, text: &str) -> Vec<String>;

    /// Returns the [`Pipeline`] to process the tokens with
    fn make_pipeline(&self) -> Pipeline;
}

/// Splits a text string into a vector of individual tokens.
///
/// The text is cut at every Unicode whitespace character and at every `-`; empty pieces
/// (produced by consecutive separators or separators at either end) are dropped and each
/// remaining piece is lowercased.
pub fn tokenize_whitespace(text: &str) -> Vec<String> {
    text.split(|c: char| c.is_whitespace() || c == '-')
        .filter(|s| !s.is_empty())
        .map(|s| s.trim().to_lowercase())
        .collect()
}

// Generates, from a list of `(TypeName, module_code [, #[cfg(...)]])` entries:
//   * `languages()`  - one boxed instance of every listed language,
//   * `from_code()`  - lookup by the module code, compared after ASCII-lowercasing the input,
//   * `from_name()`  - lookup by the type name, compared exactly as written,
//   * a private `mod <code>;` plus a `pub use <code>::<TypeName>;` re-export per entry.
// The optional attribute is repeated on every generated item of its entry, so an entry whose
// `cfg` is not satisfied disappears from all three functions and from the module list.
macro_rules! impl_language {
    ($( ( $name:ident, $code:ident $(, #[$cfgs:meta] )? ), )+) => {
        /// Returns a list of all the [`Language`] implementations in the crate
        pub fn languages() -> Vec<Box<dyn Language>> {
            vec![
                $(
                    $(#[$cfgs])?
                    Box::new($code::$name::new()),
                )+
            ]
        }

        /// Returns the [`Language`] for the given two-character [ISO 639-1][iso] language code if the
        /// language is supported. Returns `None` if not supported.
        ///
        /// *Note:*
        ///
        /// The ISO 639-1 code for Dutch is "nl". However "du" is used for the module name
        /// and pipeline suffix in order to match lunr-languages.
        ///
        /// [iso]: https://en.wikipedia.org/wiki/ISO_639-1
        pub fn from_code(code: &str) -> Option<Box<dyn Language>> {
            match code.to_ascii_lowercase().as_str() {
                $(
                    $(#[$cfgs])?
                    stringify!($code) => Some(Box::new($code::$name::new())),
                )+
                _ => None,
            }
        }

        /// Returns the [`Language`] for the given English language name if the
        /// language is supported. Returns `None` if not supported. The first letter must
        /// be capitalized.
        pub fn from_name(name: &str) -> Option<Box<dyn Language>> {
            match name {
                $(
                    $(#[$cfgs])?
                    stringify!($name) => Some(Box::new($code::$name::new())),
                )+
                _ => None,
            }
        }

        $(
            $(#[$cfgs])?
            mod $code;

            $(#[$cfgs])?
            pub use $code::$name;
        )+
    };
}

// English carries no `cfg` attribute and is therefore always compiled in; every other
// language is gated behind a Cargo feature named after its module code.
impl_language! {
    (English, en),
    (Arabic, ar, #[cfg(feature = "ar")]),
    (Chinese, zh, #[cfg(feature = "zh")]),
    (Danish, da, #[cfg(feature = "da")]),
    (Dutch, du, #[cfg(feature = "du")]),
    (Finnish, fi, #[cfg(feature = "fi")]),
    (French, fr, #[cfg(feature = "fr")]),
    (German, de, #[cfg(feature = "de")]),
    (Italian, it, #[cfg(feature = "it")]),
    (Japanese, ja, #[cfg(feature = "ja")]),
    (Norwegian, no, #[cfg(feature = "no")]),
    (Portuguese, pt, #[cfg(feature = "pt")]),
    (Romanian, ro, #[cfg(feature = "ro")]),
    (Russian, ru, #[cfg(feature = "ru")]),
    (Spanish, es, #[cfg(feature = "es")]),
    (Swedish, sv, #[cfg(feature = "sv")]),
    (Turkish, tr, #[cfg(feature = "tr")]),
}

#[cfg(test)]
mod tests {
    use super::tokenize_whitespace;

    #[test]
    fn split_simple_strings() {
        let string = "this is a simple string";
        assert_eq!(
            &tokenize_whitespace(string),
            &["this", "is", "a", "simple", "string"]
        );
    }

    // Leading, trailing and repeated separators must not produce empty tokens.
    #[test]
    fn multiple_white_space() {
        let string = " foo bar ";
        assert_eq!(&tokenize_whitespace(string), &["foo", "bar"]);
    }

    // A hyphen separates tokens exactly like whitespace does.
    #[test]
    fn hyphens() {
        let string = "take the New York-San Francisco flight";
        assert_eq!(
            &tokenize_whitespace(string),
            &["take", "the", "new", "york", "san", "francisco", "flight"]
        );
    }

    // A free-standing hyphen surrounded by spaces vanishes entirely.
    #[test]
    fn splitting_strings_with_hyphens() {
        let string = "Solve for A - B";
        assert_eq!(&tokenize_whitespace(string), &["solve", "for", "a", "b"]);
    }
}
+#[derive(Clone)] +pub struct Norwegian {} + +impl Norwegian { + pub fn new() -> Self { + Self {} + } +} + +impl Language for Norwegian { + fn name(&self) -> String { + "Norwegian".into() + } + fn code(&self) -> String { + "no".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + super::tokenize_whitespace(text) + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(RegexTrimmer::new("trimmer-no", r"\p{Latin}")), + Box::new(StopWordFilter::new("stopWordFilter-no", STOP_WORDS)), + Box::new(RustStemmer::new("stemmer-no", Algorithm::Norwegian)), + ], + } + } +} + +const STOP_WORDS: &[&str] = &[ + "", + "alle", + "at", + "av", + "bare", + "begge", + "ble", + "blei", + "bli", + "blir", + "blitt", + "både", + "båe", + "da", + "de", + "deg", + "dei", + "deim", + "deira", + "deires", + "dem", + "den", + "denne", + "der", + "dere", + "deres", + "det", + "dette", + "di", + "din", + "disse", + "ditt", + "du", + "dykk", + "dykkar", + "då", + "eg", + "ein", + "eit", + "eitt", + "eller", + "elles", + "en", + "enn", + "er", + "et", + "ett", + "etter", + "for", + "fordi", + "fra", + "før", + "ha", + "hadde", + "han", + "hans", + "har", + "hennar", + "henne", + "hennes", + "her", + "hjå", + "ho", + "hoe", + "honom", + "hoss", + "hossen", + "hun", + "hva", + "hvem", + "hver", + "hvilke", + "hvilken", + "hvis", + "hvor", + "hvordan", + "hvorfor", + "i", + "ikke", + "ikkje", + "ikkje", + "ingen", + "ingi", + "inkje", + "inn", + "inni", + "ja", + "jeg", + "kan", + "kom", + "korleis", + "korso", + "kun", + "kunne", + "kva", + "kvar", + "kvarhelst", + "kven", + "kvi", + "kvifor", + "man", + "mange", + "me", + "med", + "medan", + "meg", + "meget", + "mellom", + "men", + "mi", + "min", + "mine", + "mitt", + "mot", + "mykje", + "ned", + "no", + "noe", + "noen", + "noka", + "noko", + "nokon", + "nokor", + "nokre", + "nå", + "når", + "og", + "også", + "om", + "opp", + "oss", + "over", + "på", + "samme", + "seg", + "selv", + "si", + "si", + "sia", + 
"sidan", + "siden", + "sin", + "sine", + "sitt", + "sjøl", + "skal", + "skulle", + "slik", + "so", + "som", + "som", + "somme", + "somt", + "så", + "sånn", + "til", + "um", + "upp", + "ut", + "uten", + "var", + "vart", + "varte", + "ved", + "vere", + "verte", + "vi", + "vil", + "ville", + "vore", + "vors", + "vort", + "vår", + "være", + "være", + "vært", + "å", +]; diff --git a/vendor/elasticlunr-rs/src/lang/pt.rs b/vendor/elasticlunr-rs/src/lang/pt.rs new file mode 100644 index 000000000..5f36f4280 --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/pt.rs @@ -0,0 +1,245 @@ +use super::{ + common::{RustStemmer, StopWordFilter, RegexTrimmer}, + Language, +}; +use crate::pipeline::Pipeline; +use rust_stemmers::Algorithm; + +#[derive(Clone)] +pub struct Portuguese {} + +impl Portuguese { + pub fn new() -> Self { + Self {} + } +} + +impl Language for Portuguese { + fn name(&self) -> String { + "Portuguese".into() + } + fn code(&self) -> String { + "pt".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + super::tokenize_whitespace(text) + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(RegexTrimmer::new("trimmer-pt", r"\p{Latin}")), + Box::new(StopWordFilter::new("stopWordFilter-pt", STOP_WORDS)), + Box::new(RustStemmer::new("stemmer-pt", Algorithm::Portuguese)), + ], + } + } +} + +const STOP_WORDS: &[&str] = &[ + "", + "a", + "ao", + "aos", + "aquela", + "aquelas", + "aquele", + "aqueles", + "aquilo", + "as", + "até", + "com", + "como", + "da", + "das", + "de", + "dela", + "delas", + "dele", + "deles", + "depois", + "do", + "dos", + "e", + "ela", + "elas", + "ele", + "eles", + "em", + "entre", + "era", + "eram", + "essa", + "essas", + "esse", + "esses", + "esta", + "estamos", + "estas", + "estava", + "estavam", + "este", + "esteja", + "estejam", + "estejamos", + "estes", + "esteve", + "estive", + "estivemos", + "estiver", + "estivera", + "estiveram", + "estiverem", + "estivermos", + "estivesse", + "estivessem", + 
"estivéramos", + "estivéssemos", + "estou", + "está", + "estávamos", + "estão", + "eu", + "foi", + "fomos", + "for", + "fora", + "foram", + "forem", + "formos", + "fosse", + "fossem", + "fui", + "fôramos", + "fôssemos", + "haja", + "hajam", + "hajamos", + "havemos", + "hei", + "houve", + "houvemos", + "houver", + "houvera", + "houveram", + "houverei", + "houverem", + "houveremos", + "houveria", + "houveriam", + "houvermos", + "houverá", + "houverão", + "houveríamos", + "houvesse", + "houvessem", + "houvéramos", + "houvéssemos", + "há", + "hão", + "isso", + "isto", + "já", + "lhe", + "lhes", + "mais", + "mas", + "me", + "mesmo", + "meu", + "meus", + "minha", + "minhas", + "muito", + "na", + "nas", + "nem", + "no", + "nos", + "nossa", + "nossas", + "nosso", + "nossos", + "num", + "numa", + "não", + "nós", + "o", + "os", + "ou", + "para", + "pela", + "pelas", + "pelo", + "pelos", + "por", + "qual", + "quando", + "que", + "quem", + "se", + "seja", + "sejam", + "sejamos", + "sem", + "serei", + "seremos", + "seria", + "seriam", + "será", + "serão", + "seríamos", + "seu", + "seus", + "somos", + "sou", + "sua", + "suas", + "são", + "só", + "também", + "te", + "tem", + "temos", + "tenha", + "tenham", + "tenhamos", + "tenho", + "terei", + "teremos", + "teria", + "teriam", + "terá", + "terão", + "teríamos", + "teu", + "teus", + "teve", + "tinha", + "tinham", + "tive", + "tivemos", + "tiver", + "tivera", + "tiveram", + "tiverem", + "tivermos", + "tivesse", + "tivessem", + "tivéramos", + "tivéssemos", + "tu", + "tua", + "tuas", + "tém", + "tínhamos", + "um", + "uma", + "você", + "vocês", + "vos", + "à", + "às", + "éramos", +]; diff --git a/vendor/elasticlunr-rs/src/lang/ro.rs b/vendor/elasticlunr-rs/src/lang/ro.rs new file mode 100644 index 000000000..8244fe967 --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/ro.rs @@ -0,0 +1,323 @@ +use super::{ + common::{RustStemmer, StopWordFilter, RegexTrimmer}, + Language, +}; +use crate::pipeline::Pipeline; +use 
rust_stemmers::Algorithm; + +#[derive(Clone)] +pub struct Romanian {} + +impl Romanian { + pub fn new() -> Self { + Self {} + } +} + +impl Language for Romanian { + fn name(&self) -> String { + "Romanian".into() + } + fn code(&self) -> String { + "ro".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + super::tokenize_whitespace(text) + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(RegexTrimmer::new("trimmer-ro", r"\p{Latin}")), + Box::new(StopWordFilter::new("stopWordFilter-ro", STOP_WORDS)), + Box::new(RustStemmer::new("stemmer-ro", Algorithm::Romanian)), + ], + } + } +} + +const STOP_WORDS: &[&str] = &[ + "", + "acea", + "aceasta", + "această", + "aceea", + "acei", + "aceia", + "acel", + "acela", + "acele", + "acelea", + "acest", + "acesta", + "aceste", + "acestea", + "aceşti", + "aceştia", + "acolo", + "acord", + "acum", + "ai", + "aia", + "aibă", + "aici", + "al", + "ale", + "alea", + "altceva", + "altcineva", + "am", + "ar", + "are", + "asemenea", + "asta", + "astea", + "astăzi", + "asupra", + "au", + "avea", + "avem", + "aveţi", + "azi", + "aş", + "aşadar", + "aţi", + "bine", + "bucur", + "bună", + "ca", + "care", + "caut", + "ce", + "cel", + "ceva", + "chiar", + "cinci", + "cine", + "cineva", + "contra", + "cu", + "cum", + "cumva", + "curând", + "curînd", + "când", + "cât", + "câte", + "câtva", + "câţi", + "cînd", + "cît", + "cîte", + "cîtva", + "cîţi", + "că", + "căci", + "cărei", + "căror", + "cărui", + "către", + "da", + "dacă", + "dar", + "datorită", + "dată", + "dau", + "de", + "deci", + "deja", + "deoarece", + "departe", + "deşi", + "din", + "dinaintea", + "dintr-", + "dintre", + "doi", + "doilea", + "două", + "drept", + "după", + "dă", + "ea", + "ei", + "el", + "ele", + "eram", + "este", + "eu", + "eşti", + "face", + "fata", + "fi", + "fie", + "fiecare", + "fii", + "fim", + "fiu", + "fiţi", + "frumos", + "fără", + "graţie", + "halbă", + "iar", + "ieri", + "la", + "le", + "li", + "lor", + "lui", + 
"lângă", + "lîngă", + "mai", + "mea", + "mei", + "mele", + "mereu", + "meu", + "mi", + "mie", + "mine", + "mult", + "multă", + "mulţi", + "mulţumesc", + "mâine", + "mîine", + "mă", + "ne", + "nevoie", + "nici", + "nicăieri", + "nimeni", + "nimeri", + "nimic", + "nişte", + "noastre", + "noastră", + "noi", + "noroc", + "nostru", + "nouă", + "noştri", + "nu", + "opt", + "ori", + "oricare", + "orice", + "oricine", + "oricum", + "oricând", + "oricât", + "oricînd", + "oricît", + "oriunde", + "patra", + "patru", + "patrulea", + "pe", + "pentru", + "peste", + "pic", + "poate", + "pot", + "prea", + "prima", + "primul", + "prin", + "puţin", + "puţina", + "puţină", + "până", + "pînă", + "rog", + "sa", + "sale", + "sau", + "se", + "spate", + "spre", + "sub", + "sunt", + "suntem", + "sunteţi", + "sută", + "sînt", + "sîntem", + "sînteţi", + "să", + "săi", + "său", + "ta", + "tale", + "te", + "timp", + "tine", + "toate", + "toată", + "tot", + "totuşi", + "toţi", + "trei", + "treia", + "treilea", + "tu", + "tăi", + "tău", + "un", + "una", + "unde", + "undeva", + "unei", + "uneia", + "unele", + "uneori", + "unii", + "unor", + "unora", + "unu", + "unui", + "unuia", + "unul", + "vi", + "voastre", + "voastră", + "voi", + "vostru", + "vouă", + "voştri", + "vreme", + "vreo", + "vreun", + "vă", + "zece", + "zero", + "zi", + "zice", + "îi", + "îl", + "îmi", + "împotriva", + "în", + "înainte", + "înaintea", + "încotro", + "încât", + "încît", + "între", + "întrucât", + "întrucît", + "îţi", + "ăla", + "ălea", + "ăsta", + "ăstea", + "ăştia", + "şapte", + "şase", + "şi", + "ştiu", + "ţi", + "ţie", +]; diff --git a/vendor/elasticlunr-rs/src/lang/ru.rs b/vendor/elasticlunr-rs/src/lang/ru.rs new file mode 100644 index 000000000..6b210d540 --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/ru.rs @@ -0,0 +1,463 @@ +use super::{ + common::{RustStemmer, StopWordFilter, RegexTrimmer}, + Language, +}; +use crate::pipeline::Pipeline; +use rust_stemmers::Algorithm; + +#[derive(Clone)] +pub struct Russian 
{} + +impl Russian { + pub fn new() -> Self { + Self {} + } +} + +impl Language for Russian { + fn name(&self) -> String { + "Russian".into() + } + fn code(&self) -> String { + "ru".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + super::tokenize_whitespace(text) + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(RegexTrimmer::new("trimmer-ru", r"\p{Cyrillic}")), + Box::new(StopWordFilter::new("stopWordFilter-ru", STOP_WORDS)), + Box::new(RustStemmer::new("stemmer-ru", Algorithm::Russian)), + ], + } + } +} + +const STOP_WORDS: &[&str] = &[ + "", + "алло", + "без", + "близко", + "более", + "больше", + "будем", + "будет", + "будете", + "будешь", + "будто", + "буду", + "будут", + "будь", + "бы", + "бывает", + "бывь", + "был", + "была", + "были", + "было", + "быть", + "в", + "важная", + "важное", + "важные", + "важный", + "вам", + "вами", + "вас", + "ваш", + "ваша", + "ваше", + "ваши", + "вверх", + "вдали", + "вдруг", + "ведь", + "везде", + "весь", + "вниз", + "внизу", + "во", + "вокруг", + "вон", + "восемнадцатый", + "восемнадцать", + "восемь", + "восьмой", + "вот", + "впрочем", + "времени", + "время", + "все", + "всегда", + "всего", + "всем", + "всеми", + "всему", + "всех", + "всею", + "всю", + "всюду", + "вся", + "всё", + "второй", + "вы", + "г", + "где", + "говорил", + "говорит", + "год", + "года", + "году", + "да", + "давно", + "даже", + "далеко", + "дальше", + "даром", + "два", + "двадцатый", + "двадцать", + "две", + "двенадцатый", + "двенадцать", + "двух", + "девятнадцатый", + "девятнадцать", + "девятый", + "девять", + "действительно", + "дел", + "день", + "десятый", + "десять", + "для", + "до", + "довольно", + "долго", + "должно", + "другая", + "другие", + "других", + "друго", + "другое", + "другой", + "е", + "его", + "ее", + "ей", + "ему", + "если", + "есть", + "еще", + "ещё", + "ею", + "её", + "ж", + "же", + "жизнь", + "за", + "занят", + "занята", + "занято", + "заняты", + "затем", + "зато", + "зачем", + 
"здесь", + "значит", + "и", + "из", + "или", + "им", + "именно", + "иметь", + "ими", + "имя", + "иногда", + "их", + "к", + "каждая", + "каждое", + "каждые", + "каждый", + "кажется", + "как", + "какая", + "какой", + "кем", + "когда", + "кого", + "ком", + "кому", + "конечно", + "которая", + "которого", + "которой", + "которые", + "который", + "которых", + "кроме", + "кругом", + "кто", + "куда", + "лет", + "ли", + "лишь", + "лучше", + "люди", + "м", + "мало", + "между", + "меля", + "менее", + "меньше", + "меня", + "миллионов", + "мимо", + "мира", + "мне", + "много", + "многочисленная", + "многочисленное", + "многочисленные", + "многочисленный", + "мной", + "мною", + "мог", + "могут", + "мож", + "может", + "можно", + "можхо", + "мои", + "мой", + "мор", + "мочь", + "моя", + "моё", + "мы", + "на", + "наверху", + "над", + "надо", + "назад", + "наиболее", + "наконец", + "нам", + "нами", + "нас", + "начала", + "наш", + "наша", + "наше", + "наши", + "не", + "него", + "недавно", + "недалеко", + "нее", + "ней", + "нельзя", + "нем", + "немного", + "нему", + "непрерывно", + "нередко", + "несколько", + "нет", + "нею", + "неё", + "ни", + "нибудь", + "ниже", + "низко", + "никогда", + "никуда", + "ними", + "них", + "ничего", + "но", + "ну", + "нужно", + "нх", + "о", + "об", + "оба", + "обычно", + "один", + "одиннадцатый", + "одиннадцать", + "однажды", + "однако", + "одного", + "одной", + "около", + "он", + "она", + "они", + "оно", + "опять", + "особенно", + "от", + "отовсюду", + "отсюда", + "очень", + "первый", + "перед", + "по", + "под", + "пожалуйста", + "позже", + "пока", + "пор", + "пора", + "после", + "посреди", + "потом", + "потому", + "почему", + "почти", + "прекрасно", + "при", + "про", + "просто", + "против", + "процентов", + "пятнадцатый", + "пятнадцать", + "пятый", + "пять", + "раз", + "разве", + "рано", + "раньше", + "рядом", + "с", + "сам", + "сама", + "сами", + "самим", + "самими", + "самих", + "само", + "самого", + "самой", + "самом", + "самому", + "саму", + "свое", + 
"своего", + "своей", + "свои", + "своих", + "свою", + "сеаой", + "себе", + "себя", + "сегодня", + "седьмой", + "сейчас", + "семнадцатый", + "семнадцать", + "семь", + "сих", + "сказал", + "сказала", + "сказать", + "сколько", + "слишком", + "сначала", + "снова", + "со", + "собой", + "собою", + "совсем", + "спасибо", + "стал", + "суть", + "т", + "та", + "так", + "такая", + "также", + "такие", + "такое", + "такой", + "там", + "твой", + "твоя", + "твоё", + "те", + "тебе", + "тебя", + "тем", + "теми", + "теперь", + "тех", + "то", + "тобой", + "тобою", + "тогда", + "того", + "тоже", + "только", + "том", + "тому", + "тот", + "тою", + "третий", + "три", + "тринадцатый", + "тринадцать", + "ту", + "туда", + "тут", + "ты", + "тысяч", + "у", + "уж", + "уже", + "уметь", + "хорошо", + "хотеть", + "хоть", + "хотя", + "хочешь", + "часто", + "чаще", + "чего", + "человек", + "чем", + "чему", + "через", + "четвертый", + "четыре", + "четырнадцатый", + "четырнадцать", + "что", + "чтоб", + "чтобы", + "чуть", + "шестнадцатый", + "шестнадцать", + "шестой", + "шесть", + "эта", + "эти", + "этим", + "этими", + "этих", + "это", + "этого", + "этой", + "этом", + "этому", + "этот", + "эту", + "я", + "а", +]; diff --git a/vendor/elasticlunr-rs/src/lang/sv.rs b/vendor/elasticlunr-rs/src/lang/sv.rs new file mode 100644 index 000000000..29beeb7b1 --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/sv.rs @@ -0,0 +1,51 @@ +use super::{ + common::{RustStemmer, StopWordFilter, RegexTrimmer}, + Language, +}; +use crate::pipeline::Pipeline; +use rust_stemmers::Algorithm; + +#[derive(Clone)] +pub struct Swedish {} + +impl Swedish { + pub fn new() -> Self { + Self {} + } +} + +impl Language for Swedish { + fn name(&self) -> String { + "Swedish".into() + } + fn code(&self) -> String { + "sv".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + super::tokenize_whitespace(text) + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(RegexTrimmer::new("trimmer-sv", 
r"\p{Latin}")), + Box::new(StopWordFilter::new("stopWordFilter-sv", STOP_WORDS)), + Box::new(RustStemmer::new("stemmer-sv", Algorithm::Swedish)), + ], + } + } +} + +const STOP_WORDS: &[&str] = &[ + "", "alla", "allt", "att", "av", "blev", "bli", "blir", "blivit", "de", "dem", "den", "denna", + "deras", "dess", "dessa", "det", "detta", "dig", "din", "dina", "ditt", "du", "där", "då", + "efter", "ej", "eller", "en", "er", "era", "ert", "ett", "från", "för", "ha", "hade", "han", + "hans", "har", "henne", "hennes", "hon", "honom", "hur", "här", "i", "icke", "ingen", "inom", + "inte", "jag", "ju", "kan", "kunde", "man", "med", "mellan", "men", "mig", "min", "mina", + "mitt", "mot", "mycket", "ni", "nu", "när", "någon", "något", "några", "och", "om", "oss", + "på", "samma", "sedan", "sig", "sin", "sina", "sitta", "själv", "skulle", "som", "så", "sådan", + "sådana", "sådant", "till", "under", "upp", "ut", "utan", "vad", "var", "vara", "varför", + "varit", "varje", "vars", "vart", "vem", "vi", "vid", "vilka", "vilkas", "vilken", "vilket", + "vår", "våra", "vårt", "än", "är", "åt", "över", +]; diff --git a/vendor/elasticlunr-rs/src/lang/tr.rs b/vendor/elasticlunr-rs/src/lang/tr.rs new file mode 100644 index 000000000..1aea580fa --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/tr.rs @@ -0,0 +1,251 @@ +use super::{ + common::{RustStemmer, StopWordFilter, RegexTrimmer}, + Language, +}; +use crate::pipeline::Pipeline; +use rust_stemmers::Algorithm; + +#[derive(Clone)] +pub struct Turkish {} + +impl Turkish { + pub fn new() -> Self { + Self {} + } +} + +impl Language for Turkish { + fn name(&self) -> String { + "Turkish".into() + } + fn code(&self) -> String { + "tr".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + super::tokenize_whitespace(text) + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(RegexTrimmer::new("trimmer-tr", r"\p{Latin}")), + Box::new(StopWordFilter::new("stopWordFilter-tr", STOP_WORDS)), + 
Box::new(RustStemmer::new("stemmer-tr", Algorithm::Turkish)), + ], + } + } +} + +const STOP_WORDS: &[&str] = &[ + "", + "acaba", + "altmış", + "altı", + "ama", + "ancak", + "arada", + "aslında", + "ayrıca", + "bana", + "bazı", + "belki", + "ben", + "benden", + "beni", + "benim", + "beri", + "beş", + "bile", + "bin", + "bir", + "biri", + "birkaç", + "birkez", + "birçok", + "birşey", + "birşeyi", + "biz", + "bizden", + "bize", + "bizi", + "bizim", + "bu", + "buna", + "bunda", + "bundan", + "bunlar", + "bunları", + "bunların", + "bunu", + "bunun", + "burada", + "böyle", + "böylece", + "da", + "daha", + "dahi", + "de", + "defa", + "değil", + "diye", + "diğer", + "doksan", + "dokuz", + "dolayı", + "dolayısıyla", + "dört", + "edecek", + "eden", + "ederek", + "edilecek", + "ediliyor", + "edilmesi", + "ediyor", + "elli", + "en", + "etmesi", + "etti", + "ettiği", + "ettiğini", + "eğer", + "gibi", + "göre", + "halen", + "hangi", + "hatta", + "hem", + "henüz", + "hep", + "hepsi", + "her", + "herhangi", + "herkesin", + "hiç", + "hiçbir", + "iki", + "ile", + "ilgili", + "ise", + "itibaren", + "itibariyle", + "için", + "işte", + "kadar", + "karşın", + "katrilyon", + "kendi", + "kendilerine", + "kendini", + "kendisi", + "kendisine", + "kendisini", + "kez", + "ki", + "kim", + "kimden", + "kime", + "kimi", + "kimse", + "kırk", + "milyar", + "milyon", + "mu", + "mü", + "mı", + "nasıl", + "ne", + "neden", + "nedenle", + "nerde", + "nerede", + "nereye", + "niye", + "niçin", + "o", + "olan", + "olarak", + "oldu", + "olduklarını", + "olduğu", + "olduğunu", + "olmadı", + "olmadığı", + "olmak", + "olması", + "olmayan", + "olmaz", + "olsa", + "olsun", + "olup", + "olur", + "olursa", + "oluyor", + "on", + "ona", + "ondan", + "onlar", + "onlardan", + "onları", + "onların", + "onu", + "onun", + "otuz", + "oysa", + "pek", + "rağmen", + "sadece", + "sanki", + "sekiz", + "seksen", + "sen", + "senden", + "seni", + "senin", + "siz", + "sizden", + "sizi", + "sizin", + "tarafından", + "trilyon", + 
"tüm", + "var", + "vardı", + "ve", + "veya", + "ya", + "yani", + "yapacak", + "yapmak", + "yaptı", + "yaptıkları", + "yaptığı", + "yaptığını", + "yapılan", + "yapılması", + "yapıyor", + "yedi", + "yerine", + "yetmiş", + "yine", + "yirmi", + "yoksa", + "yüz", + "zaten", + "çok", + "çünkü", + "öyle", + "üzere", + "üç", + "şey", + "şeyden", + "şeyi", + "şeyler", + "şu", + "şuna", + "şunda", + "şundan", + "şunları", + "şunu", + "şöyle", +]; diff --git a/vendor/elasticlunr-rs/src/lang/zh.rs b/vendor/elasticlunr-rs/src/lang/zh.rs new file mode 100644 index 000000000..aa10d758f --- /dev/null +++ b/vendor/elasticlunr-rs/src/lang/zh.rs @@ -0,0 +1,55 @@ +use super::{common::RegexTrimmer, Language}; +use crate::pipeline::{FnWrapper, Pipeline}; + +#[derive(Clone)] +pub struct Chinese { + jieba: jieba_rs::Jieba, +} + +impl Chinese { + pub fn new() -> Self { + Self { + jieba: jieba_rs::Jieba::new(), + } + } +} + +impl Language for Chinese { + fn name(&self) -> String { + "Chinese".into() + } + fn code(&self) -> String { + "zh".into() + } + + fn tokenize(&self, text: &str) -> Vec<String> { + self.jieba + .cut_for_search(text, false) + .iter() + .map(|s| s.to_string()) + .collect() + } + + fn make_pipeline(&self) -> Pipeline { + Pipeline { + queue: vec![ + Box::new(RegexTrimmer::new("trimmer-zh", r"\p{Unified_Ideograph}\p{Latin}")), + Box::new(FnWrapper("stopWordFilter-zh".into(), stop_word_filter)), + Box::new(FnWrapper("stemmer-zh".into(), stemmer)), + ], + } + } +} + +// TODO: lunr.zh.js has a much larger set of stop words +fn stop_word_filter(token: String) -> Option<String> { + match token.as_str() { + "的" | "了" => None, + _ => Some(token), + } +} + +// lunr.zh.js has an empty stemmer as well +fn stemmer(token: String) -> Option<String> { + Some(token) +} diff --git a/vendor/elasticlunr-rs/src/lib.rs b/vendor/elasticlunr-rs/src/lib.rs new file mode 100644 index 000000000..3efcf4629 --- /dev/null +++ b/vendor/elasticlunr-rs/src/lib.rs @@ -0,0 +1,413 @@ +//!# elasticlunr-rs 
+//! +//! [![Build Status](https://travis-ci.org/mattico/elasticlunr-rs.svg?branch=master)](https://travis-ci.org/mattico/elasticlunr-rs) +//! [![Documentation](https://docs.rs/elasticlunr-rs/badge.svg)](https://docs.rs/elasticlunr-rs) +//! [![Crates.io](https://img.shields.io/crates/v/elasticlunr-rs.svg)](https://crates.io/crates/elasticlunr-rs) +//! +//! A partial port of [elasticlunr](https://github.com/weixsong/elasticlunr.js) to Rust. Intended to +//! be used for generating compatible search indices. +//! +//! Access to all index-generating functionality is provided. Most users will only need to use the +//! [`Index`](struct.Index.html) or [`IndexBuilder`](struct.IndexBuilder.html) types. +//! +//! The [`Language`] trait can be used to implement a custom language. +//! +//! ## Example +//! +//! ``` +//! use std::fs::File; +//! use std::io::Write; +//! use elasticlunr::Index; +//! +//! let mut index = Index::new(&["title", "body"]); +//! index.add_doc("1", &["This is a title", "This is body text!"]); +//! // Add more docs... +//! let mut file = File::create("out.json").unwrap(); +//! file.write_all(index.to_json_pretty().as_bytes()); +//! ``` + +#[macro_use] +extern crate serde_derive; + +#[cfg(test)] +#[macro_use] +extern crate maplit; + +/// The version of elasticlunr.js this library was designed for. +pub const ELASTICLUNR_VERSION: &str = "0.9.5"; + +pub mod config; +pub mod document_store; +pub mod inverted_index; +pub mod lang; +pub mod pipeline; + +use std::collections::BTreeMap; + +use document_store::DocumentStore; +use inverted_index::InvertedIndex; +use lang::English; +pub use lang::Language; +pub use pipeline::Pipeline; + +type Tokenizer = Option<Box<dyn Fn(&str) -> Vec<String>>>; + +/// A builder for an `Index` with custom parameters. 
+/// +/// # Example +/// ``` +/// # use elasticlunr::{Index, IndexBuilder}; +/// let mut index = IndexBuilder::new() +/// .save_docs(false) +/// .add_fields(&["title", "subtitle", "body"]) +/// .set_ref("doc_id") +/// .build(); +/// index.add_doc("doc_a", &["Chapter 1", "Welcome to Copenhagen", "..."]); +/// ``` +pub struct IndexBuilder { + save: bool, + fields: Vec<String>, + field_tokenizers: Vec<Tokenizer>, + ref_field: String, + pipeline: Option<Pipeline>, + language: Box<dyn Language>, +} + +impl Default for IndexBuilder { + fn default() -> Self { + IndexBuilder { + save: true, + fields: Vec::new(), + field_tokenizers: Vec::new(), + ref_field: "id".into(), + pipeline: None, + language: Box::new(English::new()), + } + } +} + +impl IndexBuilder { + pub fn new() -> Self { + Default::default() + } + + pub fn with_language(language: Box<dyn Language>) -> Self { + Self { + language, + ..Default::default() + } + } + + /// Set whether or not documents should be saved in the `Index`'s document store. + pub fn save_docs(mut self, save: bool) -> Self { + self.save = save; + self + } + + /// Add a document field to the `Index`. + /// + /// # Panics + /// + /// Panics if a field with the name already exists. + pub fn add_field(mut self, field: &str) -> Self { + let field = field.into(); + if self.fields.contains(&field) { + panic!("Duplicate fields in index: {}", field); + } + self.fields.push(field); + self.field_tokenizers.push(None); + self + } + + /// Add a document field to the `Index`, with a custom tokenizer for that field. + /// + /// # Panics + /// + /// Panics if a field with the name already exists. 
+ pub fn add_field_with_tokenizer( + mut self, + field: &str, + tokenizer: Box<dyn Fn(&str) -> Vec<String>>, + ) -> Self { + let field = field.into(); + if self.fields.contains(&field) { + panic!("Duplicate fields in index: {}", field); + } + self.fields.push(field); + self.field_tokenizers.push(Some(tokenizer)); + self + } + + /// Add the document fields to the `Index`. + /// + /// # Panics + /// + /// Panics if two fields have the same name. + pub fn add_fields<I>(mut self, fields: I) -> Self + where + I: IntoIterator, + I::Item: AsRef<str>, + { + for field in fields { + self = self.add_field(field.as_ref()) + } + self + } + + /// Set the key used to store the document reference field. + pub fn set_ref(mut self, ref_field: &str) -> Self { + self.ref_field = ref_field.into(); + self + } + + /// Build an `Index` from this builder. + pub fn build(self) -> Index { + let IndexBuilder { + save, + fields, + field_tokenizers, + ref_field, + pipeline, + language, + } = self; + + let index = fields + .iter() + .map(|f| (f.clone(), InvertedIndex::new())) + .collect(); + + let pipeline = pipeline.unwrap_or_else(|| language.make_pipeline()); + + Index { + index, + fields: fields, + field_tokenizers: field_tokenizers, + ref_field: ref_field, + document_store: DocumentStore::new(save), + pipeline, + version: crate::ELASTICLUNR_VERSION, + lang: language, + } + } +} + +/// An elasticlunr search index. 
+#[derive(Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct Index {
+    // Field names, in the order `add_doc` expects the field values.
+    fields: Vec<String>,
+    // Optional per-field tokenizers, looked up by field position. Skipped by
+    // serde, so a deserialized index starts with an empty list.
+    #[serde(skip)]
+    field_tokenizers: Vec<Tokenizer>,
+    // Applied to the raw tokens of every field before they are indexed.
+    pipeline: Pipeline,
+    // Key under which the document reference is stored; serialized as `ref`.
+    #[serde(rename = "ref")]
+    ref_field: String,
+    version: &'static str,
+    // One inverted index per field name.
+    index: BTreeMap<String, InvertedIndex>,
+    document_store: DocumentStore,
+    // Serialized as the language name, see `ser_lang`.
+    #[serde(with = "ser_lang")]
+    lang: Box<dyn Language>,
+}
+
+/// Serde helpers that store the index language as its name and look the
+/// language up again by that name when deserializing.
+mod ser_lang {
+    use crate::Language;
+    use serde::de;
+    use serde::{Deserializer, Serializer};
+    use std::fmt;
+
+    /// Writes the language as the string returned by `Language::name`.
+    pub fn serialize<S>(lang: &Box<dyn Language>, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        serializer.serialize_str(&lang.name())
+    }
+
+    /// Reads a language name and resolves it through `crate::lang::from_name`.
+    pub fn deserialize<'de, D>(deserializer: D) -> Result<Box<dyn Language>, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        deserializer.deserialize_str(LanguageVisitor)
+    }
+
+    struct LanguageVisitor;
+
+    impl<'de> de::Visitor<'de> for LanguageVisitor {
+        type Value = Box<dyn Language>;
+
+        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+            formatter.write_str("a capitalized language name")
+        }
+
+        fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
+        where
+            E: de::Error,
+        {
+            match crate::lang::from_name(v) {
+                Some(l) => Ok(l),
+                None => Err(E::custom(format!("Unknown language name: {}", v))),
+            }
+        }
+    }
+}
+
+impl Index {
+    /// Create a new index with the provided fields.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use elasticlunr::{Index};
+    /// let mut index = Index::new(&["title", "body"]);
+    /// index.add_doc("1", &["this is a title", "this is body text"]);
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if a field with the name already exists.
+    pub fn new<I>(fields: I) -> Self
+    where
+        I: IntoIterator,
+        I::Item: AsRef<str>,
+    {
+        IndexBuilder::new().add_fields(fields).build()
+    }
+
+    /// Create a new index with the provided fields for the given
+    /// [`Language`].
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use elasticlunr::{Index, lang::English};
+    /// let mut index = Index::with_language(Box::new(English::new()), &["title", "body"]);
+    /// index.add_doc("1", &["this is a title", "this is body text"]);
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if a field with the name already exists.
+    pub fn with_language<I>(lang: Box<dyn Language>, fields: I) -> Self
+    where
+        I: IntoIterator,
+        I::Item: AsRef<str>,
+    {
+        IndexBuilder::with_language(lang).add_fields(fields).build()
+    }
+
+    /// Add the data from a document to the index.
+    ///
+    /// *NOTE: The elements of `data` should be provided in the same order as
+    /// the fields used to create the index.*
+    ///
+    /// Token frequencies are counted separately for every field, so a token is
+    /// only recorded in the inverted index of the field it occurs in.
+    ///
+    /// # Example
+    /// ```
+    /// # use elasticlunr::Index;
+    /// let mut index = Index::new(&["title", "body"]);
+    /// index.add_doc("1", &["this is a title", "this is body text"]);
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if `data` yields more values than the index has fields.
+    pub fn add_doc<I>(&mut self, doc_ref: &str, data: I)
+    where
+        I: IntoIterator,
+        I::Item: AsRef<str>,
+    {
+        let mut doc = BTreeMap::new();
+        doc.insert(self.ref_field.clone(), doc_ref.into());
+
+        for (i, value) in data.into_iter().enumerate() {
+            let text: &str = value.as_ref();
+            let field = &self.fields[i];
+            doc.insert(field.clone(), text.to_string());
+
+            // The reference field is stored with the document but not indexed.
+            if field == &self.ref_field {
+                continue;
+            }
+
+            // `field_tokenizers` is `#[serde(skip)]`, so an index restored
+            // through `Deserialize` has no entries here. Look the slot up
+            // without indexing and fall back to the language tokenizer.
+            let custom_tokenizer = self.field_tokenizers.get(i).and_then(|slot| slot.as_ref());
+            let raw_tokens = match custom_tokenizer {
+                Some(tokenizer) => tokenizer(text),
+                None => self.lang.tokenize(text),
+            };
+
+            let tokens = self.pipeline.run(raw_tokens);
+
+            self.document_store
+                .add_field_length(doc_ref, field, tokens.len());
+
+            // A fresh map per field: reusing one map for the whole document
+            // would carry the counts of earlier fields into this field's
+            // inverted index.
+            let mut token_freq = BTreeMap::new();
+            for token in tokens {
+                *token_freq.entry(token).or_insert(0u64) += 1;
+            }
+
+            for (token, count) in &token_freq {
+                let freq = (*count as f64).sqrt();
+
+                self.index
+                    .get_mut(field)
+                    .unwrap_or_else(|| panic!("InvertedIndex does not exist for field {}", field))
+                    .add_token(doc_ref, token, freq);
+            }
+        }
+
+        self.document_store.add_doc(doc_ref, doc);
+    }
+
+    /// Returns the names of the fields of this index.
+    pub fn get_fields(&self) -> &[String] {
+        &self.fields
+    }
+
+    /// Returns the index, serialized to pretty-printed JSON.
+    ///
+    /// # Panics
+    ///
+    /// Panics if serialization fails.
+    pub fn to_json_pretty(&self) -> String {
+        serde_json::to_string_pretty(&self).unwrap()
+    }
+
+    /// Returns the index, serialized to JSON.
+    ///
+    /// # Panics
+    ///
+    /// Panics if serialization fails.
+    pub fn to_json(&self) -> String {
+        serde_json::to_string(&self).unwrap()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn add_field_to_builder() {
+        let idx = IndexBuilder::new()
+            .add_fields(&["foo", "bar", "baz"])
+            .build();
+
+        let idx_fields = idx.get_fields();
+        for f in &["foo", "bar", "baz"] {
+            assert_eq!(idx_fields.iter().filter(|x| x == f).count(), 1);
+        }
+    }
+
+    #[test]
+    fn adding_document_to_index() {
+        let mut idx = Index::new(&["body"]);
+        idx.add_doc("1", &["this is a test"]);
+
+        assert_eq!(idx.document_store.len(), 1);
+        assert_eq!(
+            idx.document_store.get_doc("1").unwrap(),
+            btreemap! {
+                "id".into() => "1".into(),
+                "body".into() => "this is a test".into(),
+            }
+        );
+    }
+
+    #[test]
+    fn adding_document_with_empty_field() {
+        let mut idx = Index::new(&["title", "body"]);
+
+        idx.add_doc("1", &["", "test"]);
+        assert_eq!(idx.index["body"].get_doc_frequency("test"), 1);
+        assert_eq!(idx.index["body"].get_docs("test").unwrap()["1"], 1.);
+    }
+
+    #[test]
+    fn token_frequencies_are_counted_per_field() {
+        let mut idx = Index::new(&["title", "body"]);
+
+        // One occurrence in the title, four in the body: sqrt(1) and sqrt(4).
+        idx.add_doc("1", &["test", "test test test test"]);
+        assert_eq!(idx.index["title"].get_docs("test").unwrap()["1"], 1.);
+        assert_eq!(idx.index["body"].get_docs("test").unwrap()["1"], 2.);
+    }
+
+    #[test]
+    #[should_panic]
+    fn creating_index_with_identical_fields_panics() {
+        let _idx = Index::new(&["title", "body", "title"]);
+    }
+}
diff --git a/vendor/elasticlunr-rs/src/pipeline.rs b/vendor/elasticlunr-rs/src/pipeline.rs new file mode 100644 index 000000000..a20de3f11 --- /dev/null +++ b/vendor/elasticlunr-rs/src/pipeline.rs @@ -0,0 +1,65 @@ +//! Defines the pipeline which processes text for inclusion in the index. Most users do not need +//! to use this module directly.
+
+use serde::ser::{Serialize, SerializeSeq, Serializer};
+
+/// A named processing step that either passes a token on or drops it.
+pub trait PipelineFn {
+    /// Name written for this step when a `Pipeline` is serialized.
+    fn name(&self) -> String;
+
+    /// Returns the token to pass on, or `None` to remove it.
+    fn filter(&self, token: String) -> Option<String>;
+}
+
+/// Pairs a name with a plain function pointer so it can act as a `PipelineFn`.
+#[derive(Clone)]
+pub struct FnWrapper(pub String, pub fn(String) -> Option<String>);
+
+impl PipelineFn for FnWrapper {
+    fn name(&self) -> String {
+        let FnWrapper(label, _) = self;
+        label.clone()
+    }
+
+    fn filter(&self, token: String) -> Option<String> {
+        let FnWrapper(_, apply) = self;
+        apply(token)
+    }
+}
+
+/// An ordered list of `PipelineFn` steps run on tokens to prepare them for searching.
+#[derive(Deserialize)]
+pub struct Pipeline {
+    #[serde(skip_deserializing)]
+    pub queue: Vec<Box<dyn PipelineFn>>,
+}
+
+impl Serialize for Pipeline {
+    /// Serializes the pipeline as the sequence of its step names.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let mut names = serializer.serialize_seq(Some(self.queue.len()))?;
+        self.queue
+            .iter()
+            .try_for_each(|step| names.serialize_element(&step.name()))?;
+        names.end()
+    }
+}
+
+impl Pipeline {
+    /// Pass every token through all steps in `queue`, in order.
+    ///
+    /// A token is discarded as soon as one step returns `None` for it, so the result may
+    /// hold fewer tokens than `tokens`; later steps are not invoked for a discarded token.
+    pub fn run(&self, tokens: Vec<String>) -> Vec<String> {
+        // `try_fold` stops at the first step that yields `None`, which makes
+        // `filter_map` skip that token.
+        tokens
+            .into_iter()
+            .filter_map(|token| self.queue.iter().try_fold(token, |t, step| step.filter(t)))
+            .collect()
+    }
+}
diff --git a/vendor/elasticlunr-rs/tests/data/ar.in.txt b/vendor/elasticlunr-rs/tests/data/ar.in.txt new file mode 100644 index 000000000..05067a8c6 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/ar.in.txt @@ -0,0 +1,7 @@ +استعار جحا مرة آنية من جاره وعندما أعادها له أعاد معها آنية صغيرة + فسأله جاره لماذا أعدت مع أنيتي آنية صغيرة يا جحا؟ +فقال له جحا: إنّ آنيتك ولدت في الأمس آنية صغيرة وإنّها الآن من حقك، فرح الرجل وأخذ الطنجرة ودخل بيته، +وبعد فترة من الزمان ذهب جحا إلى جاره وطلب منه أنية أخرى، فأعطاه جاره ما طلب، مرّ وقت طويل ولم يُعد جحا الآنية، +فذهب جاره إلى بيته ليطلبها منه، فاستقبله جحا باكياً منتحباً، +فقال له الرجل: مالي أراك باكياً يا جحا؟!! فقال له جحا وهو يبكي إنّ آنيتك توفيت بالأمس يا صاحبي، +فقال له جاره وهو غاضب: وكيف لآنيةٍ أن تموت يا رجل؟!! فقال جحا أتصدق أنّ إناء قد يلد ولا تصدق أنّه قد يموت؟! diff --git a/vendor/elasticlunr-rs/tests/data/ar.out.txt b/vendor/elasticlunr-rs/tests/data/ar.out.txt new file mode 100644 index 000000000..fc295d0c6 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/ar.out.txt @@ -0,0 +1,118 @@ +استعار +جحا +مرة +انية +من +جاره +وعندما +اعادها +له +اعاد +معها +انية +صغيرة +فساله +جاره +لماذا +اعدت +مع +انيتي +انية +صغيرة +يا +جحا؟ +فقال +له +جحا: +ان +انيتك +ولدت +في +الامس +انية +صغيرة +وانها +الان +من +حقك، +فرح +الرجل +واخذ +الطنجرة +ودخل +بيته، +وبعد +فترة +من +الزمان +ذهب +جحا +الى +جاره +وطلب +منه +انية +اخرى، +فاعطاه +جاره +ما +طلب، +مر +وقت +طويل +ولم +يعد +جحا +الانية، +فذهب +جاره +الى +بيته +ليطلبها +منه، +فاستقبله +جحا +باكيا +منتحبا، +فقال +له +الرجل: +مالي +اراك +باكيا +يا +جحا؟!!
+فقال +له +جحا +وهو +يبكي +ان +انيتك +توفيت +بالامس +يا +صاحبي، +فقال +له +جاره +وهو +غاضب: +وكيف +لانية +ان +تموت +يا +رجل؟!! +فقال +جحا +اتصدق +ان +اناء +قد +يلد +ولا +تصدق +انه +قد +يموت؟! diff --git a/vendor/elasticlunr-rs/tests/data/da.in.txt b/vendor/elasticlunr-rs/tests/data/da.in.txt new file mode 100644 index 000000000..b88e6757e --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/da.in.txt @@ -0,0 +1,60 @@ +I det lille Værtshus i Genf, hvor Russerne plejede at have deres +Tilhold, nød Helene hurtigt sit beskedne Aftensmaaltid uden som +sædvanlig at drikke en Kop Kaffe dertil -- en Luksus, hun ikke havde +nægtet sig lige siden den Dag, hun havde faaet sine Elever i Russisk. +Men i Aften maatte hun skynde sig; et længe ventet Brev fra Rusland laa +gemt i hendes Lomme. Hun havde for et Øjeblik siden faaet det af den +gamle, hvidhaarede Urmager, til hvem hele hendes udenlandske +Korrespondance blev adresseret, og hun brændte af Utaalmodighed efter at +erfare de Nyheder, som det i al Almindelighed maatte indeholde, og efter +at faa det overbragt til sin Ven Andrey, hvem det dog fornemmelig angik. + +Hun vekslede nogle Ord med en anden landflygtig, krydsede imellem de +mange Rækker smaa Borde, ved hvilke der overalt sad Mænd i +Arbejdsbluser, og naaede ud paa Gaden. Klokken var kun halvsyv, hun var +sikker paa at træffe Andrey hjemme. Han boede i Nærheden, og efter fem +Minutters Forløb befandt Helene sig uden for hans Dør. Hendes smukke, +noget stillestaaende Ansigt havde faaet en let Farve af den hurtige +Gang. + +Andrey var alene, i Færd med at gøre Uddrag af en Statistik, som han +benyttede til Grundlag for den Artikel, han hver Uge skrev til et +russisk Provinsblad. Han vendte Hovedet og rejste sig med udstrakt Haand +for at byde sin Gæst velkommen. + +„Her er et Brev til dig!“ sagde Helene, idet hun gav ham Haanden. + +„Naa, endelig!“ udbrød han. 
+ +Andrey var en Mand paa seks-syv og tyve Aar med et alvorligt, godmodigt +Ansigt, lidt skarpt og regelmæssigt i Trækkene. Over hans Pande laa Spor +af tidlige Sorger, og hans Øjne var ualmindelig dybe og tankefulde, men +dette forringede ikke det Indtryk af Ro og Bestemthed, man fik af hele +hans kraftige, velformede Skikkelse. + +Der gled en let Rødme over hans Pande, idet hans slanke, muskelstærke +Fingre med nervøs Hast rev Konvolutten op og fremdrog et stort Ark +Papir, bedækket med Linier i vid Afstand fra hinanden, skrevne med en +uregelmæssig, sammentrængt Haandskrift. + +Helene, der ikke syntes at være mindre utaalmodig end han, gik hen til +ham og lagde Haanden paa hans Skulder for ogsaa at kunne læse i Brevet. + +„Det er bedre, at vi sætter os ned, Helene!“ sagde den unge Mand. „Du +skygger for Lyset med dine Krøller!“ + +Det mere end tarvelige Værelse var kun sparsomt oplyst af en eneste +Lampe, dækket af en grøn Papirskærm, saaledes at kun en Del af +Brædegulvet, Benene paa nogle simple Stole og den nederste Del af en +Mahogni Kommode -- Værelsets fornemste Prydelse -- var helt oplyst. +Væggene, som var betrukne med gult Tapetpapir og prydede med et billigt +Litografi af den schweiziske General Dufour, et Landskab, et Fotografi +af Værtindens afdøde Ægteherre og hendes Eksamensbevis fra Skoletiden, +indfattet i Glas og Ramme, var hyllede i et diskret Tusmørke, meget +klædeligt for disse Kunstværker, men umuligt at læse i. + +Andrey stillede endnu en Stol hen til det runde Spisebord, som var +dækket med Bøger og Papirer, og drejede Lampeskærmen saaledes, at det +Hjørne, han plejede at bruge som Skrivebord, var helt oplyst. Helene +satte sig ved Siden af ham og saa nær, at hendes Haar undertiden berørte +hans; men ingen af dem ænsede det, saa optagne var de af deres Tanker.
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/tests/data/da.out.txt b/vendor/elasticlunr-rs/tests/data/da.out.txt new file mode 100644 index 000000000..332028be2 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/da.out.txt @@ -0,0 +1,302 @@ +lil +værtshus +genf +rus +plejed +tilhold +nød +hel +hurt +beskedn +aftensmaaltid +uden +sædvan +drik +kop +kaf +dertil +luksus +nægt +lig +sid +dag +faaet +elev +russisk +aft +maat +skynd +læng +vent +brev +rusland +laa +gemt +lom +øjeblik +sid +faaet +gaml +hvidhaared +urmag +hvem +hel +udenlandsk +korrespondanc +adres +brænd +utaalmod +erfar +nyhed +al +almind +maat +indehold +faa +overbrag +ven +andrey +hvem +fornem +angik +veksled +ord +landflyg +krydsed +imellem +ræk +smaa +bord +ved +hvilk +overalt +sad +mænd +arbejdsblus +naaed +paa +gad +klok +kun +halvsyv +sik +paa +træf +andrey +hjem +boed +nær +fem +minut +forløb +befand +hel +uden +dør +smuk +stillestaa +ans +faaet +let +farv +hurt +gang +andrey +alen +færd +gør +uddrag +statistik +benytted +grundlag +artikel +hver +uge +skrev +russisk +provinsblad +vend +hoved +rejst +udstrak +haand +byd +gæst +velkom +brev +sagd +hel +idet +gav +haand +naa +end +udbrød +andrey +mand +paa +sek +syv +tyv +aar +alvor +godmod +ans +lidt +skarpt +regelmæs +træk +pand +laa +spor +tid +sorg +øjn +ualmind +dyb +tankefuld +forringed +indtryk +ro +bestemt +fik +hel +kraft +velformed +skik +gled +let +rødm +pand +idet +slank +muskelstærk +fingr +nervøs +hast +rev +konvolut +fremdrog +stort +ark +papir +bedæk +lini +vid +afstand +hinand +skrevn +uregelmæs +sammentræng +haandskrift +hel +synt +mindr +utaalmod +gik +hen +lagd +haand +paa +skuld +ogsaa +læs +brev +bedr +sæt +hel +sagd +ung +mand +skyg +lys +din +krøl +mer +tarv +vær +kun +sparsomt +oplyst +enest +lamp +dæk +grøn +papirskærm +saaled +kun +del +brædegulv +ben +paa +simpl +stol +nederst +del +mahogni +kommod +vær +fornemst +pryd +helt +oplyst +væg +betrukn +gult +tapetpapir +pryded +bil +litografi +schweizisk 
+general +dufour +landskab +fotografi +værtind +afdød +ægteher +eksamensbevis +skoletid +indfat +glas +ram +hylled +diskr +tusmørk +klæd +kunstværk +umu +læs +andrey +stilled +endnu +stol +hen +rund +spisebord +dæk +bøg +papir +drejed +lampeskærm +saaled +hjørn +plejed +brug +skrivebord +helt +oplyst +hel +sat +ved +sid +saa +nær +haar +undertid +berørt +ing +ænsed +saa +optagn +tank diff --git a/vendor/elasticlunr-rs/tests/data/de.in.txt b/vendor/elasticlunr-rs/tests/data/de.in.txt new file mode 100644 index 000000000..23354f245 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/de.in.txt @@ -0,0 +1,68 @@ +Briefe und die letzten Vorbereitungen füllten den gestrigen Tag. +Müde und abgespannt, eigentlich krank und fiebernd stieg ich in Graz +Abends 6 Uhr in den Eisenbahnwagen; erst da ich heute Morgens das Meer +wieder sah und dem alten Lieblinge das freudige Θάλαττα! Θάλαττα! +entgegenrufen konnte, ward mir wieder wohl in Leib und Seele. + +Die Nacht war kalt gewesen, wie wenn dem Kalender zum Trotze der Winter +noch fortdauere. Oder wollte sich die Heimath nur eindringlich dem +Scheidenden in’s Gedächtniß heften? Umsonst die Angst, daß ich sie +vergesse! es liegt ja die Nothwendigkeit der Rückkehr vor mir. Lange +konnte ich den Schlaf nicht finden; dafür fand ich in der Ungestörtheit +des Alleinseins mich selbst wieder, der sich in den Sorgen und Mühen +der letzten Monate verloren hatte. Es ist das ein Vortheil des +Reisens, daß es uns mit der Unabhängigkeit auch die unabweisliche +Selbständigkeit gibt; herausgerissen aus der Bequemlichkeit der +gewöhnlichen Verhältnisse, zwingt es uns die Gedanken und die Hilfe, +die wir sonst rechts und links neben uns schon hergerichtet fanden, +nunmehr in uns selbst zu suchen. Menschen, die sich bisher noch gar +nicht kannten, haben sich oft am ersten Reisetage erst erkennen lernen. 
+Ein Gang in die weite Welt ist die beste Schule für das Leben, und +gerade für uns Kinder der Civilisation eine um so unentbehrlichere, +als wir in stubenhockerischen Gewohnheiten den Contact mit der Natur +verloren haben. Diese und sich selbst findet der verzogene Mensch +dort wieder und so auch die Freiheit, die nur dort ist, wo der Mensch +allein, oder wo er fremd unter Hunderten seines Gleichen steht. + +Nach 6 Uhr erwache ich. Ich sehe den Karst, auf dessen Höhe wir +fahren; die Sonne ist vom Regen versteckt, der die Steinfelder dieser +Berge noch unwirthlicher als sonst erscheinen läßt. In Nabresina hält +der Zug; die Bahn nach Italien trennt sich hier von der, welche den +Karst hinab nach Triest führt. Der Bahnhof ist groß und zweckmäßig +eingerichtet. Schon singt Alles das Italienische. Erfreut durch die +bekannten Klänge beobachte ich das zu- und abströmende Gedränge. +Ein Conducteur war mir darin aufgefallen, weil seine Blicke mich +unablässig verfolgten. War der Mann ein Vertrauter der Polizei und +hielt er mich für einen Flüchtling? Jetzt drängte er sich zu an die +offene Wagenthüre, umfaßte meine Knie, er hatte mich erkannt! Es war +Venerando, der Gondolier, der mich in Venedig immer geführt hatte. Wie +aber auch hätte ich ihn, den zierlichen, schlanken Burschen, der mich +so oft in der ärgsten Sommerhitze, nichts als ein Hemd und die leichte +Hose an, nach dem Lido, nach den Inseln, nach Torcello oder nach San +Francesco del Deserto gerudert hatte, in der steifen, zugeknöpften +Eisenbahnuniform erkennen sollen? Früh Morgens schon klopfte er damals +an meine Thüre. Ich wollte die Leute schonen und so verneinte ich +die Absicht einer Fahrt. Er aber kannte die stille Neigung meiner +Wünsche und aufopfernd wußte er mich bald zu überreden, mich ihm und +seinem Genossen hinzugeben. 
Landeten wir dann nach stundenlanger +Fahrt an einsam abgelegener Küste und hatte ich die Früchte, die ich +mitgenommen, mit ihnen getheilt, so geleitete er mich in das Innere des +Landes, dem Fremdlinge die herrlichen Reste einer abgestorbenen Kunst +mit all’ dem Schönheitssinn und all’ der Liebe zu seinem Vaterlande +zu erklären, die dem Südländer, und dem Italiener insbesondere, +eigen sind. War ich müde geworden, so ruhten wir neben einander auf +dem Strande aus, dem das Meer mit leicht aufschlagenden Wellen, die +immer näher unsern Füßen kamen, vertraute Grüße aus entlegenen +Fernen zubrachte. Sein fortwährendes Gelispel machte die Rede meines +Venerando noch geschwätziger. Von Venedig erzählte er mir, das vor uns +lag im Dufte gluthvoller Mittagssonne, von den Lagunen und von den +Geheimnissen, die sich nächtlich darauf begeben; zuweilen auch, wenn +ich ihm besonders geneigt schien, von sich und seinen Freunden und +daß er schon einmal das Messer gezückt, weil man seinem Weibe zu nahe +treten wollte. Ich hörte ihm immer mit regem Interesse zu; seine Worte +waren gut gewählt und seine Stimme klang melodisch. Erst Abends, wenn +die Sonne schon auf den schneeigen Gipfeln der Alpen ruhte, ruderte +er mich zurück durch das purpurfarbene Meer nach der goldbethürmten, +kuppelbedeckten Stadt. Mit mir trug ich kostbare Erinnerungen, die +ich unvergeßlich festhalte und ihm treulich danke. Sein Gefährte hieß +Beppo, aber er war vergleichsweise unbedeutend.
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/tests/data/de.out.txt b/vendor/elasticlunr-rs/tests/data/de.out.txt new file mode 100644 index 000000000..43423963c --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/de.out.txt @@ -0,0 +1,333 @@ +brief +letzt +vorbereit +fullt +gestrig +tag +mud +abgespannt +eigent +krank +fiebernd +stieg +graz +abend +uhr +eisenbahnwag +erst +heut +morg +meer +sah +alt +liebling +freudig +entgegenruf +konnt +ward +wohl +leib +seel +nacht +kalt +kalend +trotz +wint +fortdau +heimath +eindring +scheidend +in’s +gedachtniss +heft +umson +angst +vergess +liegt +ja +nothwend +ruckkehr +lang +konnt +schlaf +find +dafur +fand +ungestort +alleinsein +sorg +muh +letzt +monat +verlor +vortheil +reis +unabhang +unabweis +selbstand +gibt +herausgeriss +bequem +gewohn +verhaltnis +zwingt +gedank +hilf +recht +link +neb +schon +hergerichtet +fand +nunmehr +such +mensch +bish +gar +kannt +oft +erst +reisetag +erst +erkenn +lern +gang +weit +welt +best +schul +leb +gerad +kind +civilisation +unentbehr +stubenhocker +gewohn +contact +natur +verlor +findet +verzog +mensch +freiheit +mensch +allein +fremd +hundert +gleich +steht +uhr +erwach +seh +karst +hoh +fahr +sonn +reg +versteckt +steinfeld +berg +unwirth +erschein +lasst +nabresina +halt +zug +bahn +itali +trennt +karst +hinab +triest +fuhrt +bahnhof +gross +zweckmass +eingerichtet +schon +singt +italien +erfreut +bekannt +klang +beobacht +abstrom +gedrang +conducteur +darin +aufgefall +blick +unablass +verfolgt +mann +vertraut +polizei +hielt +fluchtling +drangt +off +wagenthur +umfasst +knie +erkannt +venerando +gondoli +vened +imm +gefuhrt +hatt +zierlich +schlank +bursch +oft +argst +sommerhitz +hemd +leicht +hos +lido +inseln +torcello +san +francesco +del +deserto +gerudert +steif +zugeknopft +eisenbahnuniform +erkenn +soll +fruh +morg +schon +klopft +damal +thur +leut +schon +verneint +absicht +fahrt +kannt +still +neigung +wunsch +aufopfernd +wusst +bald +uberred 
+genoss +hinzugeb +landet +stundenlang +fahrt +einsam +abgeleg +kust +frucht +mitgenomm +getheilt +geleitet +inn +land +fremdling +herrlich +rest +abgestorb +kunst +all +schonheitssinn +all +lieb +vaterland +erklar +sudland +itali +insbesond +eig +mud +geword +ruht +neb +einand +strand +meer +leicht +aufschlag +well +imm +nah +uns +fuss +kam +vertraut +gruss +entleg +fern +zubracht +fortwahr +gelispel +macht +red +venerando +geschwatz +vened +erzahlt +lag +duft +gluthvoll +mittagssonn +lagun +geheimnis +nachtlich +darauf +begeb +zuweil +besond +geneigt +schien +freund +schon +mess +gezuckt +weib +nah +tret +hort +imm +reg +interess +wort +gut +gewahlt +stimm +klang +melod +erst +abend +sonn +schon +schneeig +gipfeln +alp +ruht +rudert +zuruck +purpurfarb +meer +goldbethurmt +kuppelbedeckt +stadt +trug +kostbar +erinner +unvergess +festhalt +treulich +dank +gefahrt +hiess +beppo +vergleichsweis +unbedeut diff --git a/vendor/elasticlunr-rs/tests/data/du.in.txt b/vendor/elasticlunr-rs/tests/data/du.in.txt new file mode 100644 index 000000000..cc66b87fb --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/du.in.txt @@ -0,0 +1,68 @@ +[Doel der vertaling.] + +Het doel van deze vertaling is den Nederlandschen lezer in kennis te +stellen met den volledigen inhoud van Dante's Gedicht. De vertaling +is zooveel mogelijk woordelijk, kan dus ook als handleiding dienen bij +het lezen en bestudeeren van den oorspronkelijken, Italiaanschen tekst. + +[Waarom in proza?] + +De vertaling is in Proza. + +Waarom? Omdat de woorden, waarin het Gedicht vervat is, den +dichter werden ingegeven in het scheppingsoogenblik door de +volheid zijner fantasieën, gevoelens en gedachten zelve. 
Ook in het +practisch-onmogelijke, maar theoretisch stelbare geval dat de vertaler +evenzeer vervuld ware als de dichter van hetgeen uitgedrukt moet +worden, zoude het onmogelijk zijn, dat de tweede, de Nederlandsche +dichter kwam tot een uitdrukkingsvorm, die ook maar eenigszins +gelijkliep met den vorm door den eersten, den Italiaanschen dichter +gevonden. Dante zelf zegt op dit stuk: "En daarom wete een ieder, +dat geen enkele zaak, door den band der muziek harmonisch uitgedrukt, +uit hare eigene taal in eene andere kan worden overgebracht, zonder +dat men al hare zoetheid en harmonie verbreke." + +[Naam v.h. gedicht.] + +Het hier den Nederlandschen lezer aangeboden werk is het eerste +van drie gedichten (Canzoni), "de Hel," "de Louteringsberg" en "het +Paradijs," door Dante tezamen genoemd "Comedia", om de eenvoudige +reden, dat het er in vervatte verhaal begint met 's Dichters tocht +door de Hel, dus met treurigheid, vervolgens handelt van 's Dichters +tocht langs den Louteringsberg en eindigt met 's Dichters tocht door +den Hemel, of het Paradijs, dus een blijden afloop heeft. Comedia +beteekent niet anders dan "blij-eindend Dicht." "Divina" is de Comedia +eerst later door een bewonderend nageslacht genoemd. + +[Wat de inl. behelst.] + +Het Gedicht, waarin deze tocht verhaald wordt, en alles tot de kleinste +bijzonderheden den lezer voor oogen wordt gesteld, kan eigenlijk +geheel voor zich zelf spreken. Daar echter de Dichter op zijn tocht +door die drie Rijken een ontzaggelijk groot aantal personen ontmoet, +zoowel uit zijn eigen als uit vroegere tijden, hebben wij, vooral tot +beter begrip van de gesprekken met personen uit 's Dichters eigen +tijd, gemeend den lezer geen onwelkomen dienst te bewijzen, door +eenige hoofdzaken aangaande 's Dichters leven en tijd mede te deelen. + +[Dante niet duister, wel diep.] 
+ +Even wil ik nog den lezer op het hart drukken, dat het Gedicht nooit +duister is, wèl op sommige plaatsen zeer diep van zin, zoodat menige +plaats, behalve den eersten, bij de lezing onmiddellijk begrijpbaren +zin bij nadere beschouwing blijkt nog veel meer te bevatten. Zulke +plaatsen hebben dan ook aanleiding gegeven tot oneindige discussie, +ten onrechte, daar er van discussie geen kwestie mocht zijn, nl. van +een strijd van verschillende partijen, die ieder voor zich gelijk +willen hebben, maar wel van een wedstrijd wie het diepst in den zin +des dichters vermocht door te dringen. + +Maar vóór alles zij nog dit gezegd. Dante's Gedicht is niet maar eene +schildering van zijn tijd; het is de schildering van den mensch, +in al zijne vermogens en mogelijkheden, in al zijne eigenschappen, +zoowel die hem tot de diepste zonde, als die hem tot den hoogsten +heilstaat brengen. Daarom voert Dante, zelf alle ellende, loutering en +geleidelijk-groeiend geluk doorlevend, den mensch van de gruwelijkste +onvergoeilijke zonden, door die welke door boetedoeningen overwonnen +kunnen worden tot het hoogste zielegeluk, d. w. z. door de Hel, +langs den Louteringsberg naar den Hemel. 
diff --git a/vendor/elasticlunr-rs/tests/data/du.out.txt b/vendor/elasticlunr-rs/tests/data/du.out.txt new file mode 100644 index 000000000..c389132ac --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/du.out.txt @@ -0,0 +1,320 @@ +doel +vertal +doel +vertal +den +nederlandsch +lezer +kennis +stell +den +volled +inhoud +dante' +gedicht +vertal +zoovel +mogelijk +woordelijk +handleid +dien +lez +bestuder +den +oorspronk +italiaansch +tekst +waarom +proza +vertal +proza +waarom +woord +waarin +gedicht +vervat +den +dichter +werd +ingegev +scheppingsoogenblik +volheid +zijner +fantasieen +gevoelen +gedacht +zelv +practisch +onmog +theoretisch +stelbar +geval +vertaler +evenzer +vervuld +war +dichter +hetgen +uitgedrukt +zoud +onmog +twed +nederlandsch +dichter +kwam +uitdrukkingsvorm +eenigszin +gelijkliep +den +vorm +den +eerst +den +italiaansch +dichter +gevond +dant +zegt +stuk +daarom +wet +ieder +enkel +zak +den +band +muziek +harmonisch +uitgedrukt +har +eig +tal +een +overgebracht +har +zoetheid +harmonie +verbrek +nam +v.h +gedicht +den +nederlandsch +lezer +aangebod +werk +eerst +drie +gedicht +canzoni +hel +louteringsberg +paradijs +dant +tezam +genoemd +comedia +eenvoud +red +vervat +verhal +begint +s +dichter +tocht +hel +treurig +vervolgen +handelt +s +dichter +tocht +lang +den +louteringsberg +eindigt +s +dichter +tocht +den +hemel +paradijs +blijd +aflop +comedia +beteekent +ander +blij +eindend +dicht +divina +comedia +eerst +later +bewonder +nageslacht +genoemd +inl +behelst +gedicht +waarin +tocht +verhaald +kleinst +bijzonder +den +lezer +oog +gesteld +eigen +gehel +sprek +echter +dichter +tocht +drie +rijk +ontzagg +grot +aantal +person +ontmoet +zoowel +eig +vroeger +tijd +wij +vooral +beter +begrip +gesprek +person +s +dichter +eig +tijd +gemeend +den +lezer +onwelkom +dienst +bewijz +eenig +hoofdzak +aangaand +s +dichter +lev +tijd +med +del +dant +duister +wel +diep +even +den +lezer +hart +druk +gedicht +nooit +duister +wèl +sommig +plaats 
+zer +diep +zin +zoodat +menig +plat +behalv +den +eerst +lezing +onmiddel +begrijpbar +zin +nader +beschouw +blijkt +bevat +zulk +plaats +aanleid +gegev +oneind +discussie +ten +onrecht +discussie +kwestie +mocht +nl +strijd +verschill +partij +ieder +gelijk +will +wel +wedstrijd +diepst +den +zin +des +dichter +vermocht +dring +vor +gezegd +dante' +gedicht +een +schilder +tijd +schilder +den +mensch +zijn +vermogen +mogelijk +zijn +eigenschapp +zoowel +diepst +zond +den +hoogst +heilstat +breng +daarom +voert +dant +all +ellend +louter +geleid +groeiend +geluk +doorlev +den +mensch +gruwelijkst +onvergoei +zond +welk +boetedoen +overwonn +hoogst +zielegeluk +d +w +z +hel +lang +den +louteringsberg +den +hemel diff --git a/vendor/elasticlunr-rs/tests/data/en.in.txt b/vendor/elasticlunr-rs/tests/data/en.in.txt new file mode 100644 index 000000000..7e1209883 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/en.in.txt @@ -0,0 +1,111 @@ +It is a truth universally acknowledged, that a single man in possession +of a good fortune, must be in want of a wife. + +However little known the feelings or views of such a man may be on his +first entering a neighbourhood, this truth is so well fixed in the minds +of the surrounding families, that he is considered the rightful property +of some one or other of their daughters. + +“My dear Mr. Bennet,” said his lady to him one day, “have you heard that +Netherfield Park is let at last?” + +Mr. Bennet replied that he had not. + +“But it is,” returned she; “for Mrs. Long has just been here, and she +told me all about it.” + +Mr. Bennet made no answer. + +“Do you not want to know who has taken it?” cried his wife impatiently. + +“_You_ want to tell me, and I have no objection to hearing it.” + +This was invitation enough. + +“Why, my dear, you must know, Mrs. 
Long says that Netherfield is taken +by a young man of large fortune from the north of England; that he came +down on Monday in a chaise and four to see the place, and was so much +delighted with it, that he agreed with Mr. Morris immediately; that he +is to take possession before Michaelmas, and some of his servants are to +be in the house by the end of next week.” + +“What is his name?” + +“Bingley.” + +“Is he married or single?” + +“Oh! Single, my dear, to be sure! A single man of large fortune; four or +five thousand a year. What a fine thing for our girls!” + +“How so? How can it affect them?” + +“My dear Mr. Bennet,” replied his wife, “how can you be so tiresome! You +must know that I am thinking of his marrying one of them.” + +“Is that his design in settling here?” + +“Design! Nonsense, how can you talk so! But it is very likely that he +_may_ fall in love with one of them, and therefore you must visit him as +soon as he comes.” + +“I see no occasion for that. You and the girls may go, or you may send +them by themselves, which perhaps will be still better, for as you are +as handsome as any of them, Mr. Bingley may like you the best of the +party.” + +“My dear, you flatter me. I certainly _have_ had my share of beauty, but +I do not pretend to be anything extraordinary now. When a woman has five +grown-up daughters, she ought to give over thinking of her own beauty.” + +“In such cases, a woman has not often much beauty to think of.” + +“But, my dear, you must indeed go and see Mr. Bingley when he comes into +the neighbourhood.” + +“It is more than I engage for, I assure you.” + +“But consider your daughters. Only think what an establishment it would +be for one of them. Sir William and Lady Lucas are determined to +go, merely on that account, for in general, you know, they visit no +newcomers. Indeed you must go, for it will be impossible for _us_ to +visit him if you do not.” + +“You are over-scrupulous, surely. I dare say Mr. 
Bingley will be very +glad to see you; and I will send a few lines by you to assure him of my +hearty consent to his marrying whichever he chooses of the girls; though +I must throw in a good word for my little Lizzy.” + +“I desire you will do no such thing. Lizzy is not a bit better than the +others; and I am sure she is not half so handsome as Jane, nor half so +good-humoured as Lydia. But you are always giving _her_ the preference.” + +“They have none of them much to recommend them,” replied he; “they are +all silly and ignorant like other girls; but Lizzy has something more of +quickness than her sisters.” + +“Mr. Bennet, how _can_ you abuse your own children in such a way? You +take delight in vexing me. You have no compassion for my poor nerves.” + +“You mistake me, my dear. I have a high respect for your nerves. They +are my old friends. I have heard you mention them with consideration +these last twenty years at least.” + +“Ah, you do not know what I suffer.” + +“But I hope you will get over it, and live to see many young men of four +thousand a year come into the neighbourhood.” + +“It will be no use to us, if twenty such should come, since you will not +visit them.” + +“Depend upon it, my dear, that when there are twenty, I will visit them +all.” + +Mr. Bennet was so odd a mixture of quick parts, sarcastic humour, +reserve, and caprice, that the experience of three-and-twenty years had +been insufficient to make his wife understand his character. _Her_ mind +was less difficult to develop. She was a woman of mean understanding, +little information, and uncertain temper. When she was discontented, +she fancied herself nervous. The business of her life was to get her +daughters married; its solace was visiting and news.
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/tests/data/en.out.txt b/vendor/elasticlunr-rs/tests/data/en.out.txt new file mode 100644 index 000000000..951e8d11f --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/en.out.txt @@ -0,0 +1,358 @@ +truth +univers +acknowledg +singl +man +possess +good +fortun +want +wife +littl +known +feel +view +such +man +first +enter +neighbourhood +truth +well +fix +mind +surround +famili +consid +right +properti +on +daughter +mr +bennet +ladi +on +day +heard +netherfield +park +last +mr +bennet +repli +return +mr +long +here +told +mr +bennet +made +answer +want +know +taken +cri +wife +impati +_you_ +want +tell +object +hear +invit +enough +know +mr +long +netherfield +taken +young +man +larg +fortun +north +england +came +down +monday +chais +four +see +place +much +delight +agre +mr +morri +immedi +take +possess +befor +michaelma +servant +hous +end +next +week +name +bingley +marri +singl +oh +singl +sure +singl +man +larg +fortun +four +five +thousand +year +fine +thing +girl +affect +mr +bennet +repli +wife +tiresom +know +think +marri +on +design +settl +here +design +nonsens +talk +veri +_may_ +fall +love +on +therefor +visit +soon +come +see +occas +girl +go +send +themselv +perhap +still +better +handsom +mr +bingley +best +parti +flatter +certainli +_have_ +share +beauti +pretend +anyth +extraordinari +now +woman +five +grown +up +daughter +ought +give +over +think +beauti +such +case +woman +much +beauti +think +inde +go +see +mr +bingley +come +neighbourhood +more +engag +assur +consid +daughter +think +establish +on +sir +william +ladi +luca +determin +go +mere +account +gener +know +visit +newcom +inde +go +imposs +_us_ +visit +over +scrupul +sure +dare +mr +bingley +veri +glad +see +send +few +line +assur +hearti +consent +marri +whichev +choos +girl +though +throw +good +word +littl +lizzi +desir +such +thing +lizzi +bit +better +other +sure +half +handsom +jane +half +good +humour +lydia +alway 
+give +_her_ +prefer +none +much +recommend +repli +silli +ignor +girl +lizzi +someth +more +quick +sister +mr +bennet +_can_ +abus +children +such +way +take +delight +vex +compass +poor +nerv +mistak +high +respect +nerv +old +friend +heard +mention +consider +last +twenti +year +ah +know +suffer +hope +over +live +see +mani +young +men +four +thousand +year +come +neighbourhood +us +twenti +such +come +visit +depend +upon +twenti +visit +mr +bennet +odd +mixtur +quick +part +sarcast +humour +reserv +capric +experi +three +twenti +year +insuffici +make +wife +understand +charact +_her_ +mind +less +difficult +develop +woman +mean +understand +littl +inform +uncertain +temper +discont +fanci +herself +nervou +busi +life +daughter +marri +solac +visit +new diff --git a/vendor/elasticlunr-rs/tests/data/es.in.txt b/vendor/elasticlunr-rs/tests/data/es.in.txt new file mode 100644 index 000000000..8caf015da --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/es.in.txt @@ -0,0 +1,43 @@ +En el piso bajo de la izquierda de una humilde pero graciosa y limpia +casa de la calle de Preciados, calle muy estrecha y retorcida en aquel +entonces, y teatro de la refriega en tal momento, vivían[13] solas, esto +es, sin la compañía de hombre ninguno, tres buenas y piadosas[14] +mujeres, que mucho se diferenciaban entre sí en cuanto al ser físico y +estado social, puesto que éranse que se eran[15] una señora mayor, +viuda, guipuzcoana, de aspecto grave y distinguido; una hija suya, +joven, soltera, natural de Madrid, y bastante guapa, aunque de tipo +diferente al de la madre (lo cual daba a entender que había salido en +todo a su padre),[16] y una doméstica,[17] imposible de filiar o +describir, sin edad, figura ni casi sexo determinables, bautizada, hasta +cierto punto,[18] en Mondoñedo, y a la cual ya hemos hecho demasiado +favor (como también se lo hizo aquel señor Cura) con reconocer que +pertenecía a la especie humana... 
+ +La mencionada joven parecía el símbolo o representación, viva y con +faldas,[19] del sentido común: tal equilibrio había entre su hermosura +y su naturalidad, entre su elegancia y su sencillez, entre su gracia y +su modestia. Facilísimo[20] era que pasase inadvertida por la vía +pública, sin alborotar a los galanteadores de oficio, pero imposible que +nadie dejara de admirarla[21] y de prendarse de sus múltiples +encantos,[22] luego que fijase en ella la atención.[23] + +No era, no (o, por mejor decir, no quería ser), una de esas beldades +llamativas, aparatosas, fulminantes, que atraen todas las miradas no +bien se presentan en un salón, teatro, o paseo, y que comprometen o +anulan al pobrete que las acompaña, sea novio, sea marido, sea padre, +sea el mismísimo Preste Juan de las Indias...[24] Era un conjunto sabio +y armónico de perfecciones físicas y morales, cuya prodigiosa +regularidad no entusiasmaba al pronto, como no entusiasman la paz y el +orden; o como acontece con los monumentos bien proporcionados, donde +nada nos choca ni maravilla hasta que[25] formamos juicio de que,[26] si +todo resulta llano, fácil y natural, consiste en que todo es igualmente +bello. Dijérase[27] que aquella diosa honrada de la clase media había +estudiado su modo de vestirse, de peinarse, de mirar, de moverse, de +conllevar, en fin, los tesoros de su espléndida juventud, en tal forma y +manera, que no se la creyese pagada[28] de sí misma, ni presuntuosa, ni +incitante, sino muy diferente de las deidades por casar que hacen feria +de sus hechizos y van por esas calles[29] de Dios diciendo a todo el +mundo: _Esta casa se vende... o se alquila_. + +Pero no nos detengamos en floreos ni dibujos,[30] que es mucho lo que +tenemos que referir, y poquísimo el tiempo de que disponemos. 
diff --git a/vendor/elasticlunr-rs/tests/data/es.out.txt b/vendor/elasticlunr-rs/tests/data/es.out.txt new file mode 100644 index 000000000..3866efee8 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/es.out.txt @@ -0,0 +1,219 @@ +pis +baj +izquierd +humild +gracios +limpi +cas +call +preci +call +estrech +retorc +aquel +entonc +teatr +refrieg +tal +moment +viv +sol +compañ +hombr +ningun +tres +buen +piad +mujer +diferenci +cuant +ser +fisic +social +puest +erans +señor +mayor +viud +guipuzcoan +aspect +grav +distingu +hij +jov +solter +natural +madr +bastant +guap +aunqu +tip +diferent +madr +dab +entend +sal +padr +domest +impos +fili +describ +edad +figur +casi +sex +determin +bautiz +ciert +punt +mondoñed +hech +demasi +favor +hiz +aquel +señor +cur +reconoc +pertenec +especi +human +mencion +jov +parec +simbol +represent +viv +fald +sent +comun +tal +equilibri +hermosur +natural +eleg +sencillez +graci +modesti +facilisim +pas +inadvert +via +public +alborot +galant +ofici +impos +nadi +dej +admir +prend +multipl +encant +lueg +fij +atencion +mejor +dec +quer +ser +beldad +llamat +aparat +fulmin +atra +tod +mir +bien +present +salon +teatr +pase +compromet +anul +pobret +acompañ +novi +mar +padr +mismisim +prest +juan +indi +conjunt +sabi +armon +perfeccion +fisic +moral +cuy +prodigi +regular +entusiasm +pront +entusiasm +paz +orden +acontec +monument +bien +proporcion +choc +maravill +form +juici +si +result +llan +facil +natural +cons +igual +bell +dijer +aquell +dios +honr +clas +medi +estudi +mod +vest +pein +mir +mov +conllev +fin +tesor +esplend +juventud +tal +form +maner +creyes +pag +mism +presuntu +incit +sin +diferent +deidad +cas +hac +feri +hechiz +van +call +dios +dic +mund +cas +vend +alquil +deteng +flore +dibuj +refer +poquisim +tiemp +dispon diff --git a/vendor/elasticlunr-rs/tests/data/fi.in.txt b/vendor/elasticlunr-rs/tests/data/fi.in.txt new file mode 100644 index 000000000..a33d6af7a --- /dev/null +++ 
b/vendor/elasticlunr-rs/tests/data/fi.in.txt @@ -0,0 +1,121 @@ +MÄKÄRÄ. + +_(Kumarrellen sakastin rappusilla.)_ Hyvä herra, armollinen +kirkonpalvelija... + +SUNTIO. + +Ka, mene, mene! Ei tänne saa tulla. + +MÄKÄRÄ. + +Enhän minä .. suokaa anteeksi.. Olisin vaan kaikessa nöyryydessäni +tullut herran temppeliin. + +SUNTIO. + +Sinä et kuitenkaan malta olla yhdessä kohden. Juoksentelet ympäri +kirkkoa ja höpiset jonkin joutavaa. + +MÄKÄRÄ. + +He, hee.. Höpisenkö minä?.. No ei sitten. Tämä pääkartano ei tahdo taas +pitää kutiaan. _(Hän nauraa kähäyttää.)_ + +SUNTIO. + +Senpätähden saat pysyä poikessa. + +MÄKÄRÄ. + +_(Vedet silmissä.)_ Ni-ni-niinhän tuota pitänee.. Kas, pääsky lensi.. +Mutta mitä pahaa minä sitten olen tehnyt, kun sinä minua ajelet kirkosta +pois? + +SUNTIO. + +Ethän sinä pysy yhdessä kohdenkaan, ja kun pappi saarnassaan muun muassa +sanoi: kyllä herra hullut hoitaa, niin silloin sinä heti ääneen huusit +että kyllä se hoitaa. Sopiiko tämä nyt kirkossa? + +MÄKÄRÄ. + +_(On hetken alakuloisen näköinen, mutta sitten iloisesti.)_ Mutta +hoitaakinhan se. Mitäs pahaa siinä on? + +SUNTIO. + +Eihän kirkossa saa huutaa ja juoksennella. Ymmärräthän sen sinäkin. + +MÄKÄRÄ. + +Niin .. niinhän se on, eihän sitä saisi, vaan kun ne henget viettelevät, +niin minkäpä sille taitaa. + +SUNTIO. + +Miten ne henget viettelevät? + +MÄKÄRÄ. + +Nekö? Nehän pitävät Mäkärää aivan narrillaan. Suhkavat kirkossakin +korvaan: juokse, juokse!.. ja silloin täytyy juosta. Toinen tulee ja +kuiskaa: elä vainenkaan juokse, elä vainenkaan juokse, mutta huuda .. ja +minä huudan. Muutoin ne tekisivät kerrassa kummia, niitä täytyy totella. +Kyllä ne kirkossa kumminkin vähän siivommalla ovat, mutta, vie sun, kun +ne kotona vehkeilevät, niin jos siinä ei ole sen seitsemässäkin +höyräkässä. Se pappa-piru on -- koira vieköön -- kaikista ilkein .. +katsos.. _(Hän levittää kätensä ja sormensa, panee naamansa hyvin julman +näköiseksi, irvistää ja hyppää suntioon päin.)_ Näin se tekee. 
+ +_(SUNTIO ärjäsee ja vetäytyy säikähtäen taaksepäin.)_ + +MÄKÄRÄ. + +_(Nauraa viekkaasti ja räpyttää silmiään.)_ Ei tämä mitään Ole sen +suhteen.. Vaan niin se tekee ja ottaa kirveen penkin alta ja huutaa: +Mäkärä, pane pää pölkylle! Ja jos minä silloin en olisi tiukkana ja +sukkelana, niin arvaathan sen, mitä se piru silloin tekisi. Mutta minä +hyppään näin ikään .. taaksepäin, kun se tulee kirveineen ja manaan +häntä jumalan nimessä menemään pois. Sitten se vähitellen vetäytyy +jonkun pimeän nurkan kautta pellolle, mutta kauvanhan peijakkaan mustat +silmät sieltä vielä kiiluvat. + +SUNTIO. + +Katsos peijakasta. Kyllä kai se minua peloittaisi. + +MÄKÄRÄ. + +Vaarassa niiden kanssa toki onkin tuolla kulkiessaan. Kun vaan kaivon +kohdalle sattuu, niin silloin tuo, joka on kaikista suurin roisto, tuo +Ansgaarius, joka kirkkoonkin tulee .. se tuntuu aivan kuin niskasta +kiini ottavan ja suhkaa: Mäkärä, Mäkärä, hyppää kaivoon! Vaan silloin +minä pyöräytän sitä tuolla lailla .. ja alan juosta sen minkä käpälästä +lähtee.. Kas, kas; tuolla kun oriit tappelevat. + +SUNTIO. + +_(Juoksee katsomaan.)_ Missä .. missä? + +MÄKÄRÄ. + +_(Kiiruhtaa sakastin rappusia ylös päästäkseen kirkkoon.)_ Siellä, +siellä.. He, he, he!.. He, he, he! + +SUNTIO. + +_(Rientää heti Mäkärän perästä ja tapaa hänet kiini vaatteen liepeestä +juuri sakastin ovella.)_ Eläpäs menekään.. Kas peijakkaan, kun oli +sukkela. + +MÄKÄRÄ. + +_(Vetäytyy siivosti takaisin ja viekkaasti nauraen räpyttää silmiään.)_ + +Ilmanhan minä vaan säikäytin. Pidä vasta ovesi tarkemmin kiini, ettei +syntinen kirkkoon pääse.. Hähä! Jo tulen. + +_(SUNTIO menee sakastiin ja sukkelasti vetää oven kiini jälessään.)_ + +MÄKÄRÄ. 
diff --git a/vendor/elasticlunr-rs/tests/data/fi.out.txt b/vendor/elasticlunr-rs/tests/data/fi.out.txt new file mode 100644 index 000000000..5b016ced9 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/fi.out.txt @@ -0,0 +1,340 @@ +mäkär +kumarrel +sakast +rappus +hyvä +her +armollin +kirkonpalvelij +suntio +ka +mene +mene +tän +saa +tul +mäkär +en +suoka +ant +kaike +nöyryyd +tulu +her +temppel +suntio +kuite +mal +yhd +kohd +juoksentel +ympär +kirko +höpis +jon +joutav +mäkär +hee +höpis +no +sit +pääkartano +tahdo +taas +pitä +kutia +naura +kähäyt +suntio +senpätähd +saat +pysy +poike +mäkär +vede +silm +ni +ni +niin +pitän +kas +pääsky +len +paha +sit +tehny +ajel +kirko +pois +suntio +et +pysy +yhd +kohd +pap +saarn +muun +muas +sanoi +kyl +her +hulu +hoita +silo +heti +ääne +huusi +kyl +hoita +sopi +kirko +mäkär +hetk +alakulois +näköin +sit +ilois +hoitaak +mitäs +paha +suntio +eihä +kirko +saa +huuta +juoksen +ymmär +sinä +mäkär +niin +eihä +saisi +heng +viettelev +mink +taita +suntio +mite +heng +viettelev +mäkär +nekö +nehä +pitäv +mäkär +aiva +nar +suhkav +kirko +korv +juoks +juoks +silo +täytyy +juos +toine +tule +kuisk +elä +vaine +juoks +elä +vaine +juoks +huuda +huuda +muuto +tekisiv +ker +kum +täytyy +tote +kyl +kirko +kum +vähä +siivom +vie +sun +koto +vehkeilev +seitsem +höyräk +pap +piru +koira +viekö +kaik +ilk +katsos +levit +käte +sorm +pane +naama +hyv +julm +näköis +irvist +hyp +suntio +päin +näin +teke +suntio +ärjäs +vetäytyy +säikähtäe +taaksep +mäkär +naura +viekkaast +räpyt +silmiä +mitä +suht +teke +ot +kirv +pen +al +huuta +mäkär +pane +pää +pölky +silo +tiuk +sukkel +arv +piru +silo +teki +hyp +näin +ikä +taaksep +tule +kirv +mana +jumal +nime +menem +pois +sit +vähitel +vetäytyy +jonku +pimeä +nurk +kaut +pelo +kauva +peijak +must +silm +siel +vielä +kiiluv +suntio +katsos +peijak +kyl +kai +peloitai +mäkär +vaara +toki +on +kulkie +kaivo +kohd +satu +silo +kaik +suur +roisto +ansgaarius +kirko +tule +tuntu +aiva +nisk +kiini 
+ottav +suhk +mäkär +mäkär +hyp +kaivo +silo +pyöräyt +lail +ala +juos +käpäl +läht +kas +kas +ori +tappelev +suntio +juoks +katsom +mäkär +kiiruht +sakast +rappus +ylös +päästäks +kirko +siel +siel +suntio +rient +heti +mäkär +perä +tapa +kiini +vaat +liepe +juuri +sakast +ove +eläpäs +mene +kas +peijak +sukkel +mäkär +vetäytyy +siivost +takais +viekkaast +naurae +räpyt +silmiä +ilm +säikäyt +pidä +vas +ove +tarkem +kiini +etei +syntin +kirko +pääse +hähä +jo +tule +suntio +mene +sakast +sukkel +vetä +ove +kiini +jäle +mäkär diff --git a/vendor/elasticlunr-rs/tests/data/fr.in.txt b/vendor/elasticlunr-rs/tests/data/fr.in.txt new file mode 100644 index 000000000..4b246bb24 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/fr.in.txt @@ -0,0 +1,67 @@ +Zanette, c'était son nom de Jeanne, de Jeannette, comme elle le +prononçait en zézayant, lorsqu'elle était toute petite. Tel il lui était +resté. Ce qui, aussi, lui était resté, c'était sa grâce d'enfance, on ne +sait quoi de tout mignon, de plus jeune qu'elle-même. Elle était belle +de ses beaux seize ans, de son profil de Grecque, et de ses cheveux +noirs, qui, sous le hennin à l'arlésienne, pendaient lourdement sur la +blancheur dorée de son cou. + +Elle avait seize ans avec l'air d'en avoir douze. Pourtant, on sentait +la vie jeune et forte palpiter dans la chapelle, c'est-à-dire dans +l'entre-bâillement des fichus aux plis innombrables, qui laissent voir +un peu de la poitrine nue sur laquelle brille la croix d'or suspendue à +la chaînette des grand'mères. + +Zanette vivait à la ferme de la Sirène, bien tranquille à soigner ses +poules, ses lapins, auprès de son père, maître Augias, le bayle. À +l'ordinaire elle allait en Arles tous les dimanches. 
+ +Et bien souvent, assise au bord du Petit Rhône, seule, sous les saules +et les aubes, elle rêvait en regardant l'eau, l'eau qui s'en allait vers +la mer, vers la mer si grande, où des bateaux vont et viennent, comme +des bêtes de rêve, comme de grands oiseaux aux ailes blanches.... Un +songe d'inconnu accompagnait toujours Zanette. Ses beaux seize ans +espéraient. + +...N'est-ce pas qu'elle porte un joli nom, la ferme de la Sirène? La +Sirène (la Sereno) si vous interrogez les paysans, ils vous le diront, +est un oiseau de passage, qui jamais ne s'arrête chez nous, et qui +traverse seulement notre ciel, très haut. Quelquefois, le laboureur, en +novembre, arrête son attelage, parce qu'il a entendu une harmonie +lointaine, confuse, comme un son prolongé de viole ou de mandoline.... + +Et il écoute, en rêvant.... + +Ce sont les sirènes qui passent là-haut, tout là-haut. Elles sont plus +petites que des tourterelles et leurs plumes miroitantes ont toutes les +couleurs de l'arc-en-ciel. On ne sait pas si la musique qu'elles font +sort de leur gosier ou vient simplement de le vibration de leurs ailes. +On croit plutôt que leur vol est harmonieux. Leur voix y ajoute une +seule note qui, de temps en temps, scande et domine la mélodie des +ailes.... Un jour, dit-on, comme on venait à peine de construire le +château et sa ferme, une sirène un instant se posa sur le bouquet de +tamaris en fleurs que les maçons plantent au bout d'une perche, sur la +toiture, dès qu'elle est achevée. Et le château, et la ferme qui le +touche, furent, voilà bien longtemps, baptisés du nom qu'ils portent +encore. + +Entre la ferme et la château, une vieille chapelle décrépite, où jadis +on disait la messe, se dresse, étroite et longue. + +On la dirait bâtie sur le modèle des huttes camarguaises. 
+ +Les huttes sont en «tape», en argile desséchée, recouvertes de roseaux, +et la chapelle est en moellons, et recouverte de pierres plates, mais +les deux toits ont la même forme, celle d'un bateau long, la quille en +l'air; et sur leurs toitures, les cabanes, aussi bien que la chapelle, +portent toutes une croix penchée, comme renversée en arrière. Toutes ces +croix penchantes font songer au mistral éternel qui incline ainsi un peu +tous les arbres des plaines provençales, dans la même direction. Tous +ils gardent un peu la marque du vent maître, «magistral», à qui les +Romains avaient élevé un temple, comme à la puissance divine, +protectrice de ce pays qu'il balaye et assainit sans cesse.... Elles +donnent encore, les petites croix qu'on plante ainsi à dessein penchées, +l'impression des choses de la religion, à la fois vaincues et +résistantes. Elles sont là, tenaces mais inclinées, jamais arrachées +mais toujours penchantes, et elles disent le triomphe obstiné d'une foi +sans relâche battue des vents....
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/tests/data/fr.out.txt b/vendor/elasticlunr-rs/tests/data/fr.out.txt new file mode 100644 index 000000000..7a8d0dc8b --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/fr.out.txt @@ -0,0 +1,369 @@ +zanet +c'et +nom +jeann +jeannet +comm +prononc +zézai +lorsqu'el +tout +petit +tel +rest +auss +rest +c'et +grâc +d'enfanc +sait +quoi +tout +mignon +plus +jeun +qu'el +bel +beau +seiz +an +profil +grecqu +cheveux +noir +sous +hennin +l'arlésien +pend +lourd +blancheur +dor +cou +seiz +an +l'air +d'en +avoir +douz +pourt +sent +vi +jeun +fort +palpit +chapel +c'est +dir +l'entr +bâill +fichus +plis +innombr +laissent +voir +peu +poitrin +nu +laquel +brill +croix +d'or +suspendu +chaînet +grand'mer +zanet +viv +ferm +siren +bien +tranquill +soign +poul +lapin +aupres +per +maîtr +augi +bayl +l'ordinair +allait +arle +tous +dimanch +bien +souvent +assis +bord +pet +rhôn +seul +sous +saul +aub +rêv +regard +l'eau +l'eau +s'en +allait +ver +mer +ver +mer +si +grand +où +bateau +vont +viennent +comm +bêt +rêv +comm +grand +oiseau +ail +blanch +song +d'inconnu +accompagn +toujour +zanet +beau +seiz +an +esper +n'est +qu'el +port +jol +nom +ferm +siren +siren +sereno +si +interrog +paysan +diront +oiseau +passag +jam +s'arrêt +chez +travers +seul +ciel +tres +haut +quelquefois +laboureur +novembr +arrêt +attelag +parc +qu'il +a +entendu +harmon +lointain +confus +comm +prolong +viol +mandolin +écout +rêv +siren +passent +là +haut +tout +là +haut +elle +plus +petit +tourterel +plum +miroit +tout +couleur +l'arc +ciel +sait +si +musiqu +qu'el +font +sort +gosi +vient +simpl +vibrat +ail +croit +plutôt +vol +harmoni +voix +ajout +seul +not +temp +temp +scand +domin +mélod +ail +jour +dit +comm +ven +pein +construir +château +ferm +siren +instant +pos +bouquet +tamar +fleur +maçon +plantent +bout +d'un +perch +toitur +des +qu'el +achev +château +ferm +touch +voilà +bien +longtemp +baptis +nom +qu'il +portent +encor 
+entre +ferm +château +vieil +chapel +décrépit +où +jad +dis +mess +dress +étroit +longu +dir +bât +model +hutt +camarguais +hutt +tap +argil +dessech +recouvert +roseau +chapel +moellon +recouvert +pierr +plat +deux +toit +form +cel +d'un +bateau +long +quill +l'air +toitur +caban +auss +bien +chapel +portent +tout +croix +pench +comm +renvers +arrier +tout +croix +pench +font +song +mistral +éternel +inclin +ains +peu +tous +arbre +plain +provençal +direct +tous +gardent +peu +marqu +vent +maîtr +magistral +romain +élev +templ +comm +puissanc +divin +protectric +pay +qu'il +balay +assain +cess +elle +donnent +encor +petit +croix +qu'on +plant +ains +dessein +pench +l'impress +chos +religion +fois +vaincu +résist +elle +là +tenac +inclin +jam +arrach +toujour +pench +elle +disent +triomph +obstin +d'un +foi +relâch +battu +vent diff --git a/vendor/elasticlunr-rs/tests/data/it.in.txt b/vendor/elasticlunr-rs/tests/data/it.in.txt new file mode 100644 index 000000000..beb57fb63 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/it.in.txt @@ -0,0 +1,90 @@ +Al cadere d'una bella giornata d'aprile dell'anno 1503 la campana +di San Domenico in Barletta sonava gli ultimi tocchi dell'avemaria. +Sulla piazza vicina in riva al mare, luogo di ritrovo degli abitanti +tranquilli che, nelle terricciuole dei climi meridionali specialmente, +sogliono sulla sera essere insieme a barattar parole al sereno per +riposarsi dalle faccende del giorno, stavano col fine medesimo +dispersi in varj gruppi molti soldati spagnuoli ed italiani, alcuni +passeggiando, altri fermi, o seduti, od appoggiati alle barche tirate +a secco, delle quali era ingombra la spiaggia, e, com'è costume delle +soldatesche d'ogni età e d'ogni nazione, il loro contegno era tale +che pareva dire: il mondo è nostro. Di fatto, lasciato loro il campo +migliore, si tenevano i terrazzani in disparte, dando così a questa +loro burbanza tacita approvazione. 
Chi per figurarsi questo quadro si +volesse rappresentare una simile radunata de' nostri soldati moderni +nella loro misera _uniforme_, sarebbe lontano assai dall'averne una +giusta immagine. L'esercito di Consalvo, le fanterie specialmente, +quantunque le meglio in arnese, e le migliori di tutta cristianità, +non conoscevano però, più di qualunque altra milizia del secolo XVI, +la stretta disciplina moderna, che è giunta a render simili un soldato +all'altro dalle scarpe al cappello. Qui invece, ogni uomo che facesse +il mestier dell'arme a piede o a cavallo, poteva vestirsi, armarsi ed +adornarsi come più gli piacesse; onde nasceva fra questa turba una +mirabile varietà e vaghezza nelle fogge, ne' colori e nel portamento, +dal quale si poteva facilmente conoscere a qual nazione appartenesse +ogni individuo. Gli Spagnuoli, per lo più serii, immobili, atteggiati +da bravacci, ed avvolti (o com'essi dicono _embozados_) nella _capa_ +nazionale, dalla quale si vedeva uscir per di sotto la lunga e sottil +lama di Toledo; gl'Italiani loquaci e pronti al gestire, in sajo od in +farsetto colla daga pistolese appesa dietro le reni. + +Al sonare della campana era cessato il susurro, e scomparendo la +maggior parte de' cappelli, le teste eran rimaste scoperte, perchè in +quel tempo anche i soldati credevano in Dio, e talvolta lo pregavano. +Dopo piccola pausa tornarono a luogo i cappelli, ricominciò il +bisbiglio; e benchè quella turba presa insieme avesse al primo aspetto +un non so che di gajo e di vivace, si poteva tuttavia facilmente +avvedersi, girando fra i diversi crocchi, esservi un motivo comune di +tristezza e di scoramento, al quale erano volte le menti e le parole +di tutti. Infatti il motivo era vero e possente. 
La fame cominciava +a farsi sentire fra i soldati ed anche fra gli abitanti di Barletta, +ove il gran Capitano, aspettando i tardi ajuti di Spagna, teneva +chiuso l'esercito di troppo inferiore a quello dei Francesi perchè +s'arrischiasse commetter la somma delle cose alla fortuna d'una +giornata. + +Tre lati della piazza erano chiusi da certe povere case di marinaj e +pescatori, dalla chiesa e dall'osteria. Il quarto s'apriva alla marina, +ingombro, com'è costume di tali luoghi, di barche, reti e di altri +attrezzi pescherecci; ed all'ultima linea dell'orizzonte si vedeva +sorgere dal seno delle acque la bruna forma del monte Gargano, sulla +cui vetta andava morendo l'ultimo raggio del sole cadente. + +Nello spazio frapposto, veleggiava chetamente un legno sottile; e si +volgeva tratto tratto per cercare il vento che soffiava incostante in +quel golfo, increspando qua e là a lunghe strisce la superficie del +mare. La distanza tuttavia della nave e la dubbia luce del crepuscolo +non lasciavano distinguere qual fosse la sua bandiera. + +Uno Spagnuolo, che insieme con molti soldati era presso alla riva, +la guardava fisso, aguzzando le ciglia, ed attorcigliandosi certi +grandissimi baffi più bigi che neri. + +--Che cosa guardi che sembri una statua, e non dai retta a chi discorre +con te?-- + +Quest'apostrofe d'un soldato napoletano, che non avendo ottenuta +risposta ad una prima domanda, se l'aveva per male, non mosse nè punto +nè poco l'imperturbabile Spagnuolo. Alla fine con un sospiro che pareva +uscire più da un mantice che dal petto d'un uomo, disse: + +--_Voto a Dios que nuestra segnora de Gaeta_, che manda buon vento e +buon cammino a tanti che la pregano in mare, potrebbe mandar ora questa +fusta a noi che la preghiamo in terra, e non abbiamo da metter sotto i +denti altro che il calcio dell'archibuso! Chi sa che non porti grano e +provvisioni a quei _descomulgados_ di Francesi che ci tengono stretti +in questa gabbia per farci morir di fame...... 
_Y mala Pasqua me de +Dios y sea la primera que viniere, si a su gracia el segnor Gonzalo +Hernandez_[1] quando ha ben pranzato e meglio cenato gl'importa di noi +più che del _cuero de sus zapatos_[2]. + +--Che cosa può far Consalvo?--rispose con istizza il Napoletano, +contento di contraddire:--dovrà diventar pane per entrar in corpo ad +una bestia come te? Quando ne avrà, ne darà; e le navi che il malanno +loro ha portate nelle secche di Manfredonia, chi l'ha divorate? +Consalvo, o voi altri?-- + +Lo Spagnuolo un po' mutato in viso mostrava di voler rispondere, ma +fu interrotto da un altro del crocchio, il quale, battendogli sulla +spalla, scuotendo la testa, ed abbassando la voce, come per dar maggior +peso alle parole,
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/tests/data/it.out.txt b/vendor/elasticlunr-rs/tests/data/it.out.txt new file mode 100644 index 000000000..67b4d72c2 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/it.out.txt @@ -0,0 +1,511 @@ +cad +d'un +bell +giorn +d'april +dell'ann +campan +san +domen +barlett +son +ultim +tocc +dell'avemar +piazz +vicin +riv +mar +luog +ritrov +abit +tranquill +terricciuol +clim +meridional +special +sogl +ser +esser +insiem +baratt +parol +seren +ripos +facc +giorn +fin +medesim +disp +varj +grupp +molt +sold +spagnuol +italian +alcun +passegg +altri +ferm +sed +od +appogg +barc +tir +secc +qual +ingombr +spiagg +com' +costum +soldatesc +d'ogn +età +d'ogn +nazion +contegn +tal +par +dir +mond +fatt +lasc +camp +miglior +ten +terrazzan +dispart +dand +cos +burbanz +tac +approv +figur +quadr +voless +rappresent +simil +radun +de +sold +modern +miser +uniform +lont +assa +dall'av +giust +immagin +l'eserc +consalv +fanter +special +quantunqu +megl +arnes +miglior +tutt +cristian +conosc +per +qualunqu +altra +miliz +secol +xvi +strett +disciplin +modern +giunt +render +simil +sold +all'altr +scarp +cappell +qui +invec +ogni +uom +mestier +dell'arm +pied +cavall +pot +vest +armars +adorn +piacess +onde +nasc +fra +turb +mirabil +variet +vaghezz +fogg +color +port +pot +facil +conosc +qual +nazion +apparteness +ogni +individu +spagnuol +ser +immobil +attegg +bravacc +avvolt +com'ess +dic +embozados +cap +nazional +ved +uscir +sott +lung +sottil +lam +toled +gl'italian +loquac +pront +gest +saj +od +farsett +coll +dag +pistoles +appes +dietr +ren +son +campan +cess +susurr +scompar +maggior +part +de +cappell +test +eran +rimast +scopert +perc +quel +temp +sold +cred +dio +talvolt +preg +dop +piccol +paus +torn +luog +cappell +ricominc +bisbigl +benc +turb +pres +insiem +prim +aspett +so +gaj +vivac +pot +tuttav +facil +avved +gir +fra +div +crocc +esserv +mot +comun +tristezz +scor +volt +ment +parol +infatt +mot 
+ver +possent +fam +cominc +fars +sent +fra +sold +fra +abit +barlett +ove +gran +capit +aspett +tard +ajut +spagn +ten +chius +l'eserc +tropp +inferior +frances +perc +s'arrisc +commetter +somm +cos +fortun +d'un +giorn +tre +lat +piazz +chius +cert +pov +cas +marinaj +pescator +chies +dall'oster +quart +s'apr +marin +ingombr +com' +costum +tal +luog +barc +ret +altri +attrezz +pescherecc +all'ultim +line +dell'orizzont +ved +sorg +sen +acque +brun +form +mont +garg +vett +andav +mor +l'ultim +ragg +sol +cadent +spaz +frappost +velegg +chet +legn +sottil +volg +tratt +tratt +cerc +vent +soff +incost +quel +golf +incresp +qua +là +lung +strisc +superfic +mar +distanz +tuttav +nav +dubb +luc +crepuscol +lasc +distingu +qual +bandier +spagnuol +insiem +molt +sold +press +riv +guard +fiss +aguzz +cigl +attorcigl +cert +grandissim +baff +big +ner +cos +guard +sembr +statu +rett +discorr +te +quest'apostrof +d'un +sold +napolet +otten +rispost +prim +domand +l'av +mal +moss +nè +punt +nè +poc +l'imperturb +spagnuol +fin +sospir +par +uscir +mantic +pett +d'un +uom +diss +vot +dios +que +nuestr +segnor +de +gaet +mand +buon +vent +buon +cammin +tant +preg +mar +potrebb +mand +ora +fust +preg +terr +metter +sott +dent +altro +calc +dell'archibus +sa +port +gran +provvision +que +descomulgados +frances +teng +strett +gabb +farc +mor +fam +y +mal +pasqu +me +de +dios +y +sea +primer +que +vin +grac +el +segnor +gonzal +hernandez +quand +ben +pranz +megl +cen +gl'import +cuer +de +sus +zapatos +cos +può +far +consalv +rispos +istizz +napolet +content +contradd +dovr +divent +pan +entrar +corp +best +te +quand +dar +nav +malann +port +secc +manfredon +l'ha +divor +consalv +altri +spagnuol +po +mut +vis +mostr +voler +rispond +interrott +altro +crocc +batt +spall +scuot +test +abbass +voc +dar +maggior +pes +parol diff --git a/vendor/elasticlunr-rs/tests/data/ja.in.txt b/vendor/elasticlunr-rs/tests/data/ja.in.txt new file mode 100644 index 000000000..abc7f1dbc --- /dev/null 
+++ b/vendor/elasticlunr-rs/tests/data/ja.in.txt @@ -0,0 +1,20 @@ +バイト仲間で、ものすごく気の合うメンバーがいた。 +なぜ気が合うかというと、共通の上司がヤバいやつだったからだった。 +どうヤバいかここで説明するのは割愛する。主題からずれるので。 +そのヤバい上司の愚痴を言っている間は、お互い仲間意識を持っていたように思う。 +月日は流れ、私はそこを辞め、しばらくしてその気が合うメンバーも辞めた。 +何回かその後、そのメンバーと合っているうちに、意見の衝突などから、険悪な雰囲気になることが増えた。 +そして私も面倒なので、もう会わないようになった。 +それからもう何年も経つ。 + +「共通の敵」を作ると、結束が固まるという話はよく聞くが、それは非常に壊れやすいものと思う。 +敵が去ったあとは、内紛が起こる。 +人類に文明が発生してから、地球上のあらゆる場所で繰り返してきたことかもしれない。 + +漢の劉邦が中国を統一したとき、つまり宿敵項羽を倒したあと、敵がいなくなった。 +その時、今まで一緒に戦ってきた功臣の何人かは、劉邦によって降格されたり、またそれを恨みに思った功臣が謀反を起こし、一族ごと処罰を受けたりもした。 +ただ、軍師の張良は天下統一の後、「政治には興味ありません。オカルトの研究だけはさせてください」と自分は安全であるというアピールをしたからなのか助かったようである。 +軍師として成果を上げるくらいだから、やはり人間の性質を理解していたようだ。 + +こんな感じで、共通の敵がいるという条件下で育まれた友情のようなものは、いずれ崩壊する儚いものであると考えておくのがいい。 +喫煙所で上司の悪口を言ってる暇があったら、自分のスキルアップに時間を使う方が有益と言える。 diff --git a/vendor/elasticlunr-rs/tests/data/ja.out.txt b/vendor/elasticlunr-rs/tests/data/ja.out.txt new file mode 100644 index 000000000..4169e0aaa --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/ja.out.txt @@ -0,0 +1,215 @@ +バイト +仲間 +ものすごく +気 +合う +メンバ +い +なぜ +気 +合う +いう +共通 +上司 +い +やつ +どう +いか +ここ +説明 +する +の +割愛 +する +主題 +ずれる +その +い +上司 +愚痴 +言っ +いる +間 +お互い +仲間 +意識 +持っ +い +よう +思う +月日 +流れ +私 +そこ +辞め +しばらく +し +その +気 +合う +メンバ +辞め +何 +回 +その後 +その +メンバ +合っ +いる +うち +意見 +衝突 +険悪 +雰囲気 +なる +こと +増え +そして +私 +面倒 +もう +会わ +よう +なっ +それから +もう +何 +年 +経つ +共通 +敵 +作る +結束 +固まる +話 +よく +聞く +それ +非常 +壊れ +やすい +もの +思う +敵 +去っ +あと +内紛 +起こる +人類 +文明 +発生 +し +地球 +上 +あらゆる +場所 +繰り返し +き +こと +しれ +漢 +劉邦 +中国 +統一 +し +とき +つまり +宿敵 +項羽 +倒し +あと +敵 +い +なっ +その +時 +今 +一緒 +戦っ +き +功臣 +何 +人 +劉邦 +降格 +さ +れ +また +それ +恨み +思っ +功臣 +謀反 +起こし +一族 +ごと +処罰 +受け +し +ただ +軍師 +張 +良 +天下 +統一 +後 +政治 +興味 +あり +研究 +さ +せ +ください +自分 +安全 +アピール +し +の +助かっ +よう +軍師 +成果 +上げる +やはり +人間 +性質 +理解 +し +い +よう +こんな +感じ +共通 +敵 +いる +条件下 +育ま +れ +友情 +よう +もの +いずれ +崩壊 +する +儚い +もの +考え +おく +の +いい +喫煙 +所 +上司 +悪口 +言っ +てる +暇 +あっ +自分 +スキル +アップ +時間 +使う +方 +有益 +言える diff --git a/vendor/elasticlunr-rs/tests/data/no.in.txt b/vendor/elasticlunr-rs/tests/data/no.in.txt new file mode 100644 index 
000000000..0e4b9e229 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/no.in.txt @@ -0,0 +1,144 @@ +Hver dag blir vi litt klokere på pandemien som har snudd samfunnet +vårt på hodet. Hver dag er vi ett skritt nærmere en vaksine og ett +skritt nærmere det som skal være vår nye hverdag, etter krisen. Jeg +tror vi alle kjenner på at det tærer litt på nå – syv måneder nærmest +i unntakstilstand, med mer usikkerhet og mindre frihet enn vi noen +gang trodde vi skulle oppleve. + +Vi fikk raskt kontroll på smitten. Og vi kunne bruke våre økonomiske +muskler til å dempe tilbakeslaget. Gjeninnhentingen kom raskere enn vi +så for oss. Men vi blir stadig minnet på hvor skjør situasjonen er. + +Å hindre nye smitteutbrudd er den viktigste jobben fremover også, +antakelig langt inn i neste år. Den jobben kan ingen gjøre alene. Vi +må gjøre den sammen. Hver og en av oss må fortsette å holde avstand, +vi må vaske hendene og være hjemme hvis vi er syke, for å beskytte de +mest sårbare blant oss, for å ta vare på arbeidsplassene, for at alt +skal bli bra igjen – selv om vi kjenner at det røyner på. + +De siste månedene har jeg møtt folk og bedrifter fra hele Norge for å +høre deres historier og deres tanker om fremtiden. Det er ett ord som +går igjen: usikkerhet. Men det er ofte med en betryggende undertone av +optimisme og innsatsvilje – betryggende fordi vi må ha med privat +næringsliv på laget hvis vi skal ri denne stormen av uten å miste +kurs. Det beste vi kan gjøre, er å legge til rette for at det kan skje +– og vise vei. + +Med budsjettet for 2021 forsterker regjeringen det langsiktige +arbeidet for å styrke bærekraften og konkurransekraften i norsk +økonomi. Den jobben startet vi i 2013. For å trygge jobbene og fremme +omstilling har vi investert mer i kunnskap, i forskning, i +infrastruktur og i vekstfremmende skattelettelser. 
+ +Prioriteringene i budsjettet skal bringe Norge mot seks mål som alle +handler om å komme gjennom denne krisen, uten å miste de langsiktige +perspektivene av syne. + +Det første målet handler om å få folk tilbake i jobb. I mars steg +antallet permitterte dramatisk. På det meste var mer enn hver tiende +person i arbeidsstyrken registrert som helt ledig hos Nav. Bildet har +bedret seg siden den gang. Men fortsatt er mer enn 100 000 mennesker +helt arbeidsledige i Norge. Det er altfor mange. + +Det viktigste for å bevare et samfunn med små forskjeller og gode +velferdsordninger er at flest mulig er i jobb. Derfor må vi unngå at +ledigheten nå biter seg fast på et høyt nivå. Vi må være spesielt +oppmerksom på ungdom som er på vei ut i arbeidslivet. Og vi må unngå +at de som i utgangspunktet hadde svak tilknytning til arbeidslivet, +støtes varig ut. + +Å legge til rette for nye arbeidsplasser i privat sektor har vært en +viktig del av den økonomiske politikken regjeringen har ført i syv +år. Før krisen var sysselsettingsandelen på vei opp. Vi må tilbake til +det sporet. + +Siden mars har vi iverksatt kraftfulle tiltak for å gi økonomisk +trygghet til alle de som plutselig fikk inntektsgrunnlaget revet bort, +for å sikre at kommunene og helsetjenesten har ressurser nok til å +fortsette å ta vare på oss, og for å hjelpe levedyktige bedrifter +gjennom denne krevende perioden. Tiltakene har virket. Aktiviteten i +norsk økonomi har tatt seg opp igjen etter den dramatiske nedgangen i +vår. Men gjeninnhentingen er skjør, krisen er ikke over. Vi må ta +høyde for at mange bedrifter – og særlig i næringer som reiseliv og +kultur – fortsatt vil være rammet av smitteverntiltak som begrenser +aktiviteten, og at ordrebøkene til eksportbedriftene antakelig vil +tynnes ut på grunn av svak etterspørsel fra landene vi handler +med. Hos handelspartnerne våre har den økonomiske nedgangen vært enda +dypere, og gjeninnhentingen har vært svakere. 
+ +I budsjettet for 2021 legger vi opp til at oljepengebruken skal være +på 313 mrd. kr. Det tilsvarer 3 pst. av fondsverdien og betyr at vi +allerede neste år kan være tilbake på den langsiktige rettesnoren for +bærekraftig bruk av oljeinntektene. Men det er samtidig over 60 +mrd. kr mer enn vi brukte i 2019. + +Budsjettet for neste år vil virke ekspansivt i den økonomiske +politikken. Alt fra bygging av vei og bane, investeringer i forsvaret, +overføringer til kommuner, fylker og helsetjenester, nye +byggeprosjekter og ulike støtteordninger skaper aktivitet og arbeid +over hele landet. I tillegg vil vi forsterke satsingen på +arbeidsmarkedstiltak for dem som står uten jobb. + +Koronapandemien kommer til å endre vanene våre. Vi har blitt mer +digitale. Mye tyder på at vi kommer til å reise mindre og være mer på +hjemmekontor. Varige endringer er endringer som næringslivet må +tilpasse seg. Vi må hjelpe næringslivet gjennom krisen – uten å svekke +innovasjonskraften og uten å ødelegge omstillingsevnen. Det er en +krevende balansegang. Bedrifter går konkurs – i gode tider og i +vanskelige tider. Vi politikere må være kloke nok til å erkjenne at vi +ikke vet hvem som er morgendagens vinnere. Markedet må avgjøre hvilke +bedrifter som skal være med videre. + +Norge har naturressurser, teknologi og kompetanse som gir oss +fantastiske muligheter. Det andre målet i regjeringens strategi er at +vi må sikre flere ben å stå på. Vi må fortsette å fornye Norge. Vi +trenger flere jobber, i flere bransjer, over hele landet. Og veksten +må komme i privat næringsliv. + +Gjennom syv år i regjering har vi gjort mye for å bedre +rammebetingelsene for bedriftene. Konkurranseevnen er kraftig bedret, +bl.a. fordi bedriftene tar i bruk ny teknologi, og fordi vi har senket +skattene ned mot nivået i land vi konkurrerer med. Vi har prioritert +lavere selskapsskatt, lavere marginalskatt på arbeid og lavere +formuesskatt fordi det gir arbeidsplasser og arbeidslyst. 
+ +Nå kutter vi skattene enda mer, med brede lettelser i inntektsskatten, +som lavere trinnskatt, høyere minstefradrag både på lønn og på pensjon +og videre nedtrapping av skatten på arbeidende kapital. Det kan gi +norske eiere muskler til å investere i norske arbeidsplasser. Og vi +øker skattefordelen for dem som kjøper aksjer i bedriften de er ansatt +i. Vi ønsker at flere medarbeidere skal bli medeiere i bedriftene. + +Vi foreslår også å endre vannkraftbeskatningen og tilfører næringen +betydelig likviditet. Det vil legge til rette for investeringer og +nødvendige oppgraderinger i en næring som skaper aktivitet i +lokalsamfunn over hele landet. + +I tillegg vil vi øke frikortgrensen til 60 000 kr. Det kommer ungdom +til gode, som nå kan beholde litt mer av de pengene de tjener. + +Fra 1. juli neste år vil vi også innføre kildeskatt på enkelte +betalinger til nærstående selskap i lavskatteland. Formålet er å +motvirke overskuddsflytting, unngå at inntekter som skapes i Norge, +blir beskattet i et annet land med lavere skatt. + +Skatt og avgift er den klart største inntektskilden i +statsbudsjettet. Å beskytte det norske skattegrunnlaget er derfor helt +nødvendig for at vi skal kunne holde skattene lave for våre bedrifter +og arbeidsplasser. + +Med dette budsjettet vil en vanlig familie betale 14 000 kr mindre i +skatt neste år enn om det rød-grønne skattenivået fra 2013 hadde blitt +videreført – 14 000 kr for en vanlig familie. + +Til sammen er skatte- og avgiftsnivået redusert med nesten 30 mrd. kr +i vår regjeringsperiode. + +Gjennom sterk satsing på forskning og utvikling skaper regjeringen +grobunn for omstilling og vekst. I 2021 øker vi FoU-bevilgningene med +mer enn 2 mrd. kr, til over 45 mrd. kr. Pengene skal bl.a. gå til å +trappe opp langtidsplanen for forskning og høyere utdanning og til +deltagelse i Horisont Europa og EUs romprogram. Norske bedrifter og +forskere hevder seg bra i konkurransen om penger fra +EU-programmene. 
Deltagelse der bidrar til nye jobber her og til at vi +løser mange store samfunnsoppgaver. diff --git a/vendor/elasticlunr-rs/tests/data/no.out.txt b/vendor/elasticlunr-rs/tests/data/no.out.txt new file mode 100644 index 000000000..29353b36d --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/no.out.txt @@ -0,0 +1,660 @@ +dag +litt +kloker +pandemi +snudd +samfunn +vårt +hod +dag +skritt +nærmer +vaksin +skritt +nærmer +nye +hverdag +kris +tror +kjenn +tær +litt +syv +måned +nærmest +unntakstilstand +mer +usikker +mindr +frih +gang +trodd +opplev +fikk +raskt +kontroll +smitt +bruk +vår +økonomisk +muskl +demp +tilbakeslag +gjeninnhenting +rasker +stad +minn +skjør +situasjon +hindr +nye +smitteutbrudd +viktigst +jobb +fremov +antak +langt +nest +år +jobb +gjør +alen +må +gjør +samm +må +fortsett +hold +avstand +må +vask +hend +hjemm +syk +beskytt +mest +sårbar +blant +ta +var +arbeidsplass +alt +bra +igj +kjenn +røyn +sist +måned +møtt +folk +bedrift +hel +norg +hør +histori +tank +fremtid +ord +går +igj +usikker +oft +betrygg +underton +optimism +innsatsvilj +betrygg +må +privat +næringsliv +lag +ri +storm +mist +kur +best +gjør +legg +rett +skje +vis +vei +budsjett +forsterk +regjering +langsikt +arbeid +styrk +bærekraft +konkurransekraft +norsk +økonomi +jobb +start +trygg +jobb +fremm +omstilling +invester +mer +kunnskap +forskning +infrastruktur +vekstfremm +skattelett +prioritering +budsjett +bring +norg +seks +mål +handl +komm +gjennom +kris +mist +langsikt +perspektiv +syn +først +mål +handl +få +folk +tilbak +jobb +mar +steg +antall +permitter +dramatisk +mest +mer +tiend +person +arbeidsstyrk +registrer +helt +led +hos +nav +bild +bedr +gang +fortsatt +mer +mennesk +helt +arbeidsled +norg +altfor +viktigst +bevar +samfunn +små +forskjell +god +velferdsordning +flest +mul +jobb +derfor +må +unngå +led +bit +fast +høyt +nivå +må +spesielt +oppmerksom +ungdom +vei +arbeidsliv +må +unngå +utgangspunkt +svak +tilknytning +arbeidsliv +støt +var +legg +rett 
+nye +arbeidsplass +privat +sektor +vikt +del +økonomisk +politikk +regjering +ført +syv +år +kris +sysselsettingsandel +vei +må +tilbak +spor +mar +iverksatt +kraftfull +tiltak +gi +økonomisk +trygg +pluts +fikk +inntektsgrunnlag +rev +bort +sikr +kommun +helsetjenest +ressurs +nok +fortsett +ta +var +hjelp +levedykt +bedrift +gjennom +krev +period +tiltak +virk +aktivitet +norsk +økonomi +tatt +igj +dramatisk +nedgang +gjeninnhenting +skjør +kris +må +ta +høyd +bedrift +sær +næring +reiseliv +kultur +fortsatt +ramm +smitteverntiltak +begrens +aktivitet +ordrebøk +eksportbedrift +antak +tynn +grunn +svak +etterspørsel +land +handl +hos +handelspartnern +vår +økonomisk +nedgang +end +dyper +gjeninnhenting +svaker +budsjett +legg +oljepengebruk +mrd +kr +tilsvar +pst +fondsverdi +betyr +aller +nest +år +tilbak +langsikt +rettesnor +bærekraft +bruk +oljeinntekt +samtid +mrd +kr +mer +brukt +budsjett +nest +år +virk +ekspansiv +økonomisk +politikk +alt +bygging +vei +ban +investering +forsvar +overføring +kommun +fylk +helsetjenest +nye +byggeprosjekt +ulik +støtteordning +skap +aktivit +arbeid +hel +land +tillegg +forsterk +satsing +arbeidsmarkedstiltak +står +jobb +koronapandemi +komm +endr +van +vår +mer +digital +mye +tyd +komm +reis +mindr +mer +hjemmekontor +var +endring +endring +næringsliv +må +tilpass +må +hjelp +næringsliv +gjennom +kris +svekk +innovasjonskraft +ødelegg +omstillingsevn +krev +balansegang +bedrift +går +konkur +god +tid +vansk +tid +politiker +må +klok +nok +erkjenn +vet +morgendag +vinner +marked +må +avgjør +bedrift +vider +norg +naturressurs +teknologi +kompetans +gir +fantastisk +mul +andr +mål +regjering +strategi +må +sikr +fler +ben +stå +må +fortsett +forny +norg +treng +fler +jobb +fler +bransj +hel +land +vekst +må +komm +privat +næringsliv +gjennom +syv +år +regjering +gjort +mye +bedr +rammebeting +bedrift +konkurranseevn +kraft +bedr +bl.a +bedrift +tar +bruk +ny +teknologi +senk +skatt +nivå +land +konkurrer +prioriter +laver 
+selskapsskatt +laver +marginalskatt +arbeid +laver +formuesskatt +gir +arbeidsplass +arbeidslyst +kutt +skatt +end +mer +bred +lett +inntektsskatt +laver +trinnskatt +høyer +minstefradrag +lønn +pensjon +vider +nedtrapping +skatt +arbeid +kapital +gi +norsk +eier +muskl +invester +norsk +arbeidsplass +øker +skattefordel +kjøp +aksj +bedrift +ansatt +ønsk +fler +medarbeider +medeier +bedrift +foreslår +endr +vannkraftbeskatning +tilfør +næring +betyd +likvidit +legg +rett +investering +nødvend +oppgradering +næring +skap +aktivit +lokalsamfunn +hel +land +tillegg +øke +frikortgrens +kr +komm +ungdom +god +behold +litt +mer +peng +tjen +juli +nest +år +innfør +kildeskatt +enkelt +betaling +nærstå +selskap +lavskatteland +formål +motvirk +overskuddsflytting +unngå +inntekt +skap +norg +beskatt +ann +land +laver +skatt +skatt +avgift +klart +størst +inntektskild +statsbudsjett +beskytt +norsk +skattegrunnlag +derfor +helt +nødvend +hold +skatt +lav +vår +bedrift +arbeidsplass +budsjett +van +famili +betal +kr +mindr +skatt +nest +år +rød +grønn +skattenivå +videreført +kr +van +famili +samm +skatt +avgiftsnivå +reduser +nest +mrd +kr +regjeringsperiod +gjennom +sterk +satsing +forskning +utvikling +skap +regjering +grobunn +omstilling +vekst +øker +fou +bevilgning +mer +mrd +kr +mrd +kr +peng +bl.a +gå +trapp +langtidsplan +forskning +høyer +utdanning +deltag +horisont +europ +eus +romprogram +norsk +bedrift +forsker +hevd +bra +konkurrans +peng +eu +programm +deltag +bidr +nye +jobb +løs +stor +samfunnsoppgav diff --git a/vendor/elasticlunr-rs/tests/data/pt.in.txt b/vendor/elasticlunr-rs/tests/data/pt.in.txt new file mode 100644 index 000000000..04df46de3 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/pt.in.txt @@ -0,0 +1,90 @@ +Eu poucas vezes canto os casos melancolicos, +Os lethargos gentis, os extasis bucolicos +E as desditas crueis do proprio coração; +Mas não celebro o vicio e odeio o desalinho +Da muza sem pudor que mostra no caminho +A liga á multidão. 
+ +A sagrada poesia, a peregrina eterna, +Ouvi dizer que soffre uma affecção moderna, +Uns fastios sem nome, uns tedios ideaes; +Que ensaia, presumida, o gesto romanesco +E, vaidosa de si, no collo eburneo e fresco, +Põe crémes triviaes! + +Oh, pensam mal de ti, da tua castidade! +Deslumbra-os o fulgor dos astros da cidade, +Os falsos ouropeis das cortezãs gentis, +E julgam já tocar-te as roçagantes vestes +Ó deusa virginal das coleras celestes, +Das graças juvenis! + +Retine a cançoneta alegre das bachantes, +Saudadas nos wagons, nos caes, nos restaurantes, +Visões d'olhar travesso e provocantes pés, +E julgam já escutar a voz do paraiso, +Amando o que ha de falso e torpe no sorriso +Das musas dos cafés! + +Oh, tu não és, de certo, a virgem quebradiça +Estiolada e gentil, que vem depois da missa +Mostrar pela cidade o seu fino desdem, +Nem a fada que sente um vaporoso tedio +Emquanto vae sonhando um noivo rico e nédio +Que a possa pagar bem! + +Nem posso mesmo crêr, archanjo, que tu sejas +A menina gentil que ás portas das egrejas +Emquanto a multidão galante adora a cruz, +A bem do pobre enfermo á turba pede esmola +Nas pompas ideaes da moda, que a consola +Das magoas do Jesus! + +E nas horas de luta emquanto os povos choram +E a guerra tudo mata e os reis tudo devoram, +Não posso dizer bem se acaso tu serás +A senhora que espalha os languidos fastios +Nos pomposos salões, sorrindo a fazer fios +Á viva luz do gaz! + +Tu és a apparição gentil, meia selvagem, +D'olhar profundo e bom, de candida roupagem, +De fronte immaculada e seios virginaes, +Que desenha no espaço o limpido contorno +E cinge na cabeça o virginal adorno +De folhas naturaes. + +Tens a linha ideal das candidas figuras; +As curvas divinaes; as tintas sãs e puras +Da austera virgindade; as bellas correcções; +E segues magestosa em teu longo caminho +Deixando fluctuar a tunica de linho +Ás frescas virações! 
+ +Quando trava batalha a tua irmã Justiça +Acodes ao combate e apontas sobre a liça +Uma espada de luz ao Mal dominador: +E pensas na belleza harmonica das cousas +Sentindo que se move um mundo sob as louzas +No germen d'uma flôr! + +N'um sorriso cruel, pungente d'ironia, +Tambem sabes vibrar, serena, altiva e fria, +O latego febril das grandes punições; +E vendo-te sorrir, a geração doente, +Sentir cuida, talvez, a nota decadente, +Das morbidas canções! + +Oh, vôa sem cessar traçando nos teus hombros +O manto constellado, ó deusa dos assombros, +Até chegar um dia ás regiões de luz, +Aonde, na poeira aurifera dos astros, +Contricto, Satanaz enxugará de rastos, +As chagas de Jesus! + +Logar á minha fada ó languidas senhoras! +E vós que amaes do circo as noites tentadoras, +Os fluctuantes véos, os gestos divinaes, +Podeis vel-a passar n'um turbilhão fantastico, +Voando no corcel febril, nervoso, elastico, +Dos novos ideaes!
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/tests/data/pt.out.txt b/vendor/elasticlunr-rs/tests/data/pt.out.txt new file mode 100644 index 000000000..ee0cc5fd8 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/pt.out.txt @@ -0,0 +1,334 @@ +pouc +vez +cant +cas +melancol +letharg +gent +extas +bucol +desdit +cru +propri +coraçã +celebr +vici +odei +desalinh +muz +pudor +mostr +caminh +lig +á +multidã +sagr +poes +peregrin +etern +ouv +diz +soffr +affecçã +modern +uns +fasti +nom +uns +tedi +idea +ensa +presum +gest +romanesc +vaidos +si +coll +eburn +fresc +põ +crém +trivia +oh +pens +mal +ti +castidad +deslumbr +fulgor +astros +cidad +fals +ourop +cortezãs +gent +julg +toc +roçag +vest +ó +deus +virginal +col +cel +grac +juven +retin +cançonet +alegr +bachant +saud +wagons +caes +restaur +visõ +d'olh +travess +provoc +pés +julg +escut +voz +parais +amand +ha +fals +torp +sorris +mus +cafés +oh +és +cert +virg +quebradic +estiol +gentil +vem +miss +mostr +cidad +fin +desd +fad +sent +vapor +tedi +emquant +vae +sonh +noiv +ric +nédi +poss +pag +bem +poss +crêr +archanj +sej +menin +gentil +ás +port +egrej +emquant +multidã +galant +ador +cruz +bem +pobr +enferm +á +turb +ped +esmol +pomp +idea +mod +consol +mago +jesus +hor +lut +emquant +pov +chor +guerr +tud +mat +reis +tud +devor +poss +diz +bem +acas +serás +senhor +espalh +langu +fasti +pompos +salõ +sorr +faz +fios +á +viv +luz +gaz +és +appariçã +gentil +mei +selvag +d'olh +profund +bom +cand +roupag +front +immacul +sei +virgina +desenh +espac +limp +contorn +cing +cabec +virginal +adorn +folh +natura +tens +linh +ideal +cand +figur +curv +divina +tint +sãs +pur +aust +virgindad +bell +correcçõ +segu +magest +long +caminh +deix +fluctu +tunic +linh +ás +fresc +viraçõ +trav +batalh +irmã +justic +acod +combat +apont +sobr +lic +espad +luz +mal +domin +pens +bellez +harmon +cous +sent +mov +mund +sob +louz +germen +d'um +flôr +n'um +sorris +cruel +pungent +d'iron +tamb +sab +vibr +seren 
+altiv +fri +lateg +febril +grand +puniçõ +vend +sorr +geraçã +doent +sent +cuid +talvez +not +decadent +morb +cançõ +oh +vôa +cess +trac +hombr +mant +constell +ó +deus +assombr +cheg +dia +ás +regiõ +luz +aond +poeir +aurif +astros +contrict +satanaz +enxug +rast +chag +jesus +log +á +fad +ó +langu +senhor +vós +ama +circ +noit +tentador +fluctuant +véos +gest +divina +pod +vel +pass +n'um +turbilhã +fantast +voand +corcel +febril +nervos +elast +nov +idea diff --git a/vendor/elasticlunr-rs/tests/data/ro.in.txt b/vendor/elasticlunr-rs/tests/data/ro.in.txt new file mode 100644 index 000000000..0bf8950b8 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/ro.in.txt @@ -0,0 +1,106 @@ +Aceasta carte contine teoria mea originala, numita MDT (Modeling Devices Theory), +asupra functiilor hardware de baza ale unui creier (animal sau uman). + +Fiind o teorie stiintifica, ea este de fapt un model simbolic. Orice model +simbolic trebuie sa contina un numar foarte limitat de termeni fundamentali si un +numar foarte limitat de relatii fundamentale intre termenii fundamentali. Pentru +termenii fundamentali si numai pentru ei, se accepta' definitii bazate pe +descrieri. Toti ceilalti termeni sunt generati de model, odata cu definitiile lor, +prin operatii logico-matematice. Acestea sunt caracteristicile fundamentale ale +oricarei teorii stiintifice. Teoria prezentata urmeaza aceste reguli de baza. + +Aceasta teorie se afla' in totala opozitie cu toate stiintele actuale care +studiaza functionarea creierului si care stiinte nu se bazeaza pe un singur model +simbolic. In acest fel, aceasta teorie descalifica' din start tot ce s-a creat in +ultimele citeva sute de ani in domenii cum ar fi psihologia, psihiatria, +gnoseologia, epistemologia, stiintele comportamentelor animalelor, partial +stiintele sociale si alte domenii conexe. 
+ +Aceasta incercare de revolutie totala este necesara si justificata de urmatoarea +situatie, care situayie exista' independent de existenta sau nu a teoriei mele. + +In psihologie, de exemplu, se folosesc o serie de termeni (constiinta, realitate, +adevar, perceptii, emotii, etc.) care nu au definitii universal acceptate. In +fapt, fiecare psiholog are propriile variante de definitii descriptive asupra +tuturor termenilor folositi de el. Psihologia nu este o stiinta exacta, lucru +universal acceptat. Atunci cind va aparea o stiinta exacta care sa acopere si +domeniul psihologiei, atunci tot ce s-a scris deja in psihologie trebuie abandonat +sau rescris in baza acelei teorii stintifice. + +Intr-o stiinta exacta cum ar fi Mecanica lui Newton, toti termenii folositi au +exact aceleasi definitii pentru oricine, oriunde si oricind, fara nici o +modificare de aproximativ 340 de ani de cind au fost creati. + +De exemplu, termenul "viteza" are o definitie generata de modelul simbolic. Acesta +definitie este v=s/t (se imparte spatiul la timp). Termenul "viteza" nu este deci +introdus prin descriere. + +Sa presupunem acum ca cineva a creat sau va crea un model simbolic fundamental (o +stiinta exacta) care explica' functionarea creierului in mod acceptabil. Prima +consecinta a aparitiei acestui model este ca absolut toti termenii folositi in +domeniile acoperite de acel model, vor fi definiti pe baza modelului. Rezultatul +este cel care a fost enuntat mai sus si anume, tot ce s-a scris in ultimii citeva +sute de ani in asa zisele stiinte asociate creierului, va trebui abandonat sau +rescris. + +Indiferent daca teoria prezentata in aceasta carte va fi sau nu acceptata, mai +repede sau mai tirziu, tot va aparea un model simbolic fundamental care sa explice +functionarea creierului si deci, mai repede sau mai tirziu, tot se va intimpla +aceasta revolutie. + +Aici apare insa o problema suplimentara. 
Pseudo-stiintele actuale asociate +creierului sunt sustinute de o puternica structura academica si cu caracter +aplicativ/lucrativ. Oamenii care sustin aceasta structura nu au cum sa accepte +nici o teorie bazata pe un singur model simbolic, deoarece asta inseamna' sa ia +totul de la zero. + +Consecinta este faptul ca, chiar daca ar aparea un model simbolic fundamental +"absolut corect ", opozitia care ar aparea ar fi enorma. Nu-mi creez nici o iluzie +ca cineva care deja lucreaza in domeniile acestor pseudo-stiinte va accepta sau +chiar va lua in considerare aceasta teorie sau oricare alta de acest fel. + +Bazat pe experienta de peste 10 ani de cind exista' aceasta teorie, ea a avut +succes la persoanele care lucreaza deja in domeniul stiintelor exacte +(matematicieni, fizicieni,..) dar si la tinerii intre 12 si 20 de ani. Mai precis, +la tinerii care nu sunt inca remorcati de sistemul social-economic actual. Un +student care a primit deja o tema de lucrare de diploma, va trebui sa urmeze linia +trasata de profesorii lui. El nu are cum sa-si riste viitorul aventurindu-se intr- +un domeniu neinteles de profesorii lui. + +Sa vedem ce ofera aceasta teorie. In primul rind, fiind un model simbolic, ea este +bazata pe logica. Ea da definitii extrem de precise si neinterpretabile tuturor +termenilor folositi in asociatie cu functionarea creierului. + +Teoria explica' principiul de functionare al creierului, animal sau uman, pina la +a fi in stare sa faca un proiect logic functional, adica un proiect de dispozitiv +logic, care poate sintetiza functiile de baza ale creierului animal sau uman. + +De fapt, creierul este tratat ca un produs tehnologic. Astfel, se definesc +cerintele fundamentale dar si deficientele fundamentale de proiectare. Sunt +explicate problemele si solutiile legate de implementarea tehnologica a +creierului, in multiplele lui variante. 
+ +Teoria sugereaza faptul ca proiectantul, in decursul zecilor de milenii, a facut +mai multe variante tehnologice care se pot recunoaste in realitatea externa. Se +analizeaza daca prin evolutie se poate trece sau nu, de la un creier de animal la +un creier de om. + +Sunt tratate si problemele de proiectare sau tehnologice, cunoscute sub denumirea +de deficiente/boli psihice (in forme patologice sau nu). + +Teoria trateaza intr-un mod stiintific si asa zisele fenomene paranormale si +sugereaza metode pentru dezvoltarea abilitatilor in acest domeniu. + +Cartea are doua parti. Prima prezinta teoria generala impreuna cu citeva aplicatii +considerate mai importante. In a doua parte sunt prezentate mai detaliat, un numar +de exemple, teste si aplicatii, care sa sustina intelegerea teoriei generale. + +Din cauza ca teoria, numita de mine MDT (Modeling-Devices Theory), a fost scrisa +initial in limba engleza (din 1997 elemente ale ei se afla pe WEB), un mare numar +de termeni sunt prescurtati folosind terminologia engleza. + +Elementele de baza ale teoriei au aparut cam in 1993 si prima forma scrisa in +1995. De atunci teoria a fost perfectionata si dezvoltata si procesul continua. +In anul 2003 o versiune foarte apropiata de aceasta a fost publicata la editura +Cosmos din Sibiu. Aceasta versiune poate fi considerata ca o editie imbunatatita +si adaugita a cartii din 2003.
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/tests/data/ro.out.txt b/vendor/elasticlunr-rs/tests/data/ro.out.txt new file mode 100644 index 000000000..d2a23148f --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/ro.out.txt @@ -0,0 +1,635 @@ +cart +contin +teor +original +numit +mdt +modeling +devices +theory +funct +hardw +baz +creier +animal +uman +fiind +o +teor +stiintif +fapt +model +simbol +model +simbol +trebui +contin +numar +foart +limit +termen +fundamental +si +numar +foart +limit +relat +fundamental +intre +termen +fundamental +termen +fundamental +si +numa +accept +defin +bazat +descrier +tot +ceilalt +termen +gener +model +odat +defin +oper +logico +matemat +caracterist +fundamental +oricare +teor +stiintif +teor +prezent +urmeaz +regul +baz +teor +afla +in +total +opozit +stiint +actual +studiaz +function +creier +si +stiint +bazeaz +singur +model +simbol +in +fel +teor +descalif +start +s +a +creat +in +ultim +citev +sut +ani +in +domen +psiholog +psihiatr +gnoseolog +epistemolog +stiint +comportament +animal +partial +stiint +social +si +alte +domen +conex +incerc +revolut +total +necesar +si +justific +urmat +situat +situay +exist +independent +existent +a +teor +in +psiholog +exemplu +folos +o +ser +termen +constiint +realitat +adevar +percept +emot +etc +defin +universal +accept +in +fapt +psiholog +propr +variant +defin +descript +tuturor +termen +folos +psiholog +o +stiint +exact +lucru +universal +accept +atunc +cind +va +apar +o +stiint +exact +acop +si +domen +psiholog +atunc +s +a +scris +in +psiholog +trebui +abandon +rescris +in +baz +acele +teor +stintif +intr +o +stiint +exact +mecan +newton +tot +termen +folos +exact +aceleas +defin +si +oric +far +o +modific +aproxim +ani +cind +fost +creat +exemplu +termen +vitez +o +definit +gener +model +simbol +definit +v=s/t +impart +spat +termen +vitez +introdus +descrier +presupun +a +creat +va +cre +model +simbol +fundamental +o +stiint +exact +explic +function +creier +in 
+mod +accept +consecint +a +apar +acest +model +absol +tot +termen +folos +in +domen +acoper +model +vor +defin +baz +model +rezult +a +fost +enunt +sus +si +anum +s +a +scris +in +ultim +citev +sut +ani +in +asa +zis +stiint +asoc +creier +va +treb +abandon +rescris +indiferent +dac +teor +prezent +in +cart +va +accept +reped +tirziu +va +apar +model +simbol +fundamental +explic +function +creier +si +reped +tirziu +va +intimpl +revolut +apar +insa +o +problem +suplimentar +pseudo +stiint +actual +asoc +creier +sustin +o +putern +structur +academ +si +caracter +aplicativ/lucr +oamen +sustin +structur +accept +o +teor +bazat +singur +model +simbol +inseamn +ia +tot +consecint +fapt +dac +apar +model +simbol +fundamental +absol +corect +opozit +apar +enorm +creez +o +iluz +lucreaz +in +domen +acestor +pseudo +stiint +va +accept +va +lua +in +consider +teor +alta +fel +bazat +experient +ani +cind +exist +teor +a +avut +succes +persoan +lucreaz +in +domen +stiint +exact +matematicien +fizicien +si +tiner +intre +si +ani +precis +tiner +inca +remorc +sist +social +econom +actual +student +a +primit +o +tem +lucr +diplom +va +treb +urmez +lini +trasat +profesor +si +rist +viitor +aventur +intr +domeniu +neinteles +profesor +ved +ofer +teor +in +rind +fiind +model +simbol +bazat +logic +defin +extrem +prec +si +neinterpret +tuturor +termen +folos +in +asociat +function +creier +teor +explic +princip +function +creier +animal +uman +pin +a +in +star +fac +proiect +logic +functional +adic +proiect +dispoz +logic +sintetiz +funct +baz +creier +animal +uman +fapt +creier +tratat +produs +tehnolog +astfel +defin +cerint +fundamental +si +deficient +fundamental +proiect +explic +problem +si +solut +legat +implement +tehnolog +a +creier +in +multipl +variant +teor +sugereaz +fapt +proiect +in +decurs +zec +milen +a +facut +mult +variant +tehnolog +recunoast +in +realitat +extern +analizeaz +dac +evolut +trec +creier +animal +creier +om +tratat +si +problem +proiect +tehnolog 
+cunosc +denum +deficiente/bol +psihic +in +form +patolog +teor +trateaz +intr +mod +stiintif +si +asa +zis +fenomen +paranormal +si +sugereaz +metod +dezvolt +abil +in +domeniu +cart +dou +part +prezint +teor +general +impreun +citev +aplic +consider +import +in +a +dou +part +prezent +detal +numar +exempl +test +si +aplic +sustin +inteleg +teor +general +cauz +teor +numit +mdt +modeling +devices +theory +a +fost +scris +initial +in +limb +englez +element +afla +web +mar +numar +termen +prescurt +folos +terminolog +englez +element +baz +teor +apar +cam +in +si +form +scris +in +atunc +teor +a +fost +perfection +si +dezvolt +si +proces +continu +in +an +o +versiun +foart +aprop +a +fost +public +editur +cosmos +sibiu +versiun +consider +o +edit +imbunatat +si +adaug +a +cart diff --git a/vendor/elasticlunr-rs/tests/data/ru.in.txt b/vendor/elasticlunr-rs/tests/data/ru.in.txt new file mode 100644 index 000000000..76faae3a2 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/ru.in.txt @@ -0,0 +1,71 @@ +Московія! Въ понятіи иностранцевъ, отправлявшихся въ этотъ далекій, +загадочный, снѣжный край — центральная ли только Россія? Или и Югъ +съ златоглавымъ Кіевомъ, и Архангельскъ рыбный, и Каспій мутноводный, +и Сибирь съ пушнымъ звѣремъ, и Кавказъ? + +Все это, вмѣстѣ взятое. + +Границы не были четко очерчены. Гдѣ кончалась Московія въ представленіи +даже ученыхъ географовъ того времени, а тѣмъ болѣе въ воображеніи +художниковъ, просто туристовъ, искателей приключеній, составителей +мемуаровъ? + +Границы страны, по которой бродятъ бѣлые медвѣди, гдѣ снѣгъ лежитъ +толстой пеленой, гдѣ люди питаются сырымъ мясомъ и даже поѣдаютъ другъ +друга! + +Но туда ѣдутъ любознательные и пытливые путешественники: Олеарій, +Корбъ, Герберштейнъ и другіе, и понемногу проливается свѣтъ на невѣдомую +страну. 
Захватывая и всѣ окраины Россіи (Сибирь, Кавказъ), путешественники, +однако, смѣшиваютъ свои представленія о людяхъ, обычаяхъ, костюмахъ, +почти отождествляя, напримѣръ, татарина съ великороссомъ; они искажаютъ +и архитектурныя формы: на ихъ рисункахъ главки Василія Блаженнаго и +русскихъ монастырей пріобрѣтаютъ формы куполовъ персидскихъ дворцовъ +и самаркандскихъ мечетей. + +Но отъ этого экзотическій интересъ ихъ живописныхъ показаній не +ослабляется, а напротивъ усиливается. Явно восточнаго типа халаты, +мѣховыя шапки, длинные рукава — и тутъ же великорусскія кольчуги и +чисто русскіе уборы коней. + +Бытъ, жизнь Московіи кажется имъ суровой. Сколько наказаній тѣлесныхъ — +висѣлицъ! Лѣсомъ цѣлымъ стоятъ висѣлицы на площадяхъ, людей живьемъ +зарываютъ въ могилы, а тутъ же пышные кортежи, пріемы пословъ, +засѣданія Думы Боярской — вотъ картины, проходящія передъ зрителемъ +этихъ изображеній. Заѣзжія экспедиціи и труды отдѣльныхъ современниковъ +иностранцевъ даютъ богатѣйшій матеріалъ, являющійся основой разысканій +о Россіи былыхъ временъ. + +Интересъ къ Россіи, къ ея исторіи, быту, культурѣ, всюду нарастаетъ; +онъ неизбѣжно станетъ еще большимъ. Уже и сейчасъ въ Англіи, въ Германіи, +въ Чехіи издаются книги о старомъ и новомъ русскомъ искусствѣ. + +То же наблюдается и въ Парижѣ, гдѣ усиленно коллекціонируются гравюры +и книги, относящіяся къ Россіи. + +Изъ числа коллекцій, содержащихъ богатый подборъ книгъ, упомянемъ +собранія Апостола, Катенева, Нелидова, Тищенко, Трубецкой, Гревса, +Шуваловой и др. Гравюры, изображающія русскую жизнь и русскій бытъ, +собираютъ всѣ, кто можетъ. + +Особенно цѣннымъ для вопроса, насъ занимающаго нынѣ, является собраніе +П. Н. Апостола, заключающее въ себѣ рѣдкія изданія Олеарія, Корба, +Герберштейна и др. Старательно подобранныя, эти изданія представляютъ +собою особую рѣдкость за границей, ибо многихъ изъ нихъ не имѣетъ даже +Парижская Національная Библіотека. 
+ +Обзоръ хотя бы трехъ-четырехъ авторовъ изъ собранія Апостола даетъ уже +такой богатый матеріалъ для характеристики Россіи XVI-XVII вѣковъ, что +мы и ограничимся пока репродукціями гравюръ изъ этихъ книгъ. Когда я +пересматривалъ чудесныя in quarto и in folio, въ одинъ изъ уютныхъ +вечеровъ, проведенныхъ мною въ Парижѣ, въ квартирѣ П.Н. Апостола, +у меня явилась мысль подобрать такіе наиболѣе интересные моменты изъ +русской жизни, которые будутъ характерны для пониманія иностранцами +Россіи — тогда _Московіи_. + +Выбравъ эти гравюры, я просилъ компетентнаго П.Н. Апостола составить +къ нимъ описаніе. Нынѣ это изданіе, съ необходимыми комментаріями, +предлагается на судъ читателя. + +Думается, _Московія_ въ этихъ, хотя бы немногихъ, «штрихахъ» +закрѣплена. diff --git a/vendor/elasticlunr-rs/tests/data/ru.out.txt b/vendor/elasticlunr-rs/tests/data/ru.out.txt new file mode 100644 index 000000000..01a607f86 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/ru.out.txt @@ -0,0 +1,356 @@ +москові +въ +поняті +иностранцевъ +отправля +въ +этотъ +далекі +загадочн +снѣжны +кра +центральн +россі +югъ +съ +златоглавымъ +кіевомъ +архангельскъ +рыбн +каспі +мутноводн +сибир +съ +пушнымъ +звѣремъ +кавказъ +вмѣстѣ +взят +границ +четк +очерч +гдѣ +конча +москові +въ +представлені +ученыхъ +географовъ +а +тѣмъ +болѣ +въ +воображені +художниковъ +туристовъ +искател +приключені +составител +мемуаровъ +границ +стран +бродятъ +бѣлы +медвѣд +гдѣ +снѣгъ +лежитъ +толст +пелен +гдѣ +пита +сырымъ +мясомъ +поѣдаютъ +другъ +друг +ѣдутъ +любознательн +пытлив +путешественник +олеарі +корбъ +герберштейнъ +другі +понемног +пролива +свѣтъ +невѣдом +стран +захватыв +всѣ +окраин +россі +сибир +кавказъ +путешественник +смѣшиваютъ +представлені +людяхъ +обычаяхъ +костюмахъ +отождествл +напримѣръ +татарин +съ +великороссомъ +искажаютъ +архитектурны +форм +ихъ +рисункахъ +главк +василі +блаженнаг +русскихъ +монастыр +пріобрѣтаютъ +форм +куполовъ +персидскихъ +дворцовъ +самаркандскихъ +мечет +отъ 
+экзотическі +интересъ +ихъ +живописныхъ +показані +ослабля +а +напротивъ +усилива +явн +восточнаг +тип +халат +мѣховы +шапк +длин +рукав +тутъ +великорусскі +кольчуг +чист +русскі +убор +кон +бытъ +москові +имъ +суров +наказані +тѣлесныхъ +висѣлицъ +лѣсомъ +цѣлымъ +стоятъ +висѣлиц +площадяхъ +люд +живьемъ +зарываютъ +въ +могил +а +тутъ +пышн +кортеж +пріем +пословъ +засѣдані +дум +боярск +вотъ +картин +проходящі +передъ +зрителемъ +этихъ +изображені +заѣзжі +экспедиці +труд +отдѣльныхъ +современниковъ +иностранцевъ +даютъ +богатѣйші +матеріалъ +являющі +основ +разыскані +россі +былыхъ +временъ +интересъ +къ +россі +къ +е +исторі +быт +культурѣ +нарастаетъ +онъ +неизбѣжн +станетъ +большимъ +сейчасъ +въ +англі +въ +германі +въ +чехі +изда +книг +старомъ +новомъ +русскомъ +искусствѣ +наблюда +въ +парижѣ +гдѣ +усилен +коллекціонир +гравюр +книг +относящі +къ +россі +изъ +числ +коллекці +содержащихъ +богат +подборъ +книгъ +упомянемъ +собрані +апостол +катенев +нелидов +тищенк +трубецк +гревс +шувалов +др +гравюр +изображающі +русск +русскі +бытъ +собираютъ +всѣ +можетъ +цѣннымъ +вопрос +насъ +занимающаг +нынѣ +явля +собрані +п +н +апостол +заключа +въ +себѣ +рѣдкія +издані +олеарі +корб +герберштейн +др +старательн +подобранны +издані +представляютъ +особ +рѣдкост +границ +иб +многихъ +изъ +нихъ +имѣетъ +парижск +національн +библіотек +обзоръ +трехъ +четырехъ +авторовъ +изъ +собрані +апостол +даетъ +богат +матеріалъ +характеристик +россі +вѣковъ +огранич +репродукці +гравюръ +изъ +этихъ +книгъ +пересматривалъ +чудесны +въ +одинъ +изъ +уютныхъ +вечеровъ +проведенныхъ +въ +парижѣ +въ +квартирѣ +п.н +апостол +яв +мысл +подобра +такі +наиболѣ +интересн +момент +изъ +русск +жизн +будутъ +характерн +понимані +иностранц +россі +москові +выбравъ +гравюр +просилъ +компетентнаг +п.н +апостол +состав +къ +нимъ +описані +нынѣ +издані +съ +необходим +комментарі +предлага +судъ +читател +дума +москові +въ +этихъ +немногихъ +штрихахъ +закрѣпл diff --git 
a/vendor/elasticlunr-rs/tests/data/sv.in.txt b/vendor/elasticlunr-rs/tests/data/sv.in.txt new file mode 100644 index 000000000..6e7192190 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/sv.in.txt @@ -0,0 +1,132 @@ +Likasom förtjensten att undan förgängelsen hafva räddat Finlands +historiska minnen nästan uteslutande tillhör Porthan, likaså hafva +nationens mythiska qvarlefvor hufvudsakligast genom D:r Lönnrots +verksamhet blifvit framkallade i ljuset. Alldeles obanad var väl +icke heller den väg, han beträdde; men, hvad man före honom gjort, +är liksom det icke vore till, sedan Kalevala och Kanteletar trädt +i dagen. De få dessförinnan gjorda runo-samlingar hafva numera +betydelse endast i den mon, som de tjenat till att framkalla dessa +åtminstone för Finland evigt dyrbara skatter. Men ur denna synpunkt +betraktade hafva de ett stort, historiskt värde, och vi anse af sådan +anledning för en pligt att omnämna de förnämsta bland dem, som för +D:r Lönnrot banat vägen. -- Den som först fästade uppmärksamheten +vid vår inhemska poesi, var Porthan -- den Finska litteraturens +heros. Man tror, att Porthan det oaktadt icke alltför högt uppskattat +de mythiska sångerna; men han samlade dem icke desto mindre, och +genom hans föresyn väcktes äfven andra, i synnerhet Ganander, att +fortgå på samma bana. Gananders förtjenst består dock icke så mycket +i gjorda runo-samlingar, som i bemödandet att tillvägabringa en +Finsk Mythologi. Väl säger han sig "_i flere år hafva genomgått +alla tillgängliga och i landet möjligen existerande Runor_", men +ur dedicationen i hans arbete framlyser, att de till större delen +blifvit af Porthan honom meddelade. Detsamma torde i ännu högre +grad gälla om Lenqvist, hvars mythologiska arbete för öfrigt vida +öfverträffar Gananders. 
I allmänhet synes man vid denna tid ännu +icke varit betänkt på utgifvandet af en fullständig runo-samling, +utan ansåg det till en början vara nödvändigare att bearbeta en +mythologi, såsom "clavis poëseos Fennicae" (Gan.). -- Efter Porthans +död stod den Finska litteraturen en lång tid stilla, sörjande blott +den hädangångne. Men med år 1809 vaknade hon åter till nytt lif. Vid +denna tid eller åtminstone kort derefter uppträdde många utmärkta +litteratörer, och under den tidrymd, som sedan tilländalupit, +räknar den Finska litteraturen flere bearbetare, än under hela det +föregående seklet. Ibland dem hafva de flesta i större eller mindre +mon egnat sin uppmärksamhet åt folkpoesien. Mest förtjent är i detta +afseende D:r Topelius, hvilken i fem särskildta häften utgifvit en +samling af äldre och nyare runor. Honom tillhör äfven förtjensten +att hafva angifvit de orter, hvarest de Finska sångerna renast och i +största mängd bibehållit sig. Han säger i företalet till femte häftet +af sin samling: "På få ställen och nästan ingenstädes i Finland +finnas fullständiga och oförderfvade forntida qväden. -- -- +Ett enda ställe på jorden, som äfven är utom Finlands gräns, nemligen +några Socknar i Archangelska Guvernementet samt i synnerhet, +Wuokkiniemi Församling, räddar ännu fordna seder och den fordna +hjelte-ättens minnen i deras rena och oförfalskade drägt. -- -- +Derifrån har äfven jag med icke ringa möda förskaffat mina bästa +sånger." -- År 1820 företog sig Prof. v. Becker i Turun Wiikko-Sanomat +att till enhet bringa en mängd sånger om Wäinämöinen. Detta försök, så +obetydligt det äfven var, förtjenar likväl uppmärksamhet, såsom det +första i sitt slag. Emellertid synes man i början icke fästat någon +uppmärksamhet vid denna vink. Således fortfor Topelius att utgifva +sin samling i fragmentarisk form. 
Samma, method följdes af D:r +Lönnrot i "Kantele", ehuru, han i företalet till Kalevala säger +sig redan år 1826 vid författandet af sin academiska afhandling om +Wäinämöinen hafva kommit på den tanken, att de förhanden varande +Sångerna om Wäinämöinen, Ilmarinen och Lemminkäinen m.ö. sannolikt +voro fragmenter ur längre cykler. Men de runor han samlade under sina +vandringar i Finland åren 1828 och 1831, voro alltför obetydliga för +att gifva anledning till ett combinations-försök. Först sedan han +1832 och följande åren besökt de af Topelius anvista orter utom +Finska gränsen, började han utföra sin storartade plan att till +enhet bringa alla Fornfinska sånger af episkt innehåll. [Ehuru den +ursprungligt Finska folkpoesien är så charakterristisk, att deri +icke en enda rad kan infuskas, utan att hvar och en, som gjort dess +närmare bekantskap, genast urskiljer det äkta och ursprungliga från +all oäkta afvel, så hafva likväl åtskilliga Pseudo-critici i mjugg +sökt insinuera den oskyldiga förmodan, att D:r Lönnrot, för att +få sin idé realiserad, sjelf completterat Kalevala. Skulle dessa +affällingar vilja i nåder skänka sitt fosterland en tusende del af +den kärlek, hvarmed de sannolikt omfatta sina egna, höga personer, +så vore det för dem, om icke lätt, åtminstone möjligt att öfvertyga +sig derom, att icke en enda af D:r Lönnrot sjelf författad vers +förekommer i hela Kalevala. Vare det likväl långt ifrån mig att för +den Finska Litteraturen vilja göra proselyter af dessa petrificater.] +Huru han lyckats i detta företag, må blifva ett problem för kommande +tider, då, såsom vi hoppas, ett ännu större antal af forntida qväden +kommit i dagsljuset. Emellertid vore det önskligt, att D:r Lönnrot +skulle redogöra för sammanhanget i Kalevala och ådagalägga den enhet, +han trott sig finna deri. Af de åsigter, han uttalat i sitt företal +till Kalevala, synes härflyta, att han icke anser detta qväde kunna +göra anspråk på enhet i högre mening. 
Han säger bland annat: "Enligt +min mening hafva dessa runor uppstått småningom och i samma ordning, +som händelserna tilldragit sig. De skilda sångerna om Wäinämöinen, +Ilmarinen och Lemminkäinen torde icke vara författade af en enda, +utan ett verk af många. Den ena anförtrodde åt minnet en, den andra +en annan tilldragelse och skildrade, hvad han sjelf hade sett eller +hört." Utan att inlåta oss i någon kritik öfver dessa påståenden, +vilja vi allenast anmärka, att, enligt D:r Lönnrots åsigt om runornas +historiska uppkomst, enheten i Kalevala kommer att bero derpå, att de +skilda tilldragelserna ordnas i behörig tidsföljd. Det var sannolikt +äfven denna åsigt, som förmådde honom att i Kalevala införa runorna +28-32, hvilka till sitt innehåll på intet vis sammanhänga med de +öfriga. -- Men huru D:r Lönnrot äfven må hafva uppfattat sammanhanget +i Kalevala, så är i hvarje fall den mening obefogad, som finnes +uttalad i företalet till Runola, att Kalevala endast innehåller en +mängd stympade runo-fragmenter. Hvem medger icke, att mången runa +under tidernas lopp kunnat stympas och förändras, ja till och med +försvinna? Men en hvar, som ej låter förblinda sig af fördomar, bör +finna i Kalevala vida mer än fragmenter. Öfversättaren skall, för +att icke göra sig skyldig till maktspråk af motsatt beskaffenhet, +lemna en kort öfversigt af arbetets innehåll, så att läsaren bättre +kan sammanhålla det hela och lättare inse sammanhanget emellan de +särskildta partierna. -- Det är i fråga om detta sammanhang icke +likgiltigt, hvilken ordning runo-sångarne sjelfva iakttaga. De flesta +följa naturligtvis ingen ordning, utan recitera ett stycke allt +eftersom det faller dem i minnet. Men de flesta bland dem jag under +mina vandringar i Olonetska och Archangelska Guvernementen varit +i tillfälle att höra, sjunga runorna om Sampo i ett sammanhang. +Wäinämöinens, Ilmarinens och Lemminkäinens Pohjola-färder betrakta +de äter såsom skilda cykler. 
Men då Pohjas fagra mö var målet för +dessa färder, så kunna äfven dessa cykler betraktas såsom ett inom sig +slutet helt. Sålunda erbjuder Kalevala tvenne väsendtliga afdelningar, +hvilka åter hafva ett ganska nära inbördes sammanhang derigenom, att +Pohjolas värdinna hade utfästat sin dotter såsom belöning för den, som +kunde smida Sampo. Detta underbara redskap förfärdigas af Ilmarinen. +Ehuru således flickan hade bordt tillhöra honom, ledsnade han likväl +vid lifvet i Pohjola, förr än det lyckats honom att tillvinna sig +hennes kärlek, och återvände så till sin hembygd. Emellertid sökte +äfven Wäinämöinen och Lemminkäinen att vinna hennes ynnest; men +slutligen segrade dock Ilmarinen -- Genom de runor, som besjunga dessa +trenne hjeltars Pohjola-färder, afsöndrar D:r Lönnrot Sampo-cykeln +i tvenne afdelningar. Han låter frieri-runorna begynna, så snart Sampo +var hopsmidd, och Ilmarinen återvände till hemmet. Denna fördelning kan +ur flere skäl försvaras. Först och främst träda Kalevala och Pohjola +i den sednare afdelningen af Sampo-cykeln i ett så fiendtligt +förhållande till hvarandra, att några frieri-färder ifrån den ena +orten till den ändra derefter ej kunna tänkas äga rum. Vidare är +det ganska naturligt, att fiendtligheterna först då taga sin början, +sedan Ilmarinens hustru blifvit dödad, och Pohja-dottren icke +mera utgjorde ett föreningsband emellan Pohjolas och Kalevalas +folk. Ett vigtigt skäl för ifrågavarande fördelning är äfven det, +att Lemminkäinens öden i den sednare afdelningen af Sampo-cykeln +förutsättas såsom bekanta?
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/tests/data/sv.out.txt b/vendor/elasticlunr-rs/tests/data/sv.out.txt new file mode 100644 index 000000000..07306f3e3 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/sv.out.txt @@ -0,0 +1,858 @@ +likasom +förtjenst +undan +förgäng +hafv +rädd +finland +historisk +minn +nästan +uteslut +tillhör +porthan +likaså +hafv +nation +mythisk +qvarlefv +hufvudsak +genom +d:r +lönnrot +verksam +blifvit +framkall +ljuset +alldel +oban +väl +hell +väg +beträd +hvad +för +gjort +liksom +vor +kaleval +kantelet +trädt +dag +få +dessförinnan +gjord +runo +samling +hafv +numer +betyd +end +mon +tjen +framkall +åtminston +finland +evig +dyrbar +skatt +ur +synpunk +betrak +hafv +stort +historisk +värd +ans +af +anledning +pligt +omnämn +förnämst +bland +d:r +lönnrot +ban +väg +först +fäst +uppmärksam +inhemsk +poesi +porthan +finsk +litteratur +hero +tror +porthan +oaktad +alltför +högt +uppskat +mythisk +sång +saml +desto +mindr +genom +föresyn +väck +äfv +andr +synner +ganand +fortgå +ban +ganander +förtjenst +består +dock +gjord +runo +samling +bemöd +tillvägabring +finsk +mythologi +väl +säg +fler +år +hafv +genomgåt +tillgäng +landet +möj +exister +run +ur +dedication +arbet +framlys +störr +del +blifvit +af +porthan +meddel +detsamm +tord +ännu +högr +grad +gäll +lenqvist +hvar +mythologisk +arbet +öfr +vid +öfverträff +ganander +allmän +syn +tid +ännu +betänk +utgifv +af +fullständ +runo +samling +ansåg +början +nödvänd +bearbet +mythologi +såsom +clavis +poëseo +fennica +gan +porthan +död +stod +finsk +litteratur +lång +tid +still +sörj +blott +hädangångn +år +vakn +åter +nytt +lif +tid +åtminston +kort +dereft +uppträd +mång +utmärk +litteratör +tidrymd +tilländalupit +räkn +finsk +litteratur +fler +bearbet +hel +föregåend +seklet +ibland +hafv +flest +störr +mindr +mon +egn +uppmärksam +folkpoesi +mest +förtjent +afseend +d:r +topelius +hvilk +fem +särskild +häft +utgifvit +samling +af +äldr +nyar +run 
+tillhör +äfv +förtjenst +hafv +angifvit +ort +hvarest +finsk +sång +ren +störst +mäng +bibehållit +säg +företalet +femt +häftet +af +samling +få +ställ +nästan +ingenstäd +finland +finn +fullständ +oförderfv +forntid +qväd +end +ställ +jord +äfv +utom +finland +grän +nem +sockn +archangelsk +guvernementet +samt +synner +wuokkiniemi +församling +rädd +ännu +fordn +sed +fordn +hjelt +ätt +minn +ren +oförfalsk +drägt +derifrån +äfv +ring +möd +förskaff +bäst +sång +år +företog +prof +v +beck +turun +wiikko +sanom +enhet +bring +mäng +sång +wäinämöin +försök +obetyd +äfv +förtjen +likväl +uppmärksam +såsom +först +sitt +slag +emellertid +syn +början +fäst +uppmärksam +vink +såled +fortf +topelius +utgifv +samling +fragmentarisk +form +method +följd +af +d:r +lönnrot +kantel +ehuru +företalet +kaleval +säg +redan +år +förfat +af +academisk +afhandling +wäinämöin +hafv +kommit +tank +förh +var +sång +wäinämöin +ilmarin +lemminkäin +m.ö +sannolik +voro +fragment +ur +längr +cykl +run +saml +vandring +finland +åren +voro +alltför +obetyd +gifv +anledning +combination +försök +först +följ +åren +besök +af +topelius +anvist +ort +utom +finsk +gräns +börj +utför +storart +plan +enhet +bring +fornfinsk +sång +af +episk +innehåll +ehuru +ursprung +finsk +folkpoesi +charakterristisk +deri +end +rad +infusk +hvar +gjort +närm +bekantskap +gen +urskilj +äkt +ursprung +all +oäkt +afvel +hafv +likväl +åtskil +pseudo +critici +mjugg +sökt +insinuer +oskyld +förmodan +d:r +lönnrot +få +idé +realiser +sjelf +completter +kaleval +affälling +vilj +nåd +skänk +sitt +fosterland +tusend +del +af +kärlek +hvarmed +sannolik +omfat +egn +hög +person +vor +lätt +åtminston +möj +öfvertyg +derom +end +af +d:r +lönnrot +sjelf +förfat +ver +förekomm +hel +kaleval +var +likväl +lång +ifrån +finsk +litteratur +vilj +gör +proselyt +af +petrificat +huru +lyckat +företag +må +blifv +problem +komm +tid +såsom +hopp +ännu +störr +antal +af +forntid +qväd +kommit +dagsljuset +emellertid +vor +önsk +d:r 
+lönnrot +redogör +sammanhanget +kaleval +ådagalägg +enhet +trott +finn +deri +af +åsig +uttal +sitt +företal +kaleval +syn +härflyt +ans +qväd +kunn +gör +anspråk +enhet +högr +mening +säg +bland +ann +enl +mening +hafv +run +uppståt +småningom +ordning +händ +tilldragit +skild +sång +wäinämöin +ilmarin +lemminkäin +tord +förfat +af +end +verk +af +mång +ena +anförtrod +minnet +andr +annan +tilldrag +skildr +hvad +sjelf +sett +hört +inlåt +kritik +öfv +påståend +vilj +allen +anmärk +enl +d:r +lönnrot +åsig +run +historisk +uppkomst +enhet +kaleval +komm +bero +derpå +skild +tilldrag +ordn +behör +tidsföljd +sannolik +äfv +åsig +förmåd +kaleval +inför +run +hvilk +sitt +innehåll +intet +vis +sammanhäng +öfr +huru +d:r +lönnrot +äfv +må +hafv +uppfat +sammanhanget +kaleval +hvarj +fall +mening +obefog +finn +uttal +företalet +runol +kaleval +end +innehåll +mäng +stymp +runo +fragment +hvem +medg +mång +run +tid +lopp +kunn +stymp +förändr +ja +försvin +hvar +låt +förblind +af +fördom +bör +finn +kaleval +vid +mer +fragment +öfversät +skall +gör +skyld +maktspråk +af +motsat +beskaffen +lemn +kort +öfvers +af +arbetet +innehåll +läs +bättr +sammanhåll +hel +lätt +ins +sammanhanget +emellan +särskild +parti +fråg +sammanhang +likgilt +hvilk +ordning +runo +sång +sjelfv +iakttag +flest +följ +naturligtvis +ordning +reciter +styck +eftersom +fall +minnet +flest +bland +vandring +olonetsk +archangelsk +guvernement +tillfäll +hör +sjung +run +sampo +sammanhang +wäinämöin +ilmarin +lemminkäin +pohjol +färd +betrak +äter +såsom +skild +cykl +pohj +fagr +mö +målet +färd +kunn +äfv +cykl +betrak +såsom +slutet +helt +sålund +erbjud +kaleval +tvenn +väsendt +afdelning +hvilk +åter +hafv +gansk +när +inbörd +sammanhang +derigenom +pohjol +värdin +utfäst +dott +såsom +belöning +smid +sampo +underbar +redskap +förfärd +af +ilmarin +ehuru +såled +flickan +bord +tillhör +ledsn +likväl +lifvet +pohjol +förr +lyckat +tillvin +kärlek +återvänd +hembyg +emellertid +sökt +äfv +wäinämöin 
+lemminkäin +vinn +ynnest +slut +segr +dock +ilmarin +genom +run +besjung +trenn +hjeltar +pohjol +färd +afsöndr +d:r +lönnrot +sampo +cykeln +tvenn +afdelning +låt +frieri +run +begyn +snart +sampo +hopsmid +ilmarin +återvänd +hemmet +fördelning +ur +fler +skäl +försvar +först +främst +träd +kaleval +pohjol +sedn +afdelning +af +sampo +cykeln +fiendt +förhåll +hvarandr +frieri +färd +ifrån +ena +ort +ändr +dereft +kunn +tänk +äga +rum +vid +gansk +natur +fiendt +först +tag +början +ilmarin +hustru +blifvit +död +pohj +dottr +mer +utgjord +föreningsband +emellan +pohjol +kaleval +folk +vigt +skäl +ifrågavar +fördelning +äfv +lemminkäin +öden +sedn +afdelning +af +sampo +cykeln +förutsät +såsom +bekant diff --git a/vendor/elasticlunr-rs/tests/data/tr.in.txt b/vendor/elasticlunr-rs/tests/data/tr.in.txt new file mode 100644 index 000000000..3781f5ccc --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/tr.in.txt @@ -0,0 +1,3 @@ +Ah gençlik!.. Tıpkı ezeli bir baharın ilk çiçekli günlerine benzer. Yeşil kırlar, kelebek dolu bahçeler, güzel kokular içinde serçelerin şen efsanelerini doymadan dinleyerek dolaşırız. İdealimizin rüyası bize hayat kışının fırtınalarını, karlarını, tipilerini hatırlatmaz. Ben işte bu hiç bitmez sanılan baharı İzmir'de geçirdim. On dokuz yaşındaydım. Galiba on beş sene evvel... Evet, seneler nasıl bir ok gölgesi gibi uçuyor! Meşrutiyetin bu hür, bu serbest günlerinden çok uzaktık. Lâkin o eski, zalim idarenin ezici kahrını, gafletim sayesinde hiç duymuyordum. Mersinli'deki minimini evimde, kocaman çınar ağaçlarının hiç durmadan öten ninnileri içinde, kitapların dipsiz girdabına dalmış gitmiştim. Haricî kainat umrumda değildi. Sözde, felsefe feneriyle büyük bir hakikat bulacaktım. Heyhat! Şimdi bu masum hülyamı aklıma getirince, nasıl acı acı gülüyorum... Bir kelimeyi, bir satırı, bir sözü haftalarca, aylarca düşünür, bir cümlenin altındaki —var tevehhüm ettiğim— gizli mânâyı bulmak için birçok geceler uyuyamazdım. 
Filozofların pek o kadar mânâ murad etmeden yumurtladığı fikirler, bence bir "ilahi nass" gibiydi. Hatta romanlarda rasgeldiğim "ukalalık"lar bile gözümden kaçmazdı. Onları da fişlere yazar, notlarımın arasına kordum. + +Bu "ukalalık"lardan birisi, beni tam üç ay düşündürdü. Tam yüz beş gece gözüme uyku girmedi. Flaubert'in miydi, yoksa bir başkasının mı, iyice hatırlayamıyorum. "Le grade dégrade...", yani: "Rütbe, haysiyeti düşürtür." cümlesi! Bundan bir türlü mânâ çıkaramadım. Bilakis, fikrimce rütbe insanı herkesin seviyesinden yukarı kaldırır, yükseltir, hatta sahibine hususi bir haysiyet verirdi. Artık başka kitap, gazete falan okuyamaz oldum... Her satırın altında, mânâsını anlamadığım bu "Le grade dégrade.." cümlesi kararıyor, bir avuç istifham işaretinden yuğrulmuş sabit bir fikir gibi dimağımda düğümleniyordu. Sakin evimde oturamıyor, bulamadığım mânâyı arayarak tenha sahillerde, kalabalık caddelerde, dar sokaklarda serseri serseri dolaşıyordum. Bir "meçhul", bir "sır" insana ne kadar ıztırap verir; bâhusus masum bir iman da olursa... Bir gün yine deli gibi, içimden: "Le grade dégrade..." diye söylenerek Hükümet Konağı'nın önünden geçiyordum. İsmimi işittim. Döndüm. Bir de baktım ki, riyâziye muallimim, Logaritmacı Hasan! Askerî Kıraathanesinin ta köşesinde bir sandalyeye kurulmuş nargilesini çekiyor...
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/tests/data/tr.out.txt b/vendor/elasticlunr-rs/tests/data/tr.out.txt new file mode 100644 index 000000000..2c7accd0b --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/tr.out.txt @@ -0,0 +1,239 @@ +ah +gençlik +tıpkı +ezel +bahar +ilk +çiçekli +gün +benzer +yeşil +kır +kelebek +dol +bahçe +güzel +koku +iç +serçe +şen +efsane +doyma +dinleyerek +dolaşır +i̇deal +rüyas +hayat +kış +fırtına +kar +tipi +hatırlatmaz +bitmez +sanıla +bahar +i̇zmir' +geçir +yaş +galip +se +evvel +evet +sene +ok +gölges +uçuyor +meşrutiyet +hür +serbest +gün +uzak +lâkin +eski +zal +idare +eziç +kahr +gaflet +saye +duymuyor +mersinli' +minim +ev +kocama +çınar +ağaç +durma +ö +ninni +iç +kitap +dipsiz +girdap +dal +gitmiş +haricî +kainat +umr +değil +söz +felsef +fener +büyük +hakikat +bulacak +heyhat +ş +mas +hülya +akl +getir +aç +aç +gülüyor +kelime +satır +söz +hafta +ay +düşünür +cümle +alt +tevehh +ettik +gizli +mânâyı +bulmak +gece +uyuyamaz +filozof +mânâ +muradı +etme +yumurtladık +fikir +be +ilah +nass +gip +roman +rasgeldik +ukalalık" +göz +kaçmaz +fiş +yazar +not +ara +kor +ukalalık" +biris +tam +ay +düşündür +tam +geç +göz +uyku +girmedi +flaubert' +mi +başka +iyiç +hatırlayamıyor +le +grade +dégrade +rütbe +haysiyet +düşür +cümles +türlü +mânâ +çıkaramadı +bilakis +fikr +rütbe +insa +seviye +yukar +kaldırır +yüksel +sahip +hususi +haysiyet +verir +ar +başka +kitap +gaze +fala +okuyamaz +ol +satır +alt +mânâs +anlamadık +le +grade +dégrade +cümles +kararıyor +avuç +istifha +işaret +yuğrul +sabit +fikir +dimak +düğümleniyor +sak +ev +oturamıyor +bulamadık +mânâyı +arayarak +tenha +sahil +kalabalık +cadde +dar +sokak +serser +serser +dolaşıyor +meçhul +sır +insa +ıztırap +verir +bâhusus +mas +ima +gün +del +iç +le +grade +dégrade +söylenerek +hükümet +konağı'n +ön +geçiyor +i̇sm +işit +dö +bak +riyâzi +muall +logaritmaç +hasa +askerî +kıraathane +ta +köşe +sandalye +kurul +nargile +çekiyor diff --git 
a/vendor/elasticlunr-rs/tests/data/zh.in.txt b/vendor/elasticlunr-rs/tests/data/zh.in.txt new file mode 100644 index 000000000..f95aa96d4 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/zh.in.txt @@ -0,0 +1 @@ +这条法国邮船白拉日隆子爵号(VicomtedeBragelonne)正向中国开来。早晨八点多钟,冲洗过的三等舱甲板湿意未干,但已坐满了人,法国人、德国流亡出来的犹太人、印度人、安南人,不用说还有中国人。海风里早含着燥热,胖人身体给炎风吹干了,上一层汗结的盐霜,仿佛刚在巴勒斯坦的死海里洗过澡。毕竟是清晨,人的兴致还没给太阳晒萎,烘懒,说话做事都很起劲。那几个新派到安南或中国租界当警察的法国人,正围了那年轻善撒娇的犹太女人在调情。俾斯麦曾说过,法国公使大使的特点,就是一句外国话不会讲;这几位警察并不懂德文,居然传情达意,引得犹太女人格格地笑,比他们的外交官强多了。这女人的漂亮丈夫,在旁顾而乐之,因为他几天来,香烟、啤酒、柠檬水沾光了不少。红海已过,不怕热极引火,所以等一会甲板上零星果皮、纸片、瓶塞之外,香烟头定又遍处皆是。法国人的思想是有名的清楚,他的文章也明白干净,但是他的做事,无不混乱、肮脏、喧哗,但看这船上的乱糟糟。这船,倚仗人的机巧,载满人的扰攘,寄满人的希望,热闹地行着,每分钟把沾污了人气的一小方小面,还给那无情、无尽、无际的大海。
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/tests/data/zh.out.txt b/vendor/elasticlunr-rs/tests/data/zh.out.txt new file mode 100644 index 000000000..82e77a0f9 --- /dev/null +++ b/vendor/elasticlunr-rs/tests/data/zh.out.txt @@ -0,0 +1,253 @@ +这 +条 +法国 +邮船 +白 +拉 +日隆 +子爵 +号 +VicomtedeBragelonne +正向 +中国 +开来 +早晨 +八点 +多 +钟 +冲洗 +过 +三等 +三等舱 +甲板 +湿 +意 +未 +干 +但 +已 +坐满 +人 +法国 +国人 +法国人 +德国 +流亡 +出来 +犹太 +犹太人 +印度 +印度人 +安南 +人 +不用 +不用说 +还有 +中国 +人 +海风 +里 +早 +含 +着 +燥热 +胖 +人 +身体 +给 +炎风 +吹干 +上 +一层 +汗 +结 +盐霜 +仿佛 +刚 +在 +巴勒 +勒斯 +巴勒斯 +巴勒斯坦 +死 +海里 +洗过 +洗过澡 +毕竟 +是 +清晨 +人 +兴致 +还 +没 +给 +太阳 +晒 +萎 +烘 +懒 +说话 +做事 +都 +很 +起劲 +那 +几个 +新派 +到 +安南 +或 +中国 +租界 +当 +警察 +法国 +国人 +法国人 +正 +围 +那 +年轻 +善 +撒娇 +犹太 +女人 +在 +调情 +俾斯麦 +曾 +说 +过 +法国 +公使 +大使 +特点 +就是 +一句 +外国 +话 +不会 +讲 +这 +几位 +警察 +并 +不 +懂 +德文 +居然 +传情 +达意 +引得 +犹太 +女人 +格格 +地 +笑 +比 +他们 +外交 +外交官 +强 +多 +这 +女人 +漂亮 +丈夫 +在 +旁 +顾 +而 +乐 +之 +因为 +他 +几天 +来 +香烟 +啤酒 +柠檬 +柠檬水 +沾光 +不少 +红海 +已 +过 +不怕 +热 +极 +引火 +所以 +等 +一会 +甲板 +上 +零星 +果皮 +纸片 +瓶塞 +之外 +香烟 +烟头 +香烟头 +定 +又 +遍 +处 +皆 +是 +法国 +国人 +法国人 +思想 +是 +有名 +清楚 +他 +文章 +也 +明白 +干净 +但是 +他 +做事 +无不 +混乱 +肮脏 +喧哗 +但 +看 +这 +船上 +乱糟 +乱糟糟 +这 +船 +倚仗 +人 +机巧 +载满 +人 +扰攘 +寄满 +人 +希望 +热闹 +地 +行 +着 +分钟 +每分钟 +把 +沾污 +人气 +一小 +方 +小 +面 +还给 +那 +无情 +无尽 +无际 +大海 diff --git a/vendor/elasticlunr-rs/tests/searchindex_fixture_en.json b/vendor/elasticlunr-rs/tests/searchindex_fixture_en.json new file mode 100644 index 000000000..5e7b61cee --- /dev/null +++ b/vendor/elasticlunr-rs/tests/searchindex_fixture_en.json @@ -0,0 +1,1392 @@ +{ + "documentStore": { + "docInfo": { + "1": { + "body": 8, + "title": 2 + }, + "2": { + "body": 13, + "title": 2 + }, + "3": { + "body": 11, + "title": 2 + }, + "4": { + "body": 9, + "title": 2 + }, + "5": { + "body": 11, + "title": 2 + }, + "6": { + "body": 4, + "title": 2 + } + }, + "docs": { + "1": { + "body": "Lorem ipsum dolor sit amet, consectetur adipiscing elit", + "id": "1", + "title": "Chapter 1" + }, + "2": { + "body": "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 
Ut enim ad", + "id": "2", + "title": "Chapter 2" + }, + "3": { + "body": "minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex", + "id": "3", + "title": "Chapter 3" + }, + "4": { + "body": "ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate", + "id": "4", + "title": "Chapter 4" + }, + "5": { + "body": "velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat", + "id": "5", + "title": "Chapter 5" + }, + "6": { + "body": "Spatiëring shouldn’t cause a panic.", + "id": "6", + "title": "Chapter 6" + } + }, + "length": 6, + "save": true + }, + "fields": [ + "title", + "body" + ], + "index": { + "body": { + "root": { + "1": { + "df": 1, + "docs": { + "1": { + "tf": 1.0 + } + } + }, + "2": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + }, + "3": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "4": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + }, + "5": { + "df": 1, + "docs": { + "5": { + "tf": 1.0 + } + } + }, + "6": { + "df": 1, + "docs": { + "6": { + "tf": 1.0 + } + } + }, + "a": { + "d": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + }, + "i": { + "df": 0, + "docs": {}, + "p": { + "df": 0, + "docs": {}, + "i": { + "df": 0, + "docs": {}, + "s": { + "c": { + "df": 1, + "docs": { + "1": { + "tf": 1.0 + } + } + }, + "df": 0, + "docs": {} + } + } + } + } + }, + "df": 0, + "docs": {}, + "l": { + "df": 0, + "docs": {}, + "i": { + "df": 0, + "docs": {}, + "q": { + "df": 0, + "docs": {}, + "u": { + "a": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + }, + "df": 0, + "docs": {}, + "i": { + "df": 0, + "docs": {}, + "p": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + } + } + } + } + }, + "m": { + "df": 0, + "docs": {}, + "e": { + "df": 0, + "docs": {}, + "t": { + "df": 1, + "docs": { + "1": { + "tf": 1.0 + } + } + } + } + }, + "u": { + "df": 0, + "docs": {}, + "t": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + } + }, + "c": { + "a": { + "df": 0, + 
"docs": {}, + "u": { + "df": 0, + "docs": {}, + "s": { + "df": 1, + "docs": { + "6": { + "tf": 1.0 + } + } + } + } + }, + "df": 0, + "docs": {}, + "h": { + "a": { + "df": 0, + "docs": {}, + "p": { + "df": 0, + "docs": {}, + "t": { + "df": 0, + "docs": {}, + "e": { + "df": 0, + "docs": {}, + "r": { + "df": 6, + "docs": { + "1": { + "tf": 1.0 + }, + "2": { + "tf": 1.0 + }, + "3": { + "tf": 1.0 + }, + "4": { + "tf": 1.0 + }, + "5": { + "tf": 1.0 + }, + "6": { + "tf": 1.0 + } + } + } + } + } + } + }, + "df": 0, + "docs": {} + }, + "i": { + "df": 0, + "docs": {}, + "l": { + "df": 0, + "docs": {}, + "l": { + "df": 0, + "docs": {}, + "u": { + "df": 0, + "docs": {}, + "m": { + "df": 1, + "docs": { + "5": { + "tf": 1.0 + } + } + } + } + } + } + }, + "o": { + "df": 0, + "docs": {}, + "m": { + "df": 0, + "docs": {}, + "m": { + "df": 0, + "docs": {}, + "o": { + "d": { + "df": 0, + "docs": {}, + "o": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + }, + "df": 0, + "docs": {} + } + } + }, + "n": { + "df": 0, + "docs": {}, + "s": { + "df": 0, + "docs": {}, + "e": { + "c": { + "df": 0, + "docs": {}, + "t": { + "df": 0, + "docs": {}, + "e": { + "df": 0, + "docs": {}, + "t": { + "df": 0, + "docs": {}, + "u": { + "df": 0, + "docs": {}, + "r": { + "df": 1, + "docs": { + "1": { + "tf": 1.0 + } + } + } + } + } + } + } + }, + "df": 0, + "docs": {}, + "q": { + "df": 0, + "docs": {}, + "u": { + "a": { + "df": 0, + "docs": {}, + "t": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + }, + "df": 0, + "docs": {} + } + } + } + } + } + } + }, + "d": { + "df": 0, + "docs": {}, + "o": { + "df": 0, + "docs": {}, + "l": { + "df": 0, + "docs": {}, + "o": { + "df": 0, + "docs": {}, + "r": { + "df": 4, + "docs": { + "1": { + "tf": 1.0 + }, + "2": { + "tf": 1.0 + }, + "4": { + "tf": 1.0 + }, + "5": { + "tf": 1.0 + } + } + } + } + } + }, + "u": { + "df": 0, + "docs": {}, + "i": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + } + }, + "df": 0, + "docs": {}, + "e": { + "a": 
{ + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + }, + "df": 0, + "docs": {}, + "i": { + "df": 0, + "docs": {}, + "u": { + "df": 0, + "docs": {}, + "s": { + "df": 0, + "docs": {}, + "m": { + "df": 0, + "docs": {}, + "o": { + "d": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + }, + "df": 0, + "docs": {} + } + } + } + } + }, + "l": { + "df": 0, + "docs": {}, + "i": { + "df": 0, + "docs": {}, + "t": { + "df": 1, + "docs": { + "1": { + "tf": 1.0 + } + } + } + } + }, + "n": { + "df": 0, + "docs": {}, + "i": { + "df": 0, + "docs": {}, + "m": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + } + } + }, + "s": { + "df": 0, + "docs": {}, + "s": { + "df": 1, + "docs": { + "5": { + "tf": 1.0 + } + } + } + }, + "t": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + }, + "u": { + "df": 1, + "docs": { + "5": { + "tf": 1.0 + } + } + }, + "x": { + "c": { + "df": 0, + "docs": {}, + "e": { + "df": 0, + "docs": {}, + "p": { + "df": 0, + "docs": {}, + "t": { + "df": 0, + "docs": {}, + "e": { + "df": 0, + "docs": {}, + "u": { + "df": 0, + "docs": {}, + "r": { + "df": 1, + "docs": { + "5": { + "tf": 1.0 + } + } + } + } + } + } + } + } + }, + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + }, + "e": { + "df": 0, + "docs": {}, + "r": { + "c": { + "df": 0, + "docs": {}, + "i": { + "df": 0, + "docs": {}, + "t": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + } + }, + "df": 0, + "docs": {} + } + } + } + }, + "f": { + "df": 0, + "docs": {}, + "u": { + "df": 0, + "docs": {}, + "g": { + "df": 0, + "docs": {}, + "i": { + "a": { + "df": 0, + "docs": {}, + "t": { + "df": 1, + "docs": { + "5": { + "tf": 1.0 + } + } + } + }, + "df": 0, + "docs": {} + } + } + } + }, + "i": { + "df": 0, + "docs": {}, + "n": { + "c": { + "df": 0, + "docs": {}, + "i": { + "d": { + "df": 0, + "docs": {}, + "i": { + "d": { + "df": 0, + "docs": {}, + "u": { + "df": 0, + "docs": {}, + "n": { + "df": 0, + "docs": {}, + "t": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + } + } + 
} + }, + "df": 0, + "docs": {} + } + }, + "df": 0, + "docs": {} + } + }, + "df": 0, + "docs": {} + }, + "p": { + "df": 0, + "docs": {}, + "s": { + "df": 0, + "docs": {}, + "u": { + "df": 0, + "docs": {}, + "m": { + "df": 1, + "docs": { + "1": { + "tf": 1.0 + } + } + } + } + } + }, + "r": { + "df": 0, + "docs": {}, + "u": { + "df": 0, + "docs": {}, + "r": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + } + } + }, + "l": { + "a": { + "b": { + "df": 0, + "docs": {}, + "o": { + "df": 0, + "docs": {}, + "r": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + }, + "i": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + } + } + }, + "df": 0, + "docs": {} + }, + "df": 0, + "docs": {}, + "o": { + "df": 0, + "docs": {}, + "r": { + "df": 0, + "docs": {}, + "e": { + "df": 0, + "docs": {}, + "m": { + "df": 1, + "docs": { + "1": { + "tf": 1.0 + } + } + } + } + } + } + }, + "m": { + "a": { + "df": 0, + "docs": {}, + "g": { + "df": 0, + "docs": {}, + "n": { + "a": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + }, + "df": 0, + "docs": {} + } + } + }, + "df": 0, + "docs": {}, + "i": { + "df": 0, + "docs": {}, + "n": { + "df": 0, + "docs": {}, + "i": { + "df": 0, + "docs": {}, + "m": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + } + } + } + }, + "n": { + "df": 0, + "docs": {}, + "i": { + "df": 0, + "docs": {}, + "s": { + "df": 0, + "docs": {}, + "i": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + } + }, + "o": { + "df": 0, + "docs": {}, + "s": { + "df": 0, + "docs": {}, + "t": { + "df": 0, + "docs": {}, + "r": { + "df": 0, + "docs": {}, + "u": { + "d": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "df": 0, + "docs": {} + } + } + } + } + }, + "u": { + "df": 0, + "docs": {}, + "l": { + "df": 0, + "docs": {}, + "l": { + "a": { + "df": 1, + "docs": { + "5": { + "tf": 1.0 + } + } + }, + "df": 0, + "docs": {} + } + } + } + }, + "o": { + "c": { + "c": { + "a": { + "df": 0, + "docs": {}, + "e": { + "c": { + "a": { + 
"df": 0, + "docs": {}, + "t": { + "df": 1, + "docs": { + "5": { + "tf": 1.0 + } + } + } + }, + "df": 0, + "docs": {} + }, + "df": 0, + "docs": {} + } + }, + "df": 0, + "docs": {} + }, + "df": 0, + "docs": {} + }, + "df": 0, + "docs": {} + }, + "p": { + "a": { + "df": 0, + "docs": {}, + "n": { + "df": 0, + "docs": {}, + "i": { + "c": { + "df": 1, + "docs": { + "6": { + "tf": 1.0 + } + } + }, + "df": 0, + "docs": {} + } + }, + "r": { + "df": 0, + "docs": {}, + "i": { + "a": { + "df": 0, + "docs": {}, + "t": { + "df": 0, + "docs": {}, + "u": { + "df": 0, + "docs": {}, + "r": { + "df": 1, + "docs": { + "5": { + "tf": 1.0 + } + } + } + } + } + }, + "df": 0, + "docs": {} + } + } + }, + "df": 0, + "docs": {} + }, + "q": { + "df": 0, + "docs": {}, + "u": { + "df": 0, + "docs": {}, + "i": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + } + }, + "r": { + "df": 0, + "docs": {}, + "e": { + "df": 0, + "docs": {}, + "p": { + "df": 0, + "docs": {}, + "r": { + "df": 0, + "docs": {}, + "e": { + "df": 0, + "docs": {}, + "h": { + "df": 0, + "docs": {}, + "e": { + "df": 0, + "docs": {}, + "n": { + "d": { + "df": 0, + "docs": {}, + "e": { + "df": 0, + "docs": {}, + "r": { + "df": 0, + "docs": {}, + "i": { + "df": 0, + "docs": {}, + "t": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + } + } + } + }, + "df": 0, + "docs": {} + } + } + } + } + } + } + } + }, + "s": { + "df": 0, + "docs": {}, + "e": { + "d": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + }, + "df": 0, + "docs": {} + }, + "h": { + "df": 0, + "docs": {}, + "o": { + "df": 0, + "docs": {}, + "u": { + "df": 0, + "docs": {}, + "l": { + "d": { + "df": 0, + "docs": {}, + "n": { + "df": 0, + "docs": {}, + "’": { + "df": 0, + "docs": {}, + "t": { + "df": 1, + "docs": { + "6": { + "tf": 1.0 + } + } + } + } + } + }, + "df": 0, + "docs": {} + } + } + } + }, + "i": { + "df": 0, + "docs": {}, + "n": { + "df": 0, + "docs": {}, + "t": { + "df": 1, + "docs": { + "5": { + "tf": 1.0 + } + } + } + }, + "t": { + 
"df": 1, + "docs": { + "1": { + "tf": 1.0 + } + } + } + }, + "p": { + "a": { + "df": 0, + "docs": {}, + "t": { + "df": 0, + "docs": {}, + "i": { + "df": 0, + "docs": {}, + "ë": { + "df": 0, + "docs": {}, + "r": { + "df": 1, + "docs": { + "6": { + "tf": 1.0 + } + } + } + } + } + } + }, + "df": 0, + "docs": {} + } + }, + "t": { + "df": 0, + "docs": {}, + "e": { + "df": 0, + "docs": {}, + "m": { + "df": 0, + "docs": {}, + "p": { + "df": 0, + "docs": {}, + "o": { + "df": 0, + "docs": {}, + "r": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + } + } + } + } + } + }, + "u": { + "df": 0, + "docs": {}, + "l": { + "df": 0, + "docs": {}, + "l": { + "a": { + "df": 0, + "docs": {}, + "m": { + "c": { + "df": 0, + "docs": {}, + "o": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "df": 0, + "docs": {} + } + }, + "df": 0, + "docs": {} + } + }, + "t": { + "df": 2, + "docs": { + "2": { + "tf": 1.4142135623730951 + }, + "3": { + "tf": 1.0 + } + } + } + }, + "v": { + "df": 0, + "docs": {}, + "e": { + "df": 0, + "docs": {}, + "l": { + "df": 0, + "docs": {}, + "i": { + "df": 0, + "docs": {}, + "t": { + "df": 1, + "docs": { + "5": { + "tf": 1.0 + } + } + } + } + }, + "n": { + "df": 0, + "docs": {}, + "i": { + "a": { + "df": 0, + "docs": {}, + "m": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "df": 0, + "docs": {} + } + } + }, + "o": { + "df": 0, + "docs": {}, + "l": { + "df": 0, + "docs": {}, + "u": { + "df": 0, + "docs": {}, + "p": { + "df": 0, + "docs": {}, + "t": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + } + } + } + } + } + } + }, + "title": { + "root": { + "1": { + "df": 1, + "docs": { + "1": { + "tf": 1.0 + } + } + }, + "2": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + }, + "3": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "4": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + }, + "5": { + "df": 1, + "docs": { + "5": { + "tf": 1.0 + } + } + }, + "6": { + "df": 1, + "docs": { + "6": { + 
"tf": 1.0 + } + } + }, + "c": { + "df": 0, + "docs": {}, + "h": { + "a": { + "df": 0, + "docs": {}, + "p": { + "df": 0, + "docs": {}, + "t": { + "df": 0, + "docs": {}, + "e": { + "df": 0, + "docs": {}, + "r": { + "df": 6, + "docs": { + "1": { + "tf": 1.0 + }, + "2": { + "tf": 1.0 + }, + "3": { + "tf": 1.0 + }, + "4": { + "tf": 1.0 + }, + "5": { + "tf": 1.0 + }, + "6": { + "tf": 1.0 + } + } + } + } + } + } + }, + "df": 0, + "docs": {} + } + }, + "df": 0, + "docs": {} + } + } + }, + "lang": "English", + "pipeline": [ + "trimmer", + "stopWordFilter", + "stemmer" + ], + "ref": "id", + "version": "0.9.5" +}
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/tests/searchindex_fixture_ja.json b/vendor/elasticlunr-rs/tests/searchindex_fixture_ja.json new file mode 100644 index 000000000..9045518cf --- /dev/null +++ b/vendor/elasticlunr-rs/tests/searchindex_fixture_ja.json @@ -0,0 +1,1215 @@ +{ + "documentStore": { + "docInfo": { + "1": { + "body": 5, + "title": 2 + }, + "2": { + "body": 15, + "title": 2 + }, + "3": { + "body": 72, + "title": 2 + }, + "4": { + "body": 33, + "title": 2 + } + }, + "docs": { + "1": { + "body": "吾輩は猫である。名前はまだ無い。", + "id": "1", + "title": "第1章" + }, + "2": { + "body": "どこで生れたかとんと見当がつかぬ。何でも薄暗いじめじめした所でニャーニャー泣いていた事だけは記憶している。", + "id": "2", + "title": "第2章" + }, + "3": { + "body": "吾輩はここで始めて人間というものを見た。しかもあとで聞くとそれは書生という人間中で一番獰悪な種族であったそうだ。この書生というのは時々我々を捕えて煮て食うという話である。しかしその当時は何という考もなかったから別段恐しいとも思わなかった。ただ彼の掌に載せられてスーと持ち上げられた時何だかフワフワした感じがあったばかりである。掌の上で少し落ちついて書生の顔を見たのがいわゆる人間というものの見始であろう。この時妙なものだと思った感じが今でも残っている。", + "id": "3", + "title": "第3章" + }, + "4": { + "body": "第一毛をもって装飾されべきはずの顔がつるつるしてまるで薬缶だ。その後猫にもだいぶ逢ったがこんな片輪には一度も出会わした事がない。のみならず顔の真中があまりに突起している。", + "id": "4", + "title": "第4章" + } + }, + "length": 4, + "save": true + }, + "fields": [ + "title", + "body" + ], + "index": { + "body": { + "root": { + "df": 0, + "docs": {}, + "あ": { + "df": 0, + "docs": {}, + "っ": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "と": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "ま": { + "df": 0, + "docs": {}, + "り": { + "df": 0, + "docs": {}, + "に": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + } + } + }, + "い": { + "df": 0, + "docs": {}, + "た": { + "df": 0, + "docs": {}, + "事": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + } + }, + "と": { + "df": 0, + "docs": {}, + "も": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "る": { + "df": 3, + "docs": { + "2": { + "tf": 1.0 + }, + "3": { + "tf": 1.0 + }, + "4": { + "tf": 1.0 + } + } + }, + "わ": { + "df": 0, + "docs": {}, + "ゆ": { + "df": 0, + 
"docs": {}, + "る": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + } + } + }, + "こ": { + "df": 0, + "docs": {}, + "こ": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "の": { + "df": 1, + "docs": { + "3": { + "tf": 1.4142135623730951 + } + } + }, + "ん": { + "df": 0, + "docs": {}, + "な": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + } + }, + "さ": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + }, + "し": { + "df": 3, + "docs": { + "2": { + "tf": 1.4142135623730951 + }, + "3": { + "tf": 1.0 + }, + "4": { + "tf": 1.7320508075688772 + } + }, + "か": { + "df": 0, + "docs": {}, + "し": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "も": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + } + }, + "じ": { + "df": 0, + "docs": {}, + "め": { + "df": 0, + "docs": {}, + "じ": { + "df": 0, + "docs": {}, + "め": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + } + } + } + }, + "そ": { + "df": 0, + "docs": {}, + "う": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "の": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + }, + "後": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + }, + "れ": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "た": { + "df": 0, + "docs": {}, + "だ": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "だ": { + "df": 0, + "docs": {}, + "い": { + "df": 0, + "docs": {}, + "ぶ": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + } + }, + "つ": { + "df": 0, + "docs": {}, + "か": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + }, + "る": { + "df": 0, + "docs": {}, + "つ": { + "df": 0, + "docs": {}, + "る": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + } + } + }, + "と": { + "df": 0, + "docs": {}, + "ん": { + "df": 0, + "docs": {}, + "と": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + } + } + }, + "ど": { + "df": 0, + "docs": {}, + "こ": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + } + }, + "な": { + "df": 
0, + "docs": {}, + "い": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + }, + "か": { + "df": 0, + "docs": {}, + "っ": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "ら": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + }, + "の": { + "df": 1, + "docs": { + "3": { + "tf": 1.4142135623730951 + } + }, + "み": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + }, + "は": { + "df": 0, + "docs": {}, + "ず": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + }, + "ま": { + "df": 0, + "docs": {}, + "だ": { + "df": 1, + "docs": { + "1": { + "tf": 1.0 + } + } + }, + "る": { + "df": 0, + "docs": {}, + "で": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + } + }, + "も": { + "df": 0, + "docs": {}, + "の": { + "df": 1, + "docs": { + "3": { + "tf": 1.7320508075688772 + } + } + } + }, + "ら": { + "df": 0, + "docs": {}, + "れ": { + "df": 1, + "docs": { + "3": { + "tf": 1.4142135623730951 + } + } + } + }, + "れ": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + }, + "ス": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "フ": { + "df": 0, + "docs": {}, + "ワ": { + "df": 0, + "docs": {}, + "フ": { + "df": 0, + "docs": {}, + "ワ": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + } + } + }, + "一": { + "df": 1, + "docs": { + "4": { + "tf": 1.4142135623730951 + } + }, + "番": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "上": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "中": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "事": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + }, + "人": { + "df": 0, + "docs": {}, + "間": { + "df": 1, + "docs": { + "3": { + "tf": 1.7320508075688772 + } + } + } + }, + "今": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "何": { + "df": 2, + "docs": { + "2": { + "tf": 1.0 + }, + "3": { + "tf": 1.0 + } + }, + "だ": { + "df": 0, + "docs": {}, + "か": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + } + }, + "出": 
{ + "df": 0, + "docs": {}, + "会": { + "df": 0, + "docs": {}, + "わ": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + } + }, + "別": { + "df": 0, + "docs": {}, + "段": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "名": { + "df": 0, + "docs": {}, + "前": { + "df": 1, + "docs": { + "1": { + "tf": 1.0 + } + } + } + }, + "吾": { + "df": 0, + "docs": {}, + "輩": { + "df": 2, + "docs": { + "1": { + "tf": 1.0 + }, + "3": { + "tf": 1.0 + } + } + } + }, + "妙": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "始": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + }, + "め": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "少": { + "df": 0, + "docs": {}, + "し": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "度": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + }, + "当": { + "df": 0, + "docs": {}, + "時": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "彼": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "思": { + "df": 0, + "docs": {}, + "っ": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "わ": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "恐": { + "df": 0, + "docs": {}, + "し": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "感": { + "df": 0, + "docs": {}, + "じ": { + "df": 1, + "docs": { + "3": { + "tf": 1.4142135623730951 + } + } + } + }, + "我": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "所": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + }, + "持": { + "df": 0, + "docs": {}, + "ち": { + "df": 0, + "docs": {}, + "上": { + "df": 0, + "docs": {}, + "げ": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + } + } + }, + "捕": { + "df": 0, + "docs": {}, + "え": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "掌": { + "df": 1, + "docs": { + "3": { + "tf": 1.4142135623730951 + } + } + }, + "時": { + "df": 1, + "docs": { + "3": { + "tf": 1.7320508075688772 + } + } + }, + "書": { + "df": 
0, + "docs": {}, + "生": { + "df": 1, + "docs": { + "3": { + "tf": 1.7320508075688772 + } + } + } + }, + "残": { + "df": 0, + "docs": {}, + "っ": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "毛": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + }, + "泣": { + "df": 0, + "docs": {}, + "い": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + } + }, + "無": { + "df": 0, + "docs": {}, + "い": { + "df": 1, + "docs": { + "1": { + "tf": 1.0 + } + } + } + }, + "煮": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "片": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + }, + "猫": { + "df": 2, + "docs": { + "1": { + "tf": 1.0 + }, + "4": { + "tf": 1.0 + } + } + }, + "獰": { + "df": 0, + "docs": {}, + "悪": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "生": { + "df": 0, + "docs": {}, + "れ": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + } + }, + "真": { + "df": 0, + "docs": {}, + "中": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + }, + "種": { + "df": 0, + "docs": {}, + "族": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "突": { + "df": 0, + "docs": {}, + "起": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + }, + "章": { + "df": 4, + "docs": { + "1": { + "tf": 1.0 + }, + "2": { + "tf": 1.0 + }, + "3": { + "tf": 1.0 + }, + "4": { + "tf": 1.0 + } + } + }, + "第": { + "df": 4, + "docs": { + "1": { + "tf": 1.0 + }, + "2": { + "tf": 1.0 + }, + "3": { + "tf": 1.0 + }, + "4": { + "tf": 1.4142135623730951 + } + } + }, + "考": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "聞": { + "df": 0, + "docs": {}, + "く": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "落": { + "df": 0, + "docs": {}, + "ち": { + "df": 0, + "docs": {}, + "つ": { + "df": 0, + "docs": {}, + "い": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + } + } + }, + "薄": { + "df": 0, + "docs": {}, + "暗": { + "df": 0, + "docs": {}, + "い": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + 
} + } + } + } + }, + "薬": { + "df": 0, + "docs": {}, + "缶": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + }, + "装": { + "df": 0, + "docs": {}, + "飾": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + }, + "見": { + "df": 1, + "docs": { + "3": { + "tf": 1.7320508075688772 + } + }, + "当": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + } + }, + "記": { + "df": 0, + "docs": {}, + "憶": { + "df": 1, + "docs": { + "2": { + "tf": 1.0 + } + } + } + }, + "話": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + }, + "載": { + "df": 0, + "docs": {}, + "せ": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + }, + "輪": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + }, + "逢": { + "df": 0, + "docs": {}, + "っ": { + "df": 1, + "docs": { + "4": { + "tf": 1.0 + } + } + } + }, + "顔": { + "df": 2, + "docs": { + "3": { + "tf": 1.0 + }, + "4": { + "tf": 1.4142135623730951 + } + } + }, + "食": { + "df": 0, + "docs": {}, + "う": { + "df": 1, + "docs": { + "3": { + "tf": 1.0 + } + } + } + } + } + }, + "title": { + "root": { + "df": 0, + "docs": {}, + "章": { + "df": 4, + "docs": { + "1": { + "tf": 1.0 + }, + "2": { + "tf": 1.0 + }, + "3": { + "tf": 1.0 + }, + "4": { + "tf": 1.0 + } + } + }, + "第": { + "df": 4, + "docs": { + "1": { + "tf": 1.0 + }, + "2": { + "tf": 1.0 + }, + "3": { + "tf": 1.0 + }, + "4": { + "tf": 1.0 + } + } + } + } + } + }, + "lang": "Japanese", + "pipeline": [ + "trimmer-ja", + "stemmer-ja" + ], + "ref": "id", + "version": "0.9.5" +}
\ No newline at end of file diff --git a/vendor/elasticlunr-rs/tests/test-index.rs b/vendor/elasticlunr-rs/tests/test-index.rs new file mode 100644 index 000000000..2edbc17cd --- /dev/null +++ b/vendor/elasticlunr-rs/tests/test-index.rs @@ -0,0 +1,102 @@ +use elasticlunr::*; +use serde_json::json; +use std::fs::{self, File}; +use std::path::Path; + +fn create_index(lang: Box<dyn Language>, docs: &'static [[&'static str; 2]]) -> serde_json::Value { + let mut index = Index::with_language(lang, &["title", "body"]); + for (i, doc) in docs.iter().enumerate() { + index.add_doc(&(i + 1).to_string(), doc); + } + json!(index) +} + +fn generate_fixture( + lang: Box<dyn Language>, + docs: &'static [[&'static str; 2]], +) -> serde_json::Value { + let code = lang.code(); + let src = create_index(lang, docs); + let dest = Path::new(env!("CARGO_MANIFEST_DIR")) + .join(format!("tests/searchindex_fixture_{}.json", code)); + let dest = File::create(&dest).unwrap(); + serde_json::to_writer_pretty(dest, &src).unwrap(); + src +} + +fn read_fixture(lang: &dyn Language) -> serde_json::Value { + let src = Path::new(env!("CARGO_MANIFEST_DIR")) + .join(format!("tests/searchindex_fixture_{}.json", lang.code())); + let json = fs::read_to_string(src).unwrap(); + serde_json::from_str(&json).expect("Unable to deserialize the fixture") +} + +const GENERATE_FIXTURE: bool = false; + +fn check_index<L: Language + Clone + 'static>(lang: L, docs: &'static [[&'static str; 2]]) { + let new_index = create_index(Box::new(lang.clone()), docs); + let name = lang.name(); + let fixture_index = if GENERATE_FIXTURE { + generate_fixture(Box::new(lang), docs) + } else { + read_fixture(&lang) + }; + if new_index != fixture_index { + panic!("The {} search index has changed from the fixture", name); + } +} + +#[test] +fn en_search_index_hasnt_changed_accidentally() { + check_index(lang::English::new(), DOCS_EN); +} + +#[cfg(feature = "ja")] +#[test] +fn ja_search_index_hasnt_changed_accidentally() { + 
check_index(lang::Japanese::new(), DOCS_JA); +} + +const DOCS_EN: &'static [[&'static str; 2]] = &[ + [ + "Chapter 1", + "Lorem ipsum dolor sit amet, consectetur adipiscing elit", + ], + [ + "Chapter 2", + "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad", + ], + [ + "Chapter 3", + "minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex", + ], + [ + "Chapter 4", + "ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate", + ], + [ + "Chapter 5", + "velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat", + ], + ["Chapter 6", "Spatiëring shouldn’t cause a panic."], +]; + +#[cfg(feature = "ja")] +const DOCS_JA: &'static [[&'static str; 2]] = &[ + [ + "第1章", + "吾輩は猫である。名前はまだ無い。", + ], + [ + "第2章", + "どこで生れたかとんと見当がつかぬ。何でも薄暗いじめじめした所でニャーニャー泣いていた事だけは記憶している。", + ], + [ + "第3章", + "吾輩はここで始めて人間というものを見た。しかもあとで聞くとそれは書生という人間中で一番獰悪な種族であったそうだ。この書生というのは時々我々を捕えて煮て食うという話である。しかしその当時は何という考もなかったから別段恐しいとも思わなかった。ただ彼の掌に載せられてスーと持ち上げられた時何だかフワフワした感じがあったばかりである。掌の上で少し落ちついて書生の顔を見たのがいわゆる人間というものの見始であろう。この時妙なものだと思った感じが今でも残っている。", + ], + [ + "第4章", + "第一毛をもって装飾されべきはずの顔がつるつるしてまるで薬缶だ。その後猫にもだいぶ逢ったがこんな片輪には一度も出会わした事がない。のみならず顔の真中があまりに突起している。", + ], +]; diff --git a/vendor/elasticlunr-rs/tests/test-pipeline.rs b/vendor/elasticlunr-rs/tests/test-pipeline.rs new file mode 100644 index 000000000..cdc70592d --- /dev/null +++ b/vendor/elasticlunr-rs/tests/test-pipeline.rs @@ -0,0 +1,68 @@ +// Input text is excerpted from public domain books on gutenberg.org or wikisource.org + +use elasticlunr::*; +use std::fs::File; +use std::io::{BufRead, BufReader, Read, Write}; +use std::path::Path; + +#[allow(dead_code)] +fn write_output(lang: &dyn Language) { + let code = lang.code(); + let base = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("data"); + + let input = base.join(&format!("{}.in.txt", code)); + let mut input_str = String::new(); + File::open(&input) + .unwrap() + .read_to_string(&mut input_str) + 
.unwrap(); + + let output = base.join(&format!("{}.out.txt", code)); + let mut output = File::create(&output).unwrap(); + + let pipeline = lang.make_pipeline(); + let tokens = pipeline.run(lang.tokenize(&input_str)); + + for tok in tokens { + writeln!(&mut output, "{}", tok).unwrap(); + } +} + +fn compare_to_fixture(lang: &dyn Language) { + let code = lang.code(); + let base = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("data"); + + let input = base.join(&format!("{}.in.txt", code)); + let mut input_str = String::new(); + File::open(&input) + .unwrap() + .read_to_string(&mut input_str) + .unwrap(); + + let output = base.join(&format!("{}.out.txt", code)); + let mut output = BufReader::new(File::open(&output).unwrap()).lines(); + + let pipeline = lang.make_pipeline(); + let tokens = pipeline.run(lang.tokenize(&input_str)); + + for tok in tokens { + assert_eq!( + tok, + output.next().unwrap().unwrap(), + "Comparing pipeline tokens to fixture for {}", + lang.name() + ); + } +} + +#[test] +fn test_languages() { + for lang in lang::languages() { + //write_output(lang.as_ref()); + compare_to_fixture(lang.as_ref()); + } +} |