summaryrefslogtreecommitdiffstats
path: root/vendor/bstr
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 18:31:44 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 18:31:44 +0000
commitc23a457e72abe608715ac76f076f47dc42af07a5 (patch)
tree2772049aaf84b5c9d0ed12ec8d86812f7a7904b6 /vendor/bstr
parentReleasing progress-linux version 1.73.0+dfsg1-1~progress7.99u1. (diff)
downloadrustc-c23a457e72abe608715ac76f076f47dc42af07a5.tar.xz
rustc-c23a457e72abe608715ac76f076f47dc42af07a5.zip
Merging upstream version 1.74.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/bstr')
-rw-r--r--vendor/bstr/.cargo-checksum.json2
-rw-r--r--vendor/bstr/Cargo.lock96
-rw-r--r--vendor/bstr/Cargo.toml26
-rwxr-xr-xvendor/bstr/scripts/generate-unicode-data149
-rw-r--r--vendor/bstr/scripts/regex/grapheme.sh50
-rw-r--r--vendor/bstr/scripts/regex/sentence.sh176
-rw-r--r--vendor/bstr/scripts/regex/word.sh111
-rw-r--r--vendor/bstr/src/ext_slice.rs8
-rw-r--r--vendor/bstr/src/lib.rs14
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfabin10781 -> 22420 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfabin10781 -> 22420 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs48
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfabin55271 -> 90997 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfabin55271 -> 90997 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs48
-rw-r--r--vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfabin366 -> 1240 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfabin366 -> 1240 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs55
-rw-r--r--vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfabin153619 -> 200879 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfabin153619 -> 200879 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs48
-rw-r--r--vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfabin9237 -> 11095 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfabin9237 -> 11095 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/simple_word_fwd.rs48
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfabin572 -> 2964 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfabin572 -> 2964 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs55
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfabin884 -> 3232 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfabin884 -> 3232 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs55
-rw-r--r--vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfabin236309 -> 299263 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfabin236309 -> 299263 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/word_break_fwd.rs48
-rw-r--r--vendor/bstr/src/unicode/grapheme.rs23
-rw-r--r--vendor/bstr/src/unicode/sentence.rs9
-rw-r--r--vendor/bstr/src/unicode/whitespace.rs14
-rw-r--r--vendor/bstr/src/unicode/word.rs17
37 files changed, 249 insertions, 851 deletions
diff --git a/vendor/bstr/.cargo-checksum.json b/vendor/bstr/.cargo-checksum.json
index 90c0ab073..04cad9221 100644
--- a/vendor/bstr/.cargo-checksum.json
+++ b/vendor/bstr/.cargo-checksum.json
@@ -1 +1 @@
-{"files":{"COPYING":"68653aaa727a2bfa31b7a751e31701ce33c49d695c12dd291a07d1c54da4c14b","Cargo.lock":"89aebb25a85f875bc2ccc3a90cf4a2c86dff11f95ff90ddb1f208d65a9fc85fb","Cargo.toml":"665a9c704132b7dd3b6dc75ba3ca772cf8bc5a28f33012943af94ca1bc4f85ad","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6b7374c39a57e57fc2c38eb529c4c88340152b10f51dd5ae2d819dfa67f61715","README.md":"5e20af8472e06926761584e3c249ebc8b9802f1eb13440276d9aa267e70e5955","examples/graphemes-std.rs":"100264f623ff973be76831fb1d4519e6f371b21972d6577bb49bf7bbff4d0d5e","examples/graphemes.rs":"401c5fac813f78e4029ece9c98bccb3128637c507d8667b73e069bfbc9d7f2f4","examples/lines-std.rs":"094a48bfd483ec01f80f9c937ddfe6f0bdbf09f960ba822215ec8ed9862624df","examples/lines.rs":"65ae4edbdb0ccff8ff40cdc70b4e7a70824f5028daff2e1b2a3247f884589db8","examples/uppercase-std.rs":"33aed88e38483aa303625757304a974594476a3a659d8bdd4877aceb90ff8be3","examples/uppercase.rs":"2cdf7f173cb6a5d4c16a967e3f733bc40331f5167da519c5194ceee187ff814f","examples/words-std.rs":"ffde2fccd361890fab0e0b051915a749d5d51e95b9be700b76fada231d002f00","examples/words.rs":"aa805faa5012714428ef895596947c333417c2b16a7e0155d4a128be7428fc17","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","scripts/generate-unicode-data":"75d68617b5acf564cc681ddfbf77a6097eaa9a9f8f54af4e16905dda3dc6df77","scripts/regex/grapheme.sh":"d796bca73278f6ab04d65f285b2dc72efcad76874f98f4bfa22bf38f2eaeece7","scripts/regex/sentence.sh":"7892e07ac9e450967bd79279558cbef2f0fc0b0aefab24217910937ed6330f06","scripts/regex/word.sh":"b3f53e2331e9e50a1a7232b7d278aaecace6a42ef6c16dd0b8d0ec59fd2aaf4f","src/ascii.rs":"5aae67428421ad9e2156d7b27f5931bd924056b9af073ec53c44ef8e2d29cc8a","src/bstr.rs":"be1313d13814f3818068f1f6c96e4a1eecf1ecdec42c360f90379596804ea0ef","src/bstring.rs":"1cd7656dc3a6eded742eb7e9e43c83a5d020e6a419060c186788c8e1010f7dcc","src/byteset/mod.rs":"3f88d5594d95724c6eda96f79006a27dab80c4f849f00342b
0bae3efedc32c45","src/byteset/scalar.rs":"fb84085b30d2901407877eb306828574eaf0dea907bbb7f02c36f24f0dc6f7b6","src/escape_bytes.rs":"207b7b92398912f940f911cfaafca04b7c62dac1106063ae2c7bd0676238b4cf","src/ext_slice.rs":"9e849981a4b4288b3d1237372847c81a6068186f041c8b04cab901a87a2bcc82","src/ext_vec.rs":"4dd9af267c07554051787c752e22e091684eb93f1a70c70ac2537535d1a54a07","src/impls.rs":"cd585f19d96f06b35cb17a7a8479f14f6b624beecf01501ea077f4b8eaacced4","src/io.rs":"73afcb89230d940b17a5917696c3f7c55267aefcb42db4164062dbf18875b677","src/lib.rs":"b4433d15492fe85983cbe1bf23249d1dac54aa5df4510989939933a43bfd07f9","src/tests.rs":"8adfd1a4a9da91b2a4dff25ffafcf99d914be3f5b7d67d66cdcb40a2d72abd04","src/unicode/data/GraphemeBreakTest.txt":"ddc7d4d1f3838573b94fc5d83ff7217e63c47b22ae1cd40c5fe1a54efc15589b","src/unicode/data/LICENSE-UNICODE":"8b9babb256418ec15761d635a49f973424939affba7a0a88de2fc2690e454a23","src/unicode/data/SentenceBreakTest.txt":"7e42dd749dbb94aa44b13faf9df6319d9a16ce2ea09a3a094fcfbb5962168040","src/unicode/data/WordBreakTest.txt":"8094b544ec1580c7e41ac0187805cc1aeb330a90301ec7505563e1a59318284e","src/unicode/fsm/grapheme_break_fwd.bigendian.dfa":"ae5220a77570720fcf78e63794d4cddbeef365fc3aaeec7dde391c229bc0a840","src/unicode/fsm/grapheme_break_fwd.littleendian.dfa":"3f9ce5d78325ede1651587e24e12357740a90608c784ac59c643abd42c4d9a83","src/unicode/fsm/grapheme_break_fwd.rs":"b6d937ec3afee23ea7c01ff9c0eeff1fc4f85287b87659dca80765db49d6b09e","src/unicode/fsm/grapheme_break_rev.bigendian.dfa":"fa2c745adc61060f08e5734f19acc09de387b0abd671597a543b4d4d80fd7a04","src/unicode/fsm/grapheme_break_rev.littleendian.dfa":"a10fd82f63b0f0aa08e5e7f09000c020c7ff4cfe6240afb11a615c663100de99","src/unicode/fsm/grapheme_break_rev.rs":"d9de2be51a17c5be37142ac44b9e2f0627c05a9101d5b1e23fd78229ca0ef75d","src/unicode/fsm/mod.rs":"50b8baa692e83f909a0fe62eced9666b712a68b6c7bf42976c8cc37e49dd9b64","src/unicode/fsm/regional_indicator_rev.bigendian.dfa":"db9db4c86bced5f4aaf68d5e475e13e5d4976c237deec13c
192111a399aa5858","src/unicode/fsm/regional_indicator_rev.littleendian.dfa":"0905f70acddd423c1b53bfbeb73299009f724400029d7f9a987d63c32d36e36c","src/unicode/fsm/regional_indicator_rev.rs":"50b89fc6f7d461c789e88cc6f1a769257104b7f45eb01bd31047e898f1e9587a","src/unicode/fsm/sentence_break_fwd.bigendian.dfa":"0cd36026a86ea5d2e4710b8278733982808e341c88b62c4f9ca309417a181dc9","src/unicode/fsm/sentence_break_fwd.littleendian.dfa":"f3b85da014d1c94e1b444f3fca2952d1a5fbf2a9f42e32574eb52e027a797281","src/unicode/fsm/sentence_break_fwd.rs":"2c6147825fd78c15ecdb952d368d519f81bbf196eedf3e90e927699e832c7080","src/unicode/fsm/simple_word_fwd.bigendian.dfa":"635ab3e9c589268ef91a48c8b9b038e156deaf4a9a4475fce49ca75eabddccf7","src/unicode/fsm/simple_word_fwd.littleendian.dfa":"4f92b789385027a9276498a829cc8e5a3ecdd5f3c6d88254c6cd23d95d828c57","src/unicode/fsm/simple_word_fwd.rs":"44a2b90c8b4a2fa50c66cacc1d48afd47a8f7aa4753dd391471b48a9a891be71","src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa":"593c8ad059ab0bee60a2ea25f4c1fc89c105cb19a9bda3fa98d1464b8e87cfc0","src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa":"a04ed70d5dbd969c0af0e12bec5033ca910161c486f741dd0a792d2e5b0cc6f6","src/unicode/fsm/whitespace_anchored_fwd.rs":"e0f3f0be717ff306409ea9242f507847c4c0fa7469eccbd98a849389afe7fd26","src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa":"9ab09359ce73058d22e5bfa857e040831d49f4a53dd25da804136e9db9e7f5fb","src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa":"cb5804786bd98bfe0726f3f7733d72bc1d69130c8a8b026465c03c78f8c8ac79","src/unicode/fsm/whitespace_anchored_rev.rs":"8f27a50dfe549db99e768225c2956489f233f5a4986857a5ff5f2b507375a69d","src/unicode/fsm/word_break_fwd.bigendian.dfa":"72278d401ac119d50e06c6b8a4cb29d54366d1da536cfaedc3a20bb8cc1b742c","src/unicode/fsm/word_break_fwd.littleendian.dfa":"97dcdca86472d96faadd15d1c0328a76484971f3c4ad409c29049791cb9ed3eb","src/unicode/fsm/word_break_fwd.rs":"3ce6b28015dd6f2b330759da46263e8227f921e2370d4aa6922b8be26db558a8","src/un
icode/grapheme.rs":"9bac56709754b48d42ee35282d5752c554a5af3e70b08c01977872f70ffa2afd","src/unicode/mod.rs":"fc67b0d64e9395398235c8663706b246edc0742e6cfe87057eeabdb1b19ad28d","src/unicode/sentence.rs":"8af1f274f80120b04928a6e560bfb3f2c9176d882f729265494b1a796a370681","src/unicode/whitespace.rs":"1fe313906fce009060267ae14de10e0ce577f0e2e2018273ee79d008dc9cf2f5","src/unicode/word.rs":"ed1bef53cf01ef6e682898e802e1654356a763c3993b4f16898eb5ed4b5e7637","src/utf8.rs":"e759713023dc3e5f9f5b2e6c3ba601af591ce5b2ad71aba729e3c29bcf6007e3"},"package":"a246e68bb43f6cd9db24bea052a53e40405417c5fb372e3d1a8a7f770a564ef5"} \ No newline at end of file
+{"files":{"COPYING":"68653aaa727a2bfa31b7a751e31701ce33c49d695c12dd291a07d1c54da4c14b","Cargo.lock":"c36bf13a0db7e61dafd933b6b6c00ab9937ec0dedd86e258cec9f77398ffe7ed","Cargo.toml":"98e6de7edd0e320aace57321c5ca2527c79ad18d68978fba20e3274dbf6b484d","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6b7374c39a57e57fc2c38eb529c4c88340152b10f51dd5ae2d819dfa67f61715","README.md":"5e20af8472e06926761584e3c249ebc8b9802f1eb13440276d9aa267e70e5955","examples/graphemes-std.rs":"100264f623ff973be76831fb1d4519e6f371b21972d6577bb49bf7bbff4d0d5e","examples/graphemes.rs":"401c5fac813f78e4029ece9c98bccb3128637c507d8667b73e069bfbc9d7f2f4","examples/lines-std.rs":"094a48bfd483ec01f80f9c937ddfe6f0bdbf09f960ba822215ec8ed9862624df","examples/lines.rs":"65ae4edbdb0ccff8ff40cdc70b4e7a70824f5028daff2e1b2a3247f884589db8","examples/uppercase-std.rs":"33aed88e38483aa303625757304a974594476a3a659d8bdd4877aceb90ff8be3","examples/uppercase.rs":"2cdf7f173cb6a5d4c16a967e3f733bc40331f5167da519c5194ceee187ff814f","examples/words-std.rs":"ffde2fccd361890fab0e0b051915a749d5d51e95b9be700b76fada231d002f00","examples/words.rs":"aa805faa5012714428ef895596947c333417c2b16a7e0155d4a128be7428fc17","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ascii.rs":"5aae67428421ad9e2156d7b27f5931bd924056b9af073ec53c44ef8e2d29cc8a","src/bstr.rs":"be1313d13814f3818068f1f6c96e4a1eecf1ecdec42c360f90379596804ea0ef","src/bstring.rs":"1cd7656dc3a6eded742eb7e9e43c83a5d020e6a419060c186788c8e1010f7dcc","src/byteset/mod.rs":"3f88d5594d95724c6eda96f79006a27dab80c4f849f00342b0bae3efedc32c45","src/byteset/scalar.rs":"fb84085b30d2901407877eb306828574eaf0dea907bbb7f02c36f24f0dc6f7b6","src/escape_bytes.rs":"207b7b92398912f940f911cfaafca04b7c62dac1106063ae2c7bd0676238b4cf","src/ext_slice.rs":"e0dff7cf2fa384d9dccf8c720b9867003f258a5dc6011b0debe80d68dae35c1f","src/ext_vec.rs":"4dd9af267c07554051787c752e22e091684eb93f1a70c70ac2537535d1a54a07","src/impls.rs
":"cd585f19d96f06b35cb17a7a8479f14f6b624beecf01501ea077f4b8eaacced4","src/io.rs":"73afcb89230d940b17a5917696c3f7c55267aefcb42db4164062dbf18875b677","src/lib.rs":"5e665152491cc9ce3a1f2cee3f235704b49e32017c691b45a371714aa6969d1e","src/tests.rs":"8adfd1a4a9da91b2a4dff25ffafcf99d914be3f5b7d67d66cdcb40a2d72abd04","src/unicode/data/GraphemeBreakTest.txt":"ddc7d4d1f3838573b94fc5d83ff7217e63c47b22ae1cd40c5fe1a54efc15589b","src/unicode/data/LICENSE-UNICODE":"8b9babb256418ec15761d635a49f973424939affba7a0a88de2fc2690e454a23","src/unicode/data/SentenceBreakTest.txt":"7e42dd749dbb94aa44b13faf9df6319d9a16ce2ea09a3a094fcfbb5962168040","src/unicode/data/WordBreakTest.txt":"8094b544ec1580c7e41ac0187805cc1aeb330a90301ec7505563e1a59318284e","src/unicode/fsm/grapheme_break_fwd.bigendian.dfa":"ed5aa2efd017d8815d58ffc1dc65525948f8ed003d81ac891f78ee04181dca81","src/unicode/fsm/grapheme_break_fwd.littleendian.dfa":"31c832b147705f1d144e43d117fdde35092fe569bbe7dcc97e5961fe6860791e","src/unicode/fsm/grapheme_break_fwd.rs":"2b35935d19226ccd10f26633f3c6b6c3fc61bf00fdefd314fe350d3dbb333ee4","src/unicode/fsm/grapheme_break_rev.bigendian.dfa":"ab3b82ed1bbfdbb7d7da178d7ed7f4fd5f66d20dc4688f5643abd89b9b4fe0ea","src/unicode/fsm/grapheme_break_rev.littleendian.dfa":"e30d2863ffa26181d736c3a3e8df9bcad104360bd2eed5bab23473f4ec9287ae","src/unicode/fsm/grapheme_break_rev.rs":"1da158d1f12c6313e227357d00ad56c8718792cd4d2b54ce3fed9446cd4b370e","src/unicode/fsm/mod.rs":"50b8baa692e83f909a0fe62eced9666b712a68b6c7bf42976c8cc37e49dd9b64","src/unicode/fsm/regional_indicator_rev.bigendian.dfa":"97b61dc64bfbf612d5ff5a17b126b9e3afd303fb36f5aa2f4a199b6f3b7cced5","src/unicode/fsm/regional_indicator_rev.littleendian.dfa":"5b0e114dc14d7dc46a8398f98018a630df33ccff8b909ddcd44a0da8d2ad20d1","src/unicode/fsm/regional_indicator_rev.rs":"c973a021197982282dfb12cd918185836795e44914daea19df4b4e4a156ae680","src/unicode/fsm/sentence_break_fwd.bigendian.dfa":"0dd4dfa025a932862e6d04a9fdc0aa25c26ac1d9d1a0baa05651d851c1fa4718","src/uni
code/fsm/sentence_break_fwd.littleendian.dfa":"cb37cd218f2714928916640154a0803d257e114271ff0ea90055bc0f4e8a2d41","src/unicode/fsm/sentence_break_fwd.rs":"3e3708f16992e3b157b29304e5dce47fcadbf5db1d0b85828f852fbbb7a9da09","src/unicode/fsm/simple_word_fwd.bigendian.dfa":"27fefb1ab8f0cd6d4d3b877832122c2e35d7921eaf29c0e6de96ff51611b20f8","src/unicode/fsm/simple_word_fwd.littleendian.dfa":"a9340c695ed76aa37353ce8f337e84e08714754b9ccbba64fcfaddb9a32c4e87","src/unicode/fsm/simple_word_fwd.rs":"59b818f24363d622d8a4bf40872e3648b582779861d4ce7a7a0b8a2f2d04d721","src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa":"9a8886693ceb7616d8838e370e1fe5a2a9aafed50891278f70dd443a5d72e4d4","src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa":"661745fc2ca838e2ebb10dba01c8dd5cd72b264ef71b2b343196a0bcf5daaac7","src/unicode/fsm/whitespace_anchored_fwd.rs":"77183ab0bc353aa688ca0c829dbef011eeab8b9eff07ef861c92ac905f9d64ba","src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa":"cf5f2fb71f3ea6eb2f6349c039f00d9cc72b9fa949910594c1453c5fa8b2f4da","src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa":"791825312415cbd24aca8b8262ded3e1fabc785e3eba3660f65019296f8e5f58","src/unicode/fsm/whitespace_anchored_rev.rs":"0e98166e69ba84eb7f5e7ce504e4aa8cb9aa3043f7f95629b1432daeb9e55678","src/unicode/fsm/word_break_fwd.bigendian.dfa":"54908d18964ef017b909594af6ce976af9a22698845a858234724e747240462d","src/unicode/fsm/word_break_fwd.littleendian.dfa":"c7977465637868e286fcb818b4cab7349a9c1612f4b0a1d06b0ba5f2d8281018","src/unicode/fsm/word_break_fwd.rs":"ce643936af3b8a8a91b8ee8252f08a080a2041ef6784e7c46ab9b4d5acee463c","src/unicode/grapheme.rs":"a42b8b61c9dfc05c3c15e311bfef7a36f5f4222d9f044d1d990259591a60a103","src/unicode/mod.rs":"fc67b0d64e9395398235c8663706b246edc0742e6cfe87057eeabdb1b19ad28d","src/unicode/sentence.rs":"85bab665e0be53957743211cb4956060b6087716127251a3f962a57e87cd2775","src/unicode/whitespace.rs":"f4961ea2020edb6bb382a56e7ad91ee9b221670842669c5e070f19995840abaa","src/unicode/word.
rs":"73eef99becfd0697f55500052df057c45a3aa9393839e67f77dbd227a7ea88f4","src/utf8.rs":"e759713023dc3e5f9f5b2e6c3ba601af591ce5b2ad71aba729e3c29bcf6007e3"},"package":"4c2f7349907b712260e64b0afe2f84692af14a454be26187d9df565c7f69266a"} \ No newline at end of file
diff --git a/vendor/bstr/Cargo.lock b/vendor/bstr/Cargo.lock
index 7d9e05482..6d3746ae3 100644
--- a/vendor/bstr/Cargo.lock
+++ b/vendor/bstr/Cargo.lock
@@ -4,10 +4,9 @@ version = 3
[[package]]
name = "bstr"
-version = "1.5.0"
+version = "1.6.2"
dependencies = [
"memchr",
- "once_cell",
"quickcheck",
"regex-automata",
"serde",
@@ -23,9 +22,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "getrandom"
-version = "0.2.8"
+version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31"
+checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
dependencies = [
"cfg-if",
"libc",
@@ -34,21 +33,24 @@ dependencies = [
[[package]]
name = "libc"
-version = "0.2.138"
+version = "0.2.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "db6d7e329c562c5dfab7a46a2afabc8b987ab9a4834c9d1ca04dc54c1546cef8"
+checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
[[package]]
name = "memchr"
-version = "2.5.0"
+version = "2.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
+checksum = "f478948fd84d9f8e86967bf432640e46adfb5a4bd4f14ef7e864ab38220534ae"
[[package]]
-name = "once_cell"
-version = "1.16.0"
+name = "proc-macro2"
+version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860"
+checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
+dependencies = [
+ "unicode-ident",
+]
[[package]]
name = "quickcheck"
@@ -60,6 +62,15 @@ dependencies = [
]
[[package]]
+name = "quote"
+version = "1.0.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -78,47 +89,68 @@ dependencies = [
]
[[package]]
-name = "regex"
-version = "1.7.0"
+name = "regex-automata"
+version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a"
-dependencies = [
- "regex-syntax",
-]
+checksum = "49530408a136e16e5b486e883fbb6ba058e8e4e8ae6621a77b048b314336e629"
[[package]]
-name = "regex-automata"
-version = "0.1.10"
+name = "regex-lite"
+version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
+checksum = "f96ede7f386ba6e910092e7ccdc04176cface62abebea07ed6b46d870ed95ca2"
[[package]]
-name = "regex-syntax"
-version = "0.6.28"
+name = "serde"
+version = "1.0.188"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
+checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e"
+dependencies = [
+ "serde_derive",
+]
[[package]]
-name = "serde"
-version = "1.0.150"
+name = "serde_derive"
+version = "1.0.188"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e326c9ec8042f1b5da33252c8a37e9ffbd2c9bef0155215b6e6c80c790e05f91"
+checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
[[package]]
name = "ucd-parse"
-version = "0.1.10"
+version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc2d0556a998f4c55500ce1730901ba32bafbe820068cbdc091421525d61253b"
+checksum = "212c59636157b18c2f57eed2799e6606c52fc49c6a11685ffb0d08f06e55f428"
dependencies = [
- "once_cell",
- "regex",
+ "regex-lite",
]
[[package]]
+name = "unicode-ident"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
+
+[[package]]
name = "unicode-segmentation"
-version = "1.10.0"
+version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fdbf052a0783de01e944a6ce7a8cb939e295b1e7be835a1112c3b9a7f047a5a"
+checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36"
[[package]]
name = "wasi"
diff --git a/vendor/bstr/Cargo.toml b/vendor/bstr/Cargo.toml
index f644ee2a8..2db4367c1 100644
--- a/vendor/bstr/Cargo.toml
+++ b/vendor/bstr/Cargo.toml
@@ -13,9 +13,12 @@
edition = "2021"
rust-version = "1.60"
name = "bstr"
-version = "1.5.0"
+version = "1.6.2"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
-exclude = ["/.github"]
+exclude = [
+ "/.github",
+ "/scripts",
+]
description = "A string type that is not required to be valid UTF-8."
homepage = "https://github.com/BurntSushi/bstr"
documentation = "https://docs.rs/bstr"
@@ -74,15 +77,12 @@ required-features = [
]
[dependencies.memchr]
-version = "2.4.0"
+version = "2.6.1"
default-features = false
-[dependencies.once_cell]
-version = "1.14.0"
-optional = true
-
[dependencies.regex-automata]
-version = "0.1.5"
+version = "0.3.0"
+features = ["dfa-search"]
optional = true
default-features = false
@@ -102,7 +102,10 @@ version = "0.1.3"
version = "1.2.1"
[features]
-alloc = ["serde?/alloc"]
+alloc = [
+ "memchr/alloc",
+ "serde?/alloc",
+]
default = [
"std",
"unicode",
@@ -113,7 +116,4 @@ std = [
"memchr/std",
"serde?/std",
]
-unicode = [
- "dep:once_cell",
- "dep:regex-automata",
-]
+unicode = ["dep:regex-automata"]
diff --git a/vendor/bstr/scripts/generate-unicode-data b/vendor/bstr/scripts/generate-unicode-data
deleted file mode 100755
index b8341c5a6..000000000
--- a/vendor/bstr/scripts/generate-unicode-data
+++ /dev/null
@@ -1,149 +0,0 @@
-#!/bin/sh
-
-set -e
-D="$(dirname "$0")"
-
-# Convenience function for checking that a command exists.
-requires() {
- cmd="$1"
- if ! command -v "$cmd" > /dev/null 2>&1; then
- echo "DEPENDENCY MISSING: $cmd must be installed" >&2
- exit 1
- fi
-}
-
-# Test if an array ($2) contains a particular element ($1).
-array_exists() {
- needle="$1"
- shift
-
- for el in "$@"; do
- if [ "$el" = "$needle" ]; then
- return 0
- fi
- done
- return 1
-}
-
-graphemes() {
- regex="$(sh "$D/regex/grapheme.sh")"
-
- echo "generating forward grapheme DFA"
- ucd-generate dfa \
- --name GRAPHEME_BREAK_FWD \
- --sparse --minimize --anchored --state-size 2 \
- src/unicode/fsm/ \
- "$regex"
-
- echo "generating reverse grapheme DFA"
- ucd-generate dfa \
- --name GRAPHEME_BREAK_REV \
- --reverse --longest \
- --sparse --minimize --anchored --state-size 2 \
- src/unicode/fsm/ \
- "$regex"
-}
-
-words() {
- regex="$(sh "$D/regex/word.sh")"
-
- echo "generating forward word DFA (this can take a while)"
- ucd-generate dfa \
- --name WORD_BREAK_FWD \
- --sparse --minimize --anchored --state-size 4 \
- src/unicode/fsm/ \
- "$regex"
-}
-
-sentences() {
- regex="$(sh "$D/regex/sentence.sh")"
-
- echo "generating forward sentence DFA (this can take a while)"
- ucd-generate dfa \
- --name SENTENCE_BREAK_FWD \
- --minimize \
- --sparse --anchored --state-size 4 \
- src/unicode/fsm/ \
- "$regex"
-}
-
-regional_indicator() {
- # For finding all occurrences of regional indicators. This is used to handle
- # regional indicators as a special case for the reverse grapheme iterator
- # and the reverse word iterator.
- echo "generating regional indicator DFA"
- ucd-generate dfa \
- --name REGIONAL_INDICATOR_REV \
- --reverse \
- --classes --minimize --anchored --premultiply --state-size 1 \
- src/unicode/fsm/ \
- "\p{gcb=Regional_Indicator}"
-}
-
-simple_word() {
- echo "generating forward simple word DFA"
- ucd-generate dfa \
- --name SIMPLE_WORD_FWD \
- --sparse --minimize --state-size 2 \
- src/unicode/fsm/ \
- "\w"
-}
-
-whitespace() {
- echo "generating forward whitespace DFA"
- ucd-generate dfa \
- --name WHITESPACE_ANCHORED_FWD \
- --anchored --classes --premultiply --minimize --state-size 1 \
- src/unicode/fsm/ \
- "\s+"
-
- echo "generating reverse whitespace DFA"
- ucd-generate dfa \
- --name WHITESPACE_ANCHORED_REV \
- --reverse \
- --anchored --classes --premultiply --minimize --state-size 2 \
- src/unicode/fsm/ \
- "\s+"
-}
-
-main() {
- if array_exists "-h" "$@" || array_exists "--help" "$@"; then
- echo "Usage: $(basename "$0") [--list-commands] [<command>] ..." >&2
- exit
- fi
-
- commands="
- graphemes
- sentences
- words
- regional-indicator
- simple-word
- whitespace
- "
- if array_exists "--list-commands" "$@"; then
- for cmd in $commands; do
- echo "$cmd"
- done
- exit
- fi
-
- # ucd-generate is used to compile regexes into DFAs.
- requires ucd-generate
-
- mkdir -p src/unicode/fsm/
-
- cmds=$*
- if [ $# -eq 0 ] || array_exists "all" "$@"; then
- cmds=$commands
- fi
- for cmd in $cmds; do
- if array_exists "$cmd" $commands; then
- fun="$(echo "$cmd" | sed 's/-/_/g')"
- eval "$fun"
- else
- echo "unrecognized command: $cmd" >&2
- fi
- done
-}
-
-main "$@"
diff --git a/vendor/bstr/scripts/regex/grapheme.sh b/vendor/bstr/scripts/regex/grapheme.sh
deleted file mode 100644
index 0b2b54daa..000000000
--- a/vendor/bstr/scripts/regex/grapheme.sh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/sh
-
-# vim: indentexpr= nosmartindent autoindent
-# vim: tabstop=2 shiftwidth=2 softtabstop=2
-
-# This regex was manually written, derived from the rules in UAX #29.
-# Particularly, from Table 1c, which lays out a regex for grapheme clusters.
-
-CR="\p{gcb=CR}"
-LF="\p{gcb=LF}"
-Control="\p{gcb=Control}"
-Prepend="\p{gcb=Prepend}"
-L="\p{gcb=L}"
-V="\p{gcb=V}"
-LV="\p{gcb=LV}"
-LVT="\p{gcb=LVT}"
-T="\p{gcb=T}"
-RI="\p{gcb=RI}"
-Extend="\p{gcb=Extend}"
-ZWJ="\p{gcb=ZWJ}"
-SpacingMark="\p{gcb=SpacingMark}"
-
-Any="\p{any}"
-ExtendPict="\p{Extended_Pictographic}"
-
-echo "(?x)
-$CR $LF
-|
-$Control
-|
-$Prepend*
-(
- (
- ($L* ($V+ | $LV $V* | $LVT) $T*)
- |
- $L+
- |
- $T+
- )
- |
- $RI $RI
- |
- $ExtendPict ($Extend* $ZWJ $ExtendPict)*
- |
- [^$Control $CR $LF]
-)
-[$Extend $ZWJ $SpacingMark]*
-|
-$Any
-"
diff --git a/vendor/bstr/scripts/regex/sentence.sh b/vendor/bstr/scripts/regex/sentence.sh
deleted file mode 100644
index 689d1849f..000000000
--- a/vendor/bstr/scripts/regex/sentence.sh
+++ /dev/null
@@ -1,176 +0,0 @@
-#!/bin/sh
-
-# vim: indentexpr= nosmartindent autoindent
-# vim: tabstop=2 shiftwidth=2 softtabstop=2
-
-# This is a regex that I reverse engineered from the sentence boundary chain
-# rules in UAX #29. Unlike the grapheme regex, which is essentially provided
-# for us in UAX #29, no such sentence regex exists.
-#
-# I looked into how ICU achieves this, since UAX #29 hints that producing
-# finite state machines for grapheme/sentence/word/line breaking is possible,
-# but only easy to do for graphemes. ICU does this by implementing their own
-# DSL for describing the break algorithms in terms of the chaining rules
-# directly. You can see an example for sentences in
-# icu4c/source/data/brkitr/rules/sent.txt. ICU then builds a finite state
-# machine from those rules in a mostly standard way, but implements the
-# "chaining" aspect of the rules by connecting overlapping end and start
-# states. For example, given SB7:
-#
-# (Upper | Lower) ATerm x Upper
-#
-# Then the naive way to convert this into a regex would be something like
-#
-# [\p{sb=Upper}\p{sb=Lower}]\p{sb=ATerm}\p{sb=Upper}
-#
-# Unfortunately, this is incorrect. Why? Well, consider an example like so:
-#
-# U.S.A.
-#
-# A correct implementation of the sentence breaking algorithm should not insert
-# any breaks here, exactly in accordance with repeatedly applying rule SB7 as
-# given above. Our regex fails to do this because it will first match `U.S`
-# without breaking them---which is correct---but will then start looking for
-# its next rule beginning with a full stop (in ATerm) and followed by an
-# uppercase letter (A). This will wind up triggering rule SB11 (without
-# matching `A`), which inserts a break.
-#
-# The reason why this happens is because our initial application of rule SB7
-# "consumes" the next uppercase letter (S), which we want to reuse as a prefix
-# in the next rule application. A natural way to express this would be with
-# look-around, although it's not clear that works in every case since you
-# ultimately might want to consume that ending uppercase letter. In any case,
-# we can't use look-around in our truly regular regexes, so we must fix this.
-# The approach we take is to explicitly repeat rules when a suffix of a rule
-# is a prefix of another rule. In the case of SB7, the end of the rule, an
-# uppercase letter, also happens to match the beginning of the rule. This can
-# in turn be repeated indefinitely. Thus, our actual translation to a regex is:
-#
-# [\p{sb=Upper}\p{sb=Lower}]\p{sb=ATerm}\p{sb=Upper}(\p{sb=ATerm}\p{sb=Upper})*
-#
-# It turns out that this is exactly what ICU does, but in their case, they do
-# it automatically. In our case, we connect the chaining rules manually. It's
-# tedious. With that said, we do not implement Unicode line breaking with this
-# approach, which is a far scarier beast. In that case, it would probably be
-# worth writing the code to do what ICU does.
-#
-# In the case of sentence breaks, there aren't *too* many overlaps of this
-# nature. We list them out exhaustively to make this clear, because it's
-# essentially impossible to easily observe this in the regex. (It took me a
-# full day to figure all of this out.) Rules marked with N/A mean that they
-# specify a break, and this strategy only really applies to stringing together
-# non-breaks.
-#
-# SB1 - N/A
-# SB2 - N/A
-# SB3 - None
-# SB4 - N/A
-# SB5 - None
-# SB6 - None
-# SB7 - End overlaps with beginning of SB7
-# SB8 - End overlaps with beginning of SB7
-# SB8a - End overlaps with beginning of SB6, SB8, SB8a, SB9, SB10, SB11
-# SB9 - None
-# SB10 - None
-# SB11 - None
-# SB998 - N/A
-#
-# SB8a is in particular quite tricky to get right without look-ahead, since it
-# allows ping-ponging between match rules SB8a and SB9-11, where SB9-11
-# otherwise indicate that a break has been found. In the regex below, we tackle
-# this by only permitting part of SB8a to match inside our core non-breaking
-# repetition. In particular, we only allow the parts of SB8a to match that
-# permit the non-breaking components to continue. If a part of SB8a matches
-# that guarantees a pop out to SB9-11, (like `STerm STerm`), then we let it
-# happen. This still isn't correct because an SContinue might be seen which
-# would allow moving back into SB998 and thus the non-breaking repetition, so
-# we handle that case as well.
-#
-# Finally, the last complication here is the sprinkling of $Ex* everywhere.
-# This essentially corresponds to the implementation of SB5 by following
-# UAX #29's recommendation in S6.2. Essentially, we use it to avoid ever breaking
-# in the middle of a grapheme cluster.
-
-CR="\p{sb=CR}"
-LF="\p{sb=LF}"
-Sep="\p{sb=Sep}"
-Close="\p{sb=Close}"
-Sp="\p{sb=Sp}"
-STerm="\p{sb=STerm}"
-ATerm="\p{sb=ATerm}"
-SContinue="\p{sb=SContinue}"
-Numeric="\p{sb=Numeric}"
-Upper="\p{sb=Upper}"
-Lower="\p{sb=Lower}"
-OLetter="\p{sb=OLetter}"
-
-Ex="[\p{sb=Extend}\p{sb=Format}]"
-ParaSep="[$Sep $CR $LF]"
-SATerm="[$STerm $ATerm]"
-
-LetterSepTerm="[$OLetter $Upper $Lower $ParaSep $SATerm]"
-
-echo "(?x)
-(
- # SB6
- $ATerm $Ex*
- $Numeric
- |
- # SB7
- [$Upper $Lower] $Ex* $ATerm $Ex*
- $Upper $Ex*
- # overlap with SB7
- ($ATerm $Ex* $Upper $Ex*)*
- |
- # SB8
- $ATerm $Ex* $Close* $Ex* $Sp* $Ex*
- ([^$LetterSepTerm] $Ex*)* $Lower $Ex*
- # overlap with SB7
- ($ATerm $Ex* $Upper $Ex*)*
- |
- # SB8a
- $SATerm $Ex* $Close* $Ex* $Sp* $Ex*
- (
- $SContinue
- |
- $ATerm $Ex*
- # Permit repetition of SB8a
- (($Close $Ex*)* ($Sp $Ex*)* $SATerm)*
- # In order to continue non-breaking matching, we now must observe
- # a match with a rule that keeps us in SB6-8a. Otherwise, we've entered
- # one of SB9-11 and know that a break must follow.
- (
- # overlap with SB6
- $Numeric
- |
- # overlap with SB8
- ($Close $Ex*)* ($Sp $Ex*)*
- ([^$LetterSepTerm] $Ex*)* $Lower $Ex*
- # overlap with SB7
- ($ATerm $Ex* $Upper $Ex*)*
- |
- # overlap with SB8a
- ($Close $Ex*)* ($Sp $Ex*)* $SContinue
- )
- |
- $STerm $Ex*
- # Permit repetition of SB8a
- (($Close $Ex*)* ($Sp $Ex*)* $SATerm)*
- # As with ATerm above, in order to continue non-breaking matching, we
- # must now observe a match with a rule that keeps us out of SB9-11.
- # For STerm, the only such possibility is to see an SContinue. Anything
- # else will result in a break.
- ($Close $Ex*)* ($Sp $Ex*)* $SContinue
- )
- |
- # SB998
- # The logic behind this catch-all is that if we get to this point and
- # see a Sep, CR, LF, STerm or ATerm, then it has to fall into one of
- # SB9, SB10 or SB11. In the cases of SB9-11, we always find a break since
- # SB11 acts as a catch-all to induce a break following a SATerm that isn't
- # handled by rules SB6-SB8a.
- [^$ParaSep $SATerm]
-)*
-# The following collapses rules SB3, SB4, part of SB8a, SB9, SB10 and SB11.
-($SATerm $Ex* ($Close $Ex*)* ($Sp $Ex*)*)* ($CR $LF | $ParaSep)?
-"
diff --git a/vendor/bstr/scripts/regex/word.sh b/vendor/bstr/scripts/regex/word.sh
deleted file mode 100644
index 78c7a05cf..000000000
--- a/vendor/bstr/scripts/regex/word.sh
+++ /dev/null
@@ -1,111 +0,0 @@
-#!/bin/sh
-
-# vim: indentexpr= nosmartindent autoindent
-# vim: tabstop=2 shiftwidth=2 softtabstop=2
-
-# See the comments in regex/sentence.sh for the general approach to how this
-# regex was written.
-#
-# Writing the regex for this was *hard*. It took me two days of hacking to get
-# this far, and that was after I had finished the sentence regex, so my brain
-# was fully cached on this. Unlike the sentence regex, the rules in the regex
-# below don't correspond as nicely to the rules in UAX #29. In particular, the
-# UAX #29 rules have a ton of overlap with each other, which requires crazy
-# stuff in the regex. I'm not even sure the regex below is 100% correct or even
-# minimal, however, I did compare this with the ICU word segmenter on a few
-# different corpora, and it produces identical results. (In addition to of
-# course passing the UCD tests.)
-#
-# In general, I consider this approach to be a failure. Firstly, this is
-# clearly a write-only regex. Secondly, building the minimized DFA for this is
-# incredibly slow. Thirdly, the DFA is itself very large (~240KB). Fourthly,
-# reversing this regex (for reverse word iteration) results in a >19MB DFA.
-# Yes. That's MB. Wat. And it took 5 minutes to build.
-#
-# I think we might consider changing our approach to this problem. The normal
-# path I've seen, I think, is to decode codepoints one at a time, and then
-# thread them through a state machine in the code itself. We could take this
-# approach, or possibly combine it with a DFA that tells us which Word_Break
-# value a codepoint has. I'd prefer the latter approach, but it requires adding
-# RegexSet support to regex-automata. Something that should definitely be done,
-# but is a fair amount of work.
-#
-# Gah.
-
-CR="\p{wb=CR}"
-LF="\p{wb=LF}"
-Newline="\p{wb=Newline}"
-ZWJ="\p{wb=ZWJ}"
-RI="\p{wb=Regional_Indicator}"
-Katakana="\p{wb=Katakana}"
-HebrewLet="\p{wb=HebrewLetter}"
-ALetter="\p{wb=ALetter}"
-SingleQuote="\p{wb=SingleQuote}"
-DoubleQuote="\p{wb=DoubleQuote}"
-MidNumLet="\p{wb=MidNumLet}"
-MidLetter="\p{wb=MidLetter}"
-MidNum="\p{wb=MidNum}"
-Numeric="\p{wb=Numeric}"
-ExtendNumLet="\p{wb=ExtendNumLet}"
-WSegSpace="\p{wb=WSegSpace}"
-
-Any="\p{any}"
-Ex="[\p{wb=Extend} \p{wb=Format} $ZWJ]"
-ExtendPict="\p{Extended_Pictographic}"
-AHLetter="[$ALetter $HebrewLet]"
-MidNumLetQ="[$MidNumLet $SingleQuote]"
-
-AHLetterRepeat="$AHLetter $Ex* ([$MidLetter $MidNumLetQ] $Ex* $AHLetter $Ex*)*"
-NumericRepeat="$Numeric $Ex* ([$MidNum $MidNumLetQ] $Ex* $Numeric $Ex*)*"
-
-echo "(?x)
-$CR $LF
-|
-[$Newline $CR $LF]
-|
-$WSegSpace $WSegSpace+
-|
-(
- ([^$Newline $CR $LF]? $Ex* $ZWJ $ExtendPict $Ex*)+
- |
- ($ExtendNumLet $Ex*)* $AHLetter $Ex*
- (
- (
- ($NumericRepeat | $ExtendNumLet $Ex*)*
- |
- [$MidLetter $MidNumLetQ] $Ex*
- )
- $AHLetter $Ex*
- )+
- ($NumericRepeat | $ExtendNumLet $Ex*)*
- |
- ($ExtendNumLet $Ex*)* $AHLetter $Ex* ($NumericRepeat | $ExtendNumLet $Ex*)+
- |
- ($ExtendNumLet $Ex*)* $Numeric $Ex*
- (
- (
- ($AHLetterRepeat | $ExtendNumLet $Ex*)*
- |
- [$MidNum $MidNumLetQ] $Ex*
- )
- $Numeric $Ex*
- )+
- ($AHLetterRepeat | $ExtendNumLet $Ex*)*
- |
- ($ExtendNumLet $Ex*)* $Numeric $Ex* ($AHLetterRepeat | $ExtendNumLet $Ex*)+
- |
- $Katakana $Ex*
- (($Katakana | $ExtendNumLet) $Ex*)+
- |
- $ExtendNumLet $Ex*
- (($ExtendNumLet | $AHLetter | $Numeric | $Katakana) $Ex*)+
-)+
-|
-$HebrewLet $Ex* $SingleQuote $Ex*
-|
-($HebrewLet $Ex* $DoubleQuote $Ex*)+ $HebrewLet $Ex*
-|
-$RI $Ex* $RI $Ex*
-|
-$Any $Ex*
-"
diff --git a/vendor/bstr/src/ext_slice.rs b/vendor/bstr/src/ext_slice.rs
index 5e1801971..503e0b258 100644
--- a/vendor/bstr/src/ext_slice.rs
+++ b/vendor/bstr/src/ext_slice.rs
@@ -3106,8 +3106,8 @@ impl<'a> Finder<'a> {
/// If this is already an owned finder, then this is a no-op. Otherwise,
/// this copies the needle.
///
- /// This is only available when the `std` feature is enabled.
- #[cfg(feature = "std")]
+ /// This is only available when the `alloc` feature is enabled.
+ #[cfg(feature = "alloc")]
#[inline]
pub fn into_owned(self) -> Finder<'static> {
Finder(self.0.into_owned())
@@ -3189,8 +3189,8 @@ impl<'a> FinderReverse<'a> {
/// If this is already an owned finder, then this is a no-op. Otherwise,
/// this copies the needle.
///
- /// This is only available when the `std` feature is enabled.
- #[cfg(feature = "std")]
+ /// This is only available when the `alloc` feature is enabled.
+ #[cfg(feature = "alloc")]
#[inline]
pub fn into_owned(self) -> FinderReverse<'static> {
FinderReverse(self.0.into_owned())
diff --git a/vendor/bstr/src/lib.rs b/vendor/bstr/src/lib.rs
index 8598e4458..47c624408 100644
--- a/vendor/bstr/src/lib.rs
+++ b/vendor/bstr/src/lib.rs
@@ -394,20 +394,6 @@ and Unicode support.
#![cfg_attr(not(any(feature = "std", test)), no_std)]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
-// Why do we do this? Well, in order for us to use once_cell's 'Lazy' type to
-// load DFAs, it requires enabling its 'std' feature. Yet, there is really
-// nothing about our 'unicode' feature that requires 'std'. We could declare
-// that 'unicode = [std, ...]', which would be fine, but once regex-automata
-// 0.3 is a thing, I believe we can drop once_cell altogether and thus drop
-// the need for 'std' to be enabled when 'unicode' is enabled. But if we make
-// 'unicode' also enable 'std', then it would be a breaking change to remove
-// 'std' from that list.
-//
-// So, for right now, we force folks to explicitly say they want 'std' if they
-// want 'unicode'. In the future, we should be able to relax this.
-#[cfg(all(feature = "unicode", not(feature = "std")))]
-compile_error!("enabling 'unicode' requires enabling 'std'");
-
#[cfg(feature = "alloc")]
extern crate alloc;
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa
index 31f99c1f5..c4321e2c9 100644
--- a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa
index 3a51728bb..3b9905da9 100644
--- a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs
index dea4a7e3e..ccba7d952 100644
--- a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs
@@ -1,41 +1,19 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
-// ucd-generate dfa --name GRAPHEME_BREAK_FWD --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)]
+// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --shrink --rustfmt --safe GRAPHEME_BREAK_FWD src/unicode/fsm/ <snip: arg too long>
//
-// ucd-generate 0.2.12 is available on crates.io.
+// regex-cli 0.0.1 is available on crates.io.
-#[cfg(target_endian = "big")]
-pub static GRAPHEME_BREAK_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u16>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
+use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy};
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("grapheme_break_fwd.bigendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
-});
-
-#[cfg(target_endian = "little")]
-pub static GRAPHEME_BREAK_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u16>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
-
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("grapheme_break_fwd.littleendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+pub static GRAPHEME_BREAK_FWD: Lazy<DFA<&'static [u8]>> = Lazy::new(|| {
+ #[cfg(target_endian = "big")]
+ static BYTES: &'static [u8] =
+ include_bytes!("grapheme_break_fwd.bigendian.dfa");
+ #[cfg(target_endian = "little")]
+ static BYTES: &'static [u8] =
+ include_bytes!("grapheme_break_fwd.littleendian.dfa");
+ let (dfa, _) =
+ DFA::from_bytes(BYTES).expect("serialized DFA should be valid");
+ dfa
});
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa
index 742d2a6a2..5d7d34a01 100644
--- a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa
index d1937f26c..7472844d4 100644
--- a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs
index 2d2cd542f..e5619b2f0 100644
--- a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs
@@ -1,41 +1,19 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
-// ucd-generate dfa --name GRAPHEME_BREAK_REV --reverse --longest --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)]
+// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --reverse --match-kind all --no-captures --shrink --rustfmt --safe GRAPHEME_BREAK_REV src/unicode/fsm/ <snip: arg too long>
//
-// ucd-generate 0.2.12 is available on crates.io.
+// regex-cli 0.0.1 is available on crates.io.
-#[cfg(target_endian = "big")]
-pub static GRAPHEME_BREAK_REV: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u16>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
+use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy};
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("grapheme_break_rev.bigendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
-});
-
-#[cfg(target_endian = "little")]
-pub static GRAPHEME_BREAK_REV: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u16>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
-
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("grapheme_break_rev.littleendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+pub static GRAPHEME_BREAK_REV: Lazy<DFA<&'static [u8]>> = Lazy::new(|| {
+ #[cfg(target_endian = "big")]
+ static BYTES: &'static [u8] =
+ include_bytes!("grapheme_break_rev.bigendian.dfa");
+ #[cfg(target_endian = "little")]
+ static BYTES: &'static [u8] =
+ include_bytes!("grapheme_break_rev.littleendian.dfa");
+ let (dfa, _) =
+ DFA::from_bytes(BYTES).expect("serialized DFA should be valid");
+ dfa
});
diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa
index 1a3357f71..1f830009a 100644
--- a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa
index e437aae3a..b0db99dd1 100644
--- a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs
index db7a40fcd..af1c73051 100644
--- a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs
+++ b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs
@@ -1,41 +1,24 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
-// ucd-generate dfa --name REGIONAL_INDICATOR_REV --reverse --classes --minimize --anchored --premultiply --state-size 1 src/unicode/fsm/ \p{gcb=Regional_Indicator}
+// regex-cli generate serialize dense dfa --minimize --start-kind anchored --reverse --no-captures --shrink --rustfmt --safe REGIONAL_INDICATOR_REV src/unicode/fsm/ \p{gcb=Regional_Indicator}
//
-// ucd-generate 0.2.12 is available on crates.io.
+// regex-cli 0.0.1 is available on crates.io.
-#[cfg(target_endian = "big")]
-pub static REGIONAL_INDICATOR_REV: ::once_cell::sync::Lazy<
- ::regex_automata::DenseDFA<&'static [u8], u8>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
+use regex_automata::{
+ dfa::dense::DFA,
+ util::{lazy::Lazy, wire::AlignAs},
+};
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("regional_indicator_rev.bigendian.dfa"),
- };
-
- unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
-});
-
-#[cfg(target_endian = "little")]
-pub static REGIONAL_INDICATOR_REV: ::once_cell::sync::Lazy<
- ::regex_automata::DenseDFA<&'static [u8], u8>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
-
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("regional_indicator_rev.littleendian.dfa"),
- };
-
- unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
-});
+pub static REGIONAL_INDICATOR_REV: Lazy<DFA<&'static [u32]>> =
+ Lazy::new(|| {
+ static ALIGNED: &AlignAs<[u8], u32> = &AlignAs {
+ _align: [],
+ #[cfg(target_endian = "big")]
+ bytes: *include_bytes!("regional_indicator_rev.bigendian.dfa"),
+ #[cfg(target_endian = "little")]
+ bytes: *include_bytes!("regional_indicator_rev.littleendian.dfa"),
+ };
+ let (dfa, _) = DFA::from_bytes(&ALIGNED.bytes)
+ .expect("serialized DFA should be valid");
+ dfa
+ });
diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa
index 1abdae880..d4bd841e5 100644
--- a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa
index 2f8aadd30..df1e9521d 100644
--- a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs
index 97dd658e4..0f22eeefb 100644
--- a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs
+++ b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs
@@ -1,41 +1,19 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
-// ucd-generate dfa --name SENTENCE_BREAK_FWD --minimize --sparse --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)]
+// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --shrink --rustfmt --safe SENTENCE_BREAK_FWD src/unicode/fsm/ <snip: arg too long>
//
-// ucd-generate 0.2.12 is available on crates.io.
+// regex-cli 0.0.1 is available on crates.io.
-#[cfg(target_endian = "big")]
-pub static SENTENCE_BREAK_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u32>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
+use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy};
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("sentence_break_fwd.bigendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
-});
-
-#[cfg(target_endian = "little")]
-pub static SENTENCE_BREAK_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u32>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
-
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("sentence_break_fwd.littleendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+pub static SENTENCE_BREAK_FWD: Lazy<DFA<&'static [u8]>> = Lazy::new(|| {
+ #[cfg(target_endian = "big")]
+ static BYTES: &'static [u8] =
+ include_bytes!("sentence_break_fwd.bigendian.dfa");
+ #[cfg(target_endian = "little")]
+ static BYTES: &'static [u8] =
+ include_bytes!("sentence_break_fwd.littleendian.dfa");
+ let (dfa, _) =
+ DFA::from_bytes(BYTES).expect("serialized DFA should be valid");
+ dfa
});
diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa
index 888e46599..08981bdd0 100644
--- a/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa
index a1d527c74..daf01d60c 100644
--- a/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs b/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs
index 32b69b611..97bd006fb 100644
--- a/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs
+++ b/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs
@@ -1,41 +1,19 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
-// ucd-generate dfa --name SIMPLE_WORD_FWD --sparse --minimize --state-size 2 src/unicode/fsm/ \w
+// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --shrink --rustfmt --safe SIMPLE_WORD_FWD src/unicode/fsm/ \w
//
-// ucd-generate 0.2.12 is available on crates.io.
+// regex-cli 0.0.1 is available on crates.io.
-#[cfg(target_endian = "big")]
-pub static SIMPLE_WORD_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u16>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
+use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy};
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("simple_word_fwd.bigendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
-});
-
-#[cfg(target_endian = "little")]
-pub static SIMPLE_WORD_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u16>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
-
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("simple_word_fwd.littleendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+pub static SIMPLE_WORD_FWD: Lazy<DFA<&'static [u8]>> = Lazy::new(|| {
+ #[cfg(target_endian = "big")]
+ static BYTES: &'static [u8] =
+ include_bytes!("simple_word_fwd.bigendian.dfa");
+ #[cfg(target_endian = "little")]
+ static BYTES: &'static [u8] =
+ include_bytes!("simple_word_fwd.littleendian.dfa");
+ let (dfa, _) =
+ DFA::from_bytes(BYTES).expect("serialized DFA should be valid");
+ dfa
});
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa
index bcfc4e9a1..fe32b0d98 100644
--- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa
index d534a464a..0179b66f7 100644
--- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs
index 0780412ae..193deb0ca 100644
--- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs
@@ -1,41 +1,24 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
-// ucd-generate dfa --name WHITESPACE_ANCHORED_FWD --anchored --classes --premultiply --minimize --state-size 1 src/unicode/fsm/ \s+
+// regex-cli generate serialize dense dfa --minimize --start-kind anchored --shrink --rustfmt --safe WHITESPACE_ANCHORED_FWD src/unicode/fsm/ \s+
//
-// ucd-generate 0.2.12 is available on crates.io.
+// regex-cli 0.0.1 is available on crates.io.
-#[cfg(target_endian = "big")]
-pub static WHITESPACE_ANCHORED_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::DenseDFA<&'static [u8], u8>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
+use regex_automata::{
+ dfa::dense::DFA,
+ util::{lazy::Lazy, wire::AlignAs},
+};
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("whitespace_anchored_fwd.bigendian.dfa"),
- };
-
- unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
-});
-
-#[cfg(target_endian = "little")]
-pub static WHITESPACE_ANCHORED_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::DenseDFA<&'static [u8], u8>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
-
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("whitespace_anchored_fwd.littleendian.dfa"),
- };
-
- unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
-});
+pub static WHITESPACE_ANCHORED_FWD: Lazy<DFA<&'static [u32]>> =
+ Lazy::new(|| {
+ static ALIGNED: &AlignAs<[u8], u32> = &AlignAs {
+ _align: [],
+ #[cfg(target_endian = "big")]
+ bytes: *include_bytes!("whitespace_anchored_fwd.bigendian.dfa"),
+ #[cfg(target_endian = "little")]
+ bytes: *include_bytes!("whitespace_anchored_fwd.littleendian.dfa"),
+ };
+ let (dfa, _) = DFA::from_bytes(&ALIGNED.bytes)
+ .expect("serialized DFA should be valid");
+ dfa
+ });
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa
index 427d3a922..cf8ef736b 100644
--- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa
index 7cc3a0a99..740fcd040 100644
--- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs
index 3d0d7a661..2eb98c0bd 100644
--- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs
@@ -1,41 +1,24 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
-// ucd-generate dfa --name WHITESPACE_ANCHORED_REV --reverse --anchored --classes --premultiply --minimize --state-size 2 src/unicode/fsm/ \s+
+// regex-cli generate serialize dense dfa --minimize --start-kind anchored --reverse --no-captures --shrink --rustfmt --safe WHITESPACE_ANCHORED_REV src/unicode/fsm/ \s+
//
-// ucd-generate 0.2.12 is available on crates.io.
+// regex-cli 0.0.1 is available on crates.io.
-#[cfg(target_endian = "big")]
-pub static WHITESPACE_ANCHORED_REV: ::once_cell::sync::Lazy<
- ::regex_automata::DenseDFA<&'static [u16], u16>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u16; 0],
- bytes: B,
- }
+use regex_automata::{
+ dfa::dense::DFA,
+ util::{lazy::Lazy, wire::AlignAs},
+};
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("whitespace_anchored_rev.bigendian.dfa"),
- };
-
- unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
-});
-
-#[cfg(target_endian = "little")]
-pub static WHITESPACE_ANCHORED_REV: ::once_cell::sync::Lazy<
- ::regex_automata::DenseDFA<&'static [u16], u16>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u16; 0],
- bytes: B,
- }
-
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("whitespace_anchored_rev.littleendian.dfa"),
- };
-
- unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
-});
+pub static WHITESPACE_ANCHORED_REV: Lazy<DFA<&'static [u32]>> =
+ Lazy::new(|| {
+ static ALIGNED: &AlignAs<[u8], u32> = &AlignAs {
+ _align: [],
+ #[cfg(target_endian = "big")]
+ bytes: *include_bytes!("whitespace_anchored_rev.bigendian.dfa"),
+ #[cfg(target_endian = "little")]
+ bytes: *include_bytes!("whitespace_anchored_rev.littleendian.dfa"),
+ };
+ let (dfa, _) = DFA::from_bytes(&ALIGNED.bytes)
+ .expect("serialized DFA should be valid");
+ dfa
+ });
diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa
index efb9c8198..6cca67ff6 100644
--- a/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa
index 9a716d060..d8bfd0126 100644
--- a/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.rs b/vendor/bstr/src/unicode/fsm/word_break_fwd.rs
index dcb5f6bce..825782f1c 100644
--- a/vendor/bstr/src/unicode/fsm/word_break_fwd.rs
+++ b/vendor/bstr/src/unicode/fsm/word_break_fwd.rs
@@ -1,41 +1,19 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
-// ucd-generate dfa --name WORD_BREAK_FWD --sparse --minimize --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)]
+// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --shrink --rustfmt --safe WORD_BREAK_FWD src/unicode/fsm/ <snip: arg too long>
//
-// ucd-generate 0.2.12 is available on crates.io.
+// regex-cli 0.0.1 is available on crates.io.
-#[cfg(target_endian = "big")]
-pub static WORD_BREAK_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u32>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
+use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy};
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("word_break_fwd.bigendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
-});
-
-#[cfg(target_endian = "little")]
-pub static WORD_BREAK_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u32>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
-
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("word_break_fwd.littleendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+pub static WORD_BREAK_FWD: Lazy<DFA<&'static [u8]>> = Lazy::new(|| {
+ #[cfg(target_endian = "big")]
+ static BYTES: &'static [u8] =
+ include_bytes!("word_break_fwd.bigendian.dfa");
+ #[cfg(target_endian = "little")]
+ static BYTES: &'static [u8] =
+ include_bytes!("word_break_fwd.littleendian.dfa");
+ let (dfa, _) =
+ DFA::from_bytes(BYTES).expect("serialized DFA should be valid");
+ dfa
});
diff --git a/vendor/bstr/src/unicode/grapheme.rs b/vendor/bstr/src/unicode/grapheme.rs
index 13b730c48..8a701be98 100644
--- a/vendor/bstr/src/unicode/grapheme.rs
+++ b/vendor/bstr/src/unicode/grapheme.rs
@@ -1,4 +1,4 @@
-use regex_automata::DFA;
+use regex_automata::{dfa::Automaton, Anchored, Input};
use crate::{
ext_slice::ByteSlice,
@@ -211,9 +211,12 @@ pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) {
// Safe because all ASCII bytes are valid UTF-8.
let grapheme = unsafe { bs[..1].to_str_unchecked() };
(grapheme, 1)
- } else if let Some(end) = GRAPHEME_BREAK_FWD.find(bs) {
+ } else if let Some(hm) = {
+ let input = Input::new(bs).anchored(Anchored::Yes);
+ GRAPHEME_BREAK_FWD.try_search_fwd(&input).unwrap()
+ } {
// Safe because a match can only occur for valid UTF-8.
- let grapheme = unsafe { bs[..end].to_str_unchecked() };
+ let grapheme = unsafe { bs[..hm.offset()].to_str_unchecked() };
(grapheme, grapheme.len())
} else {
const INVALID: &'static str = "\u{FFFD}";
@@ -226,8 +229,11 @@ pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) {
fn decode_last_grapheme(bs: &[u8]) -> (&str, usize) {
if bs.is_empty() {
("", 0)
- } else if let Some(mut start) = GRAPHEME_BREAK_REV.rfind(bs) {
- start = adjust_rev_for_regional_indicator(bs, start);
+ } else if let Some(hm) = {
+ let input = Input::new(bs).anchored(Anchored::Yes);
+ GRAPHEME_BREAK_REV.try_search_rev(&input).unwrap()
+ } {
+ let start = adjust_rev_for_regional_indicator(bs, hm.offset());
// Safe because a match can only occur for valid UTF-8.
let grapheme = unsafe { bs[start..].to_str_unchecked() };
(grapheme, grapheme.len())
@@ -266,8 +272,11 @@ fn adjust_rev_for_regional_indicator(mut bs: &[u8], i: usize) -> usize {
// regional indicator codepoints. A fix probably requires refactoring this
// code a bit such that we don't rescan regional indicators.
let mut count = 0;
- while let Some(start) = REGIONAL_INDICATOR_REV.rfind(bs) {
- bs = &bs[..start];
+ while let Some(hm) = {
+ let input = Input::new(bs).anchored(Anchored::Yes);
+ REGIONAL_INDICATOR_REV.try_search_rev(&input).unwrap()
+ } {
+ bs = &bs[..hm.offset()];
count += 1;
}
if count % 2 == 0 {
diff --git a/vendor/bstr/src/unicode/sentence.rs b/vendor/bstr/src/unicode/sentence.rs
index ff29c7e25..0baf4dfeb 100644
--- a/vendor/bstr/src/unicode/sentence.rs
+++ b/vendor/bstr/src/unicode/sentence.rs
@@ -1,4 +1,4 @@
-use regex_automata::DFA;
+use regex_automata::{dfa::Automaton, Anchored, Input};
use crate::{
ext_slice::ByteSlice,
@@ -145,9 +145,12 @@ impl<'a> Iterator for SentenceIndices<'a> {
fn decode_sentence(bs: &[u8]) -> (&str, usize) {
if bs.is_empty() {
("", 0)
- } else if let Some(end) = SENTENCE_BREAK_FWD.find(bs) {
+ } else if let Some(hm) = {
+ let input = Input::new(bs).anchored(Anchored::Yes);
+ SENTENCE_BREAK_FWD.try_search_fwd(&input).unwrap()
+ } {
// Safe because a match can only occur for valid UTF-8.
- let sentence = unsafe { bs[..end].to_str_unchecked() };
+ let sentence = unsafe { bs[..hm.offset()].to_str_unchecked() };
(sentence, sentence.len())
} else {
const INVALID: &'static str = "\u{FFFD}";
diff --git a/vendor/bstr/src/unicode/whitespace.rs b/vendor/bstr/src/unicode/whitespace.rs
index b5eff300e..bf1f47c8e 100644
--- a/vendor/bstr/src/unicode/whitespace.rs
+++ b/vendor/bstr/src/unicode/whitespace.rs
@@ -1,4 +1,4 @@
-use regex_automata::DFA;
+use regex_automata::{dfa::Automaton, Anchored, Input};
use crate::unicode::fsm::{
whitespace_anchored_fwd::WHITESPACE_ANCHORED_FWD,
@@ -7,10 +7,18 @@ use crate::unicode::fsm::{
/// Return the first position of a non-whitespace character.
pub fn whitespace_len_fwd(slice: &[u8]) -> usize {
- WHITESPACE_ANCHORED_FWD.find(slice).unwrap_or(0)
+ let input = Input::new(slice).anchored(Anchored::Yes);
+ WHITESPACE_ANCHORED_FWD
+ .try_search_fwd(&input)
+ .unwrap()
+ .map_or(0, |hm| hm.offset())
}
/// Return the last position of a non-whitespace character.
pub fn whitespace_len_rev(slice: &[u8]) -> usize {
- WHITESPACE_ANCHORED_REV.rfind(slice).unwrap_or(slice.len())
+ let input = Input::new(slice).anchored(Anchored::Yes);
+ WHITESPACE_ANCHORED_REV
+ .try_search_rev(&input)
+ .unwrap()
+ .map_or(slice.len(), |hm| hm.offset())
}
diff --git a/vendor/bstr/src/unicode/word.rs b/vendor/bstr/src/unicode/word.rs
index 849f0c8e2..d6bf0f6f3 100644
--- a/vendor/bstr/src/unicode/word.rs
+++ b/vendor/bstr/src/unicode/word.rs
@@ -1,4 +1,4 @@
-use regex_automata::DFA;
+use regex_automata::{dfa::Automaton, Anchored, Input};
use crate::{
ext_slice::ByteSlice,
@@ -67,7 +67,9 @@ impl<'a> Iterator for Words<'a> {
#[inline]
fn next(&mut self) -> Option<&'a str> {
while let Some(word) = self.0.next() {
- if SIMPLE_WORD_FWD.is_match(word.as_bytes()) {
+ let input =
+ Input::new(word).anchored(Anchored::Yes).earliest(true);
+ if SIMPLE_WORD_FWD.try_search_fwd(&input).unwrap().is_some() {
return Some(word);
}
}
@@ -143,7 +145,9 @@ impl<'a> Iterator for WordIndices<'a> {
#[inline]
fn next(&mut self) -> Option<(usize, usize, &'a str)> {
while let Some((start, end, word)) = self.0.next() {
- if SIMPLE_WORD_FWD.is_match(word.as_bytes()) {
+ let input =
+ Input::new(word).anchored(Anchored::Yes).earliest(true);
+ if SIMPLE_WORD_FWD.try_search_fwd(&input).unwrap().is_some() {
return Some((start, end, word));
}
}
@@ -307,9 +311,12 @@ impl<'a> Iterator for WordsWithBreakIndices<'a> {
fn decode_word(bs: &[u8]) -> (&str, usize) {
if bs.is_empty() {
("", 0)
- } else if let Some(end) = WORD_BREAK_FWD.find(bs) {
+ } else if let Some(hm) = {
+ let input = Input::new(bs).anchored(Anchored::Yes);
+ WORD_BREAK_FWD.try_search_fwd(&input).unwrap()
+ } {
// Safe because a match can only occur for valid UTF-8.
- let word = unsafe { bs[..end].to_str_unchecked() };
+ let word = unsafe { bs[..hm.offset()].to_str_unchecked() };
(word, word.len())
} else {
const INVALID: &'static str = "\u{FFFD}";