From c23a457e72abe608715ac76f076f47dc42af07a5 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 30 May 2024 20:31:44 +0200 Subject: Merging upstream version 1.74.1+dfsg1. Signed-off-by: Daniel Baumann --- vendor/bstr/.cargo-checksum.json | 2 +- vendor/bstr/Cargo.lock | 96 +++++++---- vendor/bstr/Cargo.toml | 26 +-- vendor/bstr/scripts/generate-unicode-data | 149 ----------------- vendor/bstr/scripts/regex/grapheme.sh | 50 ------ vendor/bstr/scripts/regex/sentence.sh | 176 --------------------- vendor/bstr/scripts/regex/word.sh | 111 ------------- vendor/bstr/src/ext_slice.rs | 8 +- vendor/bstr/src/lib.rs | 14 -- .../unicode/fsm/grapheme_break_fwd.bigendian.dfa | Bin 10781 -> 22420 bytes .../fsm/grapheme_break_fwd.littleendian.dfa | Bin 10781 -> 22420 bytes vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs | 48 ++---- .../unicode/fsm/grapheme_break_rev.bigendian.dfa | Bin 55271 -> 90997 bytes .../fsm/grapheme_break_rev.littleendian.dfa | Bin 55271 -> 90997 bytes vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs | 48 ++---- .../fsm/regional_indicator_rev.bigendian.dfa | Bin 366 -> 1240 bytes .../fsm/regional_indicator_rev.littleendian.dfa | Bin 366 -> 1240 bytes .../bstr/src/unicode/fsm/regional_indicator_rev.rs | 55 +++---- .../unicode/fsm/sentence_break_fwd.bigendian.dfa | Bin 153619 -> 200879 bytes .../fsm/sentence_break_fwd.littleendian.dfa | Bin 153619 -> 200879 bytes vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs | 48 ++---- .../src/unicode/fsm/simple_word_fwd.bigendian.dfa | Bin 9237 -> 11095 bytes .../unicode/fsm/simple_word_fwd.littleendian.dfa | Bin 9237 -> 11095 bytes vendor/bstr/src/unicode/fsm/simple_word_fwd.rs | 48 ++---- .../fsm/whitespace_anchored_fwd.bigendian.dfa | Bin 572 -> 2964 bytes .../fsm/whitespace_anchored_fwd.littleendian.dfa | Bin 572 -> 2964 bytes .../src/unicode/fsm/whitespace_anchored_fwd.rs | 55 +++---- .../fsm/whitespace_anchored_rev.bigendian.dfa | Bin 884 -> 3232 bytes .../fsm/whitespace_anchored_rev.littleendian.dfa | 
Bin 884 -> 3232 bytes .../src/unicode/fsm/whitespace_anchored_rev.rs | 55 +++---- .../src/unicode/fsm/word_break_fwd.bigendian.dfa | Bin 236309 -> 299263 bytes .../unicode/fsm/word_break_fwd.littleendian.dfa | Bin 236309 -> 299263 bytes vendor/bstr/src/unicode/fsm/word_break_fwd.rs | 48 ++---- vendor/bstr/src/unicode/grapheme.rs | 23 ++- vendor/bstr/src/unicode/sentence.rs | 9 +- vendor/bstr/src/unicode/whitespace.rs | 14 +- vendor/bstr/src/unicode/word.rs | 17 +- 37 files changed, 249 insertions(+), 851 deletions(-) delete mode 100755 vendor/bstr/scripts/generate-unicode-data delete mode 100644 vendor/bstr/scripts/regex/grapheme.sh delete mode 100644 vendor/bstr/scripts/regex/sentence.sh delete mode 100644 vendor/bstr/scripts/regex/word.sh (limited to 'vendor/bstr') diff --git a/vendor/bstr/.cargo-checksum.json b/vendor/bstr/.cargo-checksum.json index 90c0ab073..04cad9221 100644 --- a/vendor/bstr/.cargo-checksum.json +++ b/vendor/bstr/.cargo-checksum.json @@ -1 +1 @@ -{"files":{"COPYING":"68653aaa727a2bfa31b7a751e31701ce33c49d695c12dd291a07d1c54da4c14b","Cargo.lock":"89aebb25a85f875bc2ccc3a90cf4a2c86dff11f95ff90ddb1f208d65a9fc85fb","Cargo.toml":"665a9c704132b7dd3b6dc75ba3ca772cf8bc5a28f33012943af94ca1bc4f85ad","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6b7374c39a57e57fc2c38eb529c4c88340152b10f51dd5ae2d819dfa67f61715","README.md":"5e20af8472e06926761584e3c249ebc8b9802f1eb13440276d9aa267e70e5955","examples/graphemes-std.rs":"100264f623ff973be76831fb1d4519e6f371b21972d6577bb49bf7bbff4d0d5e","examples/graphemes.rs":"401c5fac813f78e4029ece9c98bccb3128637c507d8667b73e069bfbc9d7f2f4","examples/lines-std.rs":"094a48bfd483ec01f80f9c937ddfe6f0bdbf09f960ba822215ec8ed9862624df","examples/lines.rs":"65ae4edbdb0ccff8ff40cdc70b4e7a70824f5028daff2e1b2a3247f884589db8","examples/uppercase-std.rs":"33aed88e38483aa303625757304a974594476a3a659d8bdd4877aceb90ff8be3","examples/uppercase.rs":"2cdf7f173cb6a5d4c16a967e3f733bc40331f5167
da519c5194ceee187ff814f","examples/words-std.rs":"ffde2fccd361890fab0e0b051915a749d5d51e95b9be700b76fada231d002f00","examples/words.rs":"aa805faa5012714428ef895596947c333417c2b16a7e0155d4a128be7428fc17","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","scripts/generate-unicode-data":"75d68617b5acf564cc681ddfbf77a6097eaa9a9f8f54af4e16905dda3dc6df77","scripts/regex/grapheme.sh":"d796bca73278f6ab04d65f285b2dc72efcad76874f98f4bfa22bf38f2eaeece7","scripts/regex/sentence.sh":"7892e07ac9e450967bd79279558cbef2f0fc0b0aefab24217910937ed6330f06","scripts/regex/word.sh":"b3f53e2331e9e50a1a7232b7d278aaecace6a42ef6c16dd0b8d0ec59fd2aaf4f","src/ascii.rs":"5aae67428421ad9e2156d7b27f5931bd924056b9af073ec53c44ef8e2d29cc8a","src/bstr.rs":"be1313d13814f3818068f1f6c96e4a1eecf1ecdec42c360f90379596804ea0ef","src/bstring.rs":"1cd7656dc3a6eded742eb7e9e43c83a5d020e6a419060c186788c8e1010f7dcc","src/byteset/mod.rs":"3f88d5594d95724c6eda96f79006a27dab80c4f849f00342b0bae3efedc32c45","src/byteset/scalar.rs":"fb84085b30d2901407877eb306828574eaf0dea907bbb7f02c36f24f0dc6f7b6","src/escape_bytes.rs":"207b7b92398912f940f911cfaafca04b7c62dac1106063ae2c7bd0676238b4cf","src/ext_slice.rs":"9e849981a4b4288b3d1237372847c81a6068186f041c8b04cab901a87a2bcc82","src/ext_vec.rs":"4dd9af267c07554051787c752e22e091684eb93f1a70c70ac2537535d1a54a07","src/impls.rs":"cd585f19d96f06b35cb17a7a8479f14f6b624beecf01501ea077f4b8eaacced4","src/io.rs":"73afcb89230d940b17a5917696c3f7c55267aefcb42db4164062dbf18875b677","src/lib.rs":"b4433d15492fe85983cbe1bf23249d1dac54aa5df4510989939933a43bfd07f9","src/tests.rs":"8adfd1a4a9da91b2a4dff25ffafcf99d914be3f5b7d67d66cdcb40a2d72abd04","src/unicode/data/GraphemeBreakTest.txt":"ddc7d4d1f3838573b94fc5d83ff7217e63c47b22ae1cd40c5fe1a54efc15589b","src/unicode/data/LICENSE-UNICODE":"8b9babb256418ec15761d635a49f973424939affba7a0a88de2fc2690e454a23","src/unicode/data/SentenceBreakTest.txt":"7e42dd749dbb94aa44b13faf9df6319d9a16ce2ea09a3a094fcfbb5962168040","src/unic
ode/data/WordBreakTest.txt":"8094b544ec1580c7e41ac0187805cc1aeb330a90301ec7505563e1a59318284e","src/unicode/fsm/grapheme_break_fwd.bigendian.dfa":"ae5220a77570720fcf78e63794d4cddbeef365fc3aaeec7dde391c229bc0a840","src/unicode/fsm/grapheme_break_fwd.littleendian.dfa":"3f9ce5d78325ede1651587e24e12357740a90608c784ac59c643abd42c4d9a83","src/unicode/fsm/grapheme_break_fwd.rs":"b6d937ec3afee23ea7c01ff9c0eeff1fc4f85287b87659dca80765db49d6b09e","src/unicode/fsm/grapheme_break_rev.bigendian.dfa":"fa2c745adc61060f08e5734f19acc09de387b0abd671597a543b4d4d80fd7a04","src/unicode/fsm/grapheme_break_rev.littleendian.dfa":"a10fd82f63b0f0aa08e5e7f09000c020c7ff4cfe6240afb11a615c663100de99","src/unicode/fsm/grapheme_break_rev.rs":"d9de2be51a17c5be37142ac44b9e2f0627c05a9101d5b1e23fd78229ca0ef75d","src/unicode/fsm/mod.rs":"50b8baa692e83f909a0fe62eced9666b712a68b6c7bf42976c8cc37e49dd9b64","src/unicode/fsm/regional_indicator_rev.bigendian.dfa":"db9db4c86bced5f4aaf68d5e475e13e5d4976c237deec13c192111a399aa5858","src/unicode/fsm/regional_indicator_rev.littleendian.dfa":"0905f70acddd423c1b53bfbeb73299009f724400029d7f9a987d63c32d36e36c","src/unicode/fsm/regional_indicator_rev.rs":"50b89fc6f7d461c789e88cc6f1a769257104b7f45eb01bd31047e898f1e9587a","src/unicode/fsm/sentence_break_fwd.bigendian.dfa":"0cd36026a86ea5d2e4710b8278733982808e341c88b62c4f9ca309417a181dc9","src/unicode/fsm/sentence_break_fwd.littleendian.dfa":"f3b85da014d1c94e1b444f3fca2952d1a5fbf2a9f42e32574eb52e027a797281","src/unicode/fsm/sentence_break_fwd.rs":"2c6147825fd78c15ecdb952d368d519f81bbf196eedf3e90e927699e832c7080","src/unicode/fsm/simple_word_fwd.bigendian.dfa":"635ab3e9c589268ef91a48c8b9b038e156deaf4a9a4475fce49ca75eabddccf7","src/unicode/fsm/simple_word_fwd.littleendian.dfa":"4f92b789385027a9276498a829cc8e5a3ecdd5f3c6d88254c6cd23d95d828c57","src/unicode/fsm/simple_word_fwd.rs":"44a2b90c8b4a2fa50c66cacc1d48afd47a8f7aa4753dd391471b48a9a891be71","src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa":"593c8ad059ab0bee60a2ea2
5f4c1fc89c105cb19a9bda3fa98d1464b8e87cfc0","src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa":"a04ed70d5dbd969c0af0e12bec5033ca910161c486f741dd0a792d2e5b0cc6f6","src/unicode/fsm/whitespace_anchored_fwd.rs":"e0f3f0be717ff306409ea9242f507847c4c0fa7469eccbd98a849389afe7fd26","src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa":"9ab09359ce73058d22e5bfa857e040831d49f4a53dd25da804136e9db9e7f5fb","src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa":"cb5804786bd98bfe0726f3f7733d72bc1d69130c8a8b026465c03c78f8c8ac79","src/unicode/fsm/whitespace_anchored_rev.rs":"8f27a50dfe549db99e768225c2956489f233f5a4986857a5ff5f2b507375a69d","src/unicode/fsm/word_break_fwd.bigendian.dfa":"72278d401ac119d50e06c6b8a4cb29d54366d1da536cfaedc3a20bb8cc1b742c","src/unicode/fsm/word_break_fwd.littleendian.dfa":"97dcdca86472d96faadd15d1c0328a76484971f3c4ad409c29049791cb9ed3eb","src/unicode/fsm/word_break_fwd.rs":"3ce6b28015dd6f2b330759da46263e8227f921e2370d4aa6922b8be26db558a8","src/unicode/grapheme.rs":"9bac56709754b48d42ee35282d5752c554a5af3e70b08c01977872f70ffa2afd","src/unicode/mod.rs":"fc67b0d64e9395398235c8663706b246edc0742e6cfe87057eeabdb1b19ad28d","src/unicode/sentence.rs":"8af1f274f80120b04928a6e560bfb3f2c9176d882f729265494b1a796a370681","src/unicode/whitespace.rs":"1fe313906fce009060267ae14de10e0ce577f0e2e2018273ee79d008dc9cf2f5","src/unicode/word.rs":"ed1bef53cf01ef6e682898e802e1654356a763c3993b4f16898eb5ed4b5e7637","src/utf8.rs":"e759713023dc3e5f9f5b2e6c3ba601af591ce5b2ad71aba729e3c29bcf6007e3"},"package":"a246e68bb43f6cd9db24bea052a53e40405417c5fb372e3d1a8a7f770a564ef5"} \ No newline at end of file 
+{"files":{"COPYING":"68653aaa727a2bfa31b7a751e31701ce33c49d695c12dd291a07d1c54da4c14b","Cargo.lock":"c36bf13a0db7e61dafd933b6b6c00ab9937ec0dedd86e258cec9f77398ffe7ed","Cargo.toml":"98e6de7edd0e320aace57321c5ca2527c79ad18d68978fba20e3274dbf6b484d","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6b7374c39a57e57fc2c38eb529c4c88340152b10f51dd5ae2d819dfa67f61715","README.md":"5e20af8472e06926761584e3c249ebc8b9802f1eb13440276d9aa267e70e5955","examples/graphemes-std.rs":"100264f623ff973be76831fb1d4519e6f371b21972d6577bb49bf7bbff4d0d5e","examples/graphemes.rs":"401c5fac813f78e4029ece9c98bccb3128637c507d8667b73e069bfbc9d7f2f4","examples/lines-std.rs":"094a48bfd483ec01f80f9c937ddfe6f0bdbf09f960ba822215ec8ed9862624df","examples/lines.rs":"65ae4edbdb0ccff8ff40cdc70b4e7a70824f5028daff2e1b2a3247f884589db8","examples/uppercase-std.rs":"33aed88e38483aa303625757304a974594476a3a659d8bdd4877aceb90ff8be3","examples/uppercase.rs":"2cdf7f173cb6a5d4c16a967e3f733bc40331f5167da519c5194ceee187ff814f","examples/words-std.rs":"ffde2fccd361890fab0e0b051915a749d5d51e95b9be700b76fada231d002f00","examples/words.rs":"aa805faa5012714428ef895596947c333417c2b16a7e0155d4a128be7428fc17","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/ascii.rs":"5aae67428421ad9e2156d7b27f5931bd924056b9af073ec53c44ef8e2d29cc8a","src/bstr.rs":"be1313d13814f3818068f1f6c96e4a1eecf1ecdec42c360f90379596804ea0ef","src/bstring.rs":"1cd7656dc3a6eded742eb7e9e43c83a5d020e6a419060c186788c8e1010f7dcc","src/byteset/mod.rs":"3f88d5594d95724c6eda96f79006a27dab80c4f849f00342b0bae3efedc32c45","src/byteset/scalar.rs":"fb84085b30d2901407877eb306828574eaf0dea907bbb7f02c36f24f0dc6f7b6","src/escape_bytes.rs":"207b7b92398912f940f911cfaafca04b7c62dac1106063ae2c7bd0676238b4cf","src/ext_slice.rs":"e0dff7cf2fa384d9dccf8c720b9867003f258a5dc6011b0debe80d68dae35c1f","src/ext_vec.rs":"4dd9af267c07554051787c752e22e091684eb93f1a70c70ac2537535d1a54a07","src/impls.rs
":"cd585f19d96f06b35cb17a7a8479f14f6b624beecf01501ea077f4b8eaacced4","src/io.rs":"73afcb89230d940b17a5917696c3f7c55267aefcb42db4164062dbf18875b677","src/lib.rs":"5e665152491cc9ce3a1f2cee3f235704b49e32017c691b45a371714aa6969d1e","src/tests.rs":"8adfd1a4a9da91b2a4dff25ffafcf99d914be3f5b7d67d66cdcb40a2d72abd04","src/unicode/data/GraphemeBreakTest.txt":"ddc7d4d1f3838573b94fc5d83ff7217e63c47b22ae1cd40c5fe1a54efc15589b","src/unicode/data/LICENSE-UNICODE":"8b9babb256418ec15761d635a49f973424939affba7a0a88de2fc2690e454a23","src/unicode/data/SentenceBreakTest.txt":"7e42dd749dbb94aa44b13faf9df6319d9a16ce2ea09a3a094fcfbb5962168040","src/unicode/data/WordBreakTest.txt":"8094b544ec1580c7e41ac0187805cc1aeb330a90301ec7505563e1a59318284e","src/unicode/fsm/grapheme_break_fwd.bigendian.dfa":"ed5aa2efd017d8815d58ffc1dc65525948f8ed003d81ac891f78ee04181dca81","src/unicode/fsm/grapheme_break_fwd.littleendian.dfa":"31c832b147705f1d144e43d117fdde35092fe569bbe7dcc97e5961fe6860791e","src/unicode/fsm/grapheme_break_fwd.rs":"2b35935d19226ccd10f26633f3c6b6c3fc61bf00fdefd314fe350d3dbb333ee4","src/unicode/fsm/grapheme_break_rev.bigendian.dfa":"ab3b82ed1bbfdbb7d7da178d7ed7f4fd5f66d20dc4688f5643abd89b9b4fe0ea","src/unicode/fsm/grapheme_break_rev.littleendian.dfa":"e30d2863ffa26181d736c3a3e8df9bcad104360bd2eed5bab23473f4ec9287ae","src/unicode/fsm/grapheme_break_rev.rs":"1da158d1f12c6313e227357d00ad56c8718792cd4d2b54ce3fed9446cd4b370e","src/unicode/fsm/mod.rs":"50b8baa692e83f909a0fe62eced9666b712a68b6c7bf42976c8cc37e49dd9b64","src/unicode/fsm/regional_indicator_rev.bigendian.dfa":"97b61dc64bfbf612d5ff5a17b126b9e3afd303fb36f5aa2f4a199b6f3b7cced5","src/unicode/fsm/regional_indicator_rev.littleendian.dfa":"5b0e114dc14d7dc46a8398f98018a630df33ccff8b909ddcd44a0da8d2ad20d1","src/unicode/fsm/regional_indicator_rev.rs":"c973a021197982282dfb12cd918185836795e44914daea19df4b4e4a156ae680","src/unicode/fsm/sentence_break_fwd.bigendian.dfa":"0dd4dfa025a932862e6d04a9fdc0aa25c26ac1d9d1a0baa05651d851c1fa4718","src/uni
code/fsm/sentence_break_fwd.littleendian.dfa":"cb37cd218f2714928916640154a0803d257e114271ff0ea90055bc0f4e8a2d41","src/unicode/fsm/sentence_break_fwd.rs":"3e3708f16992e3b157b29304e5dce47fcadbf5db1d0b85828f852fbbb7a9da09","src/unicode/fsm/simple_word_fwd.bigendian.dfa":"27fefb1ab8f0cd6d4d3b877832122c2e35d7921eaf29c0e6de96ff51611b20f8","src/unicode/fsm/simple_word_fwd.littleendian.dfa":"a9340c695ed76aa37353ce8f337e84e08714754b9ccbba64fcfaddb9a32c4e87","src/unicode/fsm/simple_word_fwd.rs":"59b818f24363d622d8a4bf40872e3648b582779861d4ce7a7a0b8a2f2d04d721","src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa":"9a8886693ceb7616d8838e370e1fe5a2a9aafed50891278f70dd443a5d72e4d4","src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa":"661745fc2ca838e2ebb10dba01c8dd5cd72b264ef71b2b343196a0bcf5daaac7","src/unicode/fsm/whitespace_anchored_fwd.rs":"77183ab0bc353aa688ca0c829dbef011eeab8b9eff07ef861c92ac905f9d64ba","src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa":"cf5f2fb71f3ea6eb2f6349c039f00d9cc72b9fa949910594c1453c5fa8b2f4da","src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa":"791825312415cbd24aca8b8262ded3e1fabc785e3eba3660f65019296f8e5f58","src/unicode/fsm/whitespace_anchored_rev.rs":"0e98166e69ba84eb7f5e7ce504e4aa8cb9aa3043f7f95629b1432daeb9e55678","src/unicode/fsm/word_break_fwd.bigendian.dfa":"54908d18964ef017b909594af6ce976af9a22698845a858234724e747240462d","src/unicode/fsm/word_break_fwd.littleendian.dfa":"c7977465637868e286fcb818b4cab7349a9c1612f4b0a1d06b0ba5f2d8281018","src/unicode/fsm/word_break_fwd.rs":"ce643936af3b8a8a91b8ee8252f08a080a2041ef6784e7c46ab9b4d5acee463c","src/unicode/grapheme.rs":"a42b8b61c9dfc05c3c15e311bfef7a36f5f4222d9f044d1d990259591a60a103","src/unicode/mod.rs":"fc67b0d64e9395398235c8663706b246edc0742e6cfe87057eeabdb1b19ad28d","src/unicode/sentence.rs":"85bab665e0be53957743211cb4956060b6087716127251a3f962a57e87cd2775","src/unicode/whitespace.rs":"f4961ea2020edb6bb382a56e7ad91ee9b221670842669c5e070f19995840abaa","src/unicode/word.
rs":"73eef99becfd0697f55500052df057c45a3aa9393839e67f77dbd227a7ea88f4","src/utf8.rs":"e759713023dc3e5f9f5b2e6c3ba601af591ce5b2ad71aba729e3c29bcf6007e3"},"package":"4c2f7349907b712260e64b0afe2f84692af14a454be26187d9df565c7f69266a"} \ No newline at end of file diff --git a/vendor/bstr/Cargo.lock b/vendor/bstr/Cargo.lock index 7d9e05482..6d3746ae3 100644 --- a/vendor/bstr/Cargo.lock +++ b/vendor/bstr/Cargo.lock @@ -4,10 +4,9 @@ version = 3 [[package]] name = "bstr" -version = "1.5.0" +version = "1.6.2" dependencies = [ "memchr", - "once_cell", "quickcheck", "regex-automata", "serde", @@ -23,9 +22,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "getrandom" -version = "0.2.8" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", "libc", @@ -34,21 +33,24 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.138" +version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db6d7e329c562c5dfab7a46a2afabc8b987ab9a4834c9d1ca04dc54c1546cef8" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] name = "memchr" -version = "2.5.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "f478948fd84d9f8e86967bf432640e46adfb5a4bd4f14ef7e864ab38220534ae" [[package]] -name = "once_cell" -version = "1.16.0" +name = "proc-macro2" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +dependencies = [ + "unicode-ident", +] [[package]] 
name = "quickcheck" @@ -59,6 +61,15 @@ dependencies = [ "rand", ] +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + [[package]] name = "rand" version = "0.8.5" @@ -78,47 +89,68 @@ dependencies = [ ] [[package]] -name = "regex" -version = "1.7.0" +name = "regex-automata" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a" -dependencies = [ - "regex-syntax", -] +checksum = "49530408a136e16e5b486e883fbb6ba058e8e4e8ae6621a77b048b314336e629" [[package]] -name = "regex-automata" -version = "0.1.10" +name = "regex-lite" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +checksum = "f96ede7f386ba6e910092e7ccdc04176cface62abebea07ed6b46d870ed95ca2" [[package]] -name = "regex-syntax" -version = "0.6.28" +name = "serde" +version = "1.0.188" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +dependencies = [ + "serde_derive", +] [[package]] -name = "serde" -version = "1.0.150" +name = "serde_derive" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "syn" +version = "2.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e326c9ec8042f1b5da33252c8a37e9ffbd2c9bef0155215b6e6c80c790e05f91" +checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" +dependencies = [ + 
"proc-macro2", + "quote", + "unicode-ident", +] [[package]] name = "ucd-parse" -version = "0.1.10" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc2d0556a998f4c55500ce1730901ba32bafbe820068cbdc091421525d61253b" +checksum = "212c59636157b18c2f57eed2799e6606c52fc49c6a11685ffb0d08f06e55f428" dependencies = [ - "once_cell", - "regex", + "regex-lite", ] +[[package]] +name = "unicode-ident" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" + [[package]] name = "unicode-segmentation" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fdbf052a0783de01e944a6ce7a8cb939e295b1e7be835a1112c3b9a7f047a5a" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" [[package]] name = "wasi" diff --git a/vendor/bstr/Cargo.toml b/vendor/bstr/Cargo.toml index f644ee2a8..2db4367c1 100644 --- a/vendor/bstr/Cargo.toml +++ b/vendor/bstr/Cargo.toml @@ -13,9 +13,12 @@ edition = "2021" rust-version = "1.60" name = "bstr" -version = "1.5.0" +version = "1.6.2" authors = ["Andrew Gallant "] -exclude = ["/.github"] +exclude = [ + "/.github", + "/scripts", +] description = "A string type that is not required to be valid UTF-8." 
homepage = "https://github.com/BurntSushi/bstr" documentation = "https://docs.rs/bstr" @@ -74,15 +77,12 @@ required-features = [ ] [dependencies.memchr] -version = "2.4.0" +version = "2.6.1" default-features = false -[dependencies.once_cell] -version = "1.14.0" -optional = true - [dependencies.regex-automata] -version = "0.1.5" +version = "0.3.0" +features = ["dfa-search"] optional = true default-features = false @@ -102,7 +102,10 @@ version = "0.1.3" version = "1.2.1" [features] -alloc = ["serde?/alloc"] +alloc = [ + "memchr/alloc", + "serde?/alloc", +] default = [ "std", "unicode", @@ -113,7 +116,4 @@ std = [ "memchr/std", "serde?/std", ] -unicode = [ - "dep:once_cell", - "dep:regex-automata", -] +unicode = ["dep:regex-automata"] diff --git a/vendor/bstr/scripts/generate-unicode-data b/vendor/bstr/scripts/generate-unicode-data deleted file mode 100755 index b8341c5a6..000000000 --- a/vendor/bstr/scripts/generate-unicode-data +++ /dev/null @@ -1,149 +0,0 @@ -#!/bin/sh - -set -e -D="$(dirname "$0")" - -# Convenience function for checking that a command exists. -requires() { - cmd="$1" - if ! command -v "$cmd" > /dev/null 2>&1; then - echo "DEPENDENCY MISSING: $cmd must be installed" >&2 - exit 1 - fi -} - -# Test if an array ($2) contains a particular element ($1). 
-array_exists() { - needle="$1" - shift - - for el in "$@"; do - if [ "$el" = "$needle" ]; then - return 0 - fi - done - return 1 -} - -graphemes() { - regex="$(sh "$D/regex/grapheme.sh")" - - echo "generating forward grapheme DFA" - ucd-generate dfa \ - --name GRAPHEME_BREAK_FWD \ - --sparse --minimize --anchored --state-size 2 \ - src/unicode/fsm/ \ - "$regex" - - echo "generating reverse grapheme DFA" - ucd-generate dfa \ - --name GRAPHEME_BREAK_REV \ - --reverse --longest \ - --sparse --minimize --anchored --state-size 2 \ - src/unicode/fsm/ \ - "$regex" -} - -words() { - regex="$(sh "$D/regex/word.sh")" - - echo "generating forward word DFA (this can take a while)" - ucd-generate dfa \ - --name WORD_BREAK_FWD \ - --sparse --minimize --anchored --state-size 4 \ - src/unicode/fsm/ \ - "$regex" -} - -sentences() { - regex="$(sh "$D/regex/sentence.sh")" - - echo "generating forward sentence DFA (this can take a while)" - ucd-generate dfa \ - --name SENTENCE_BREAK_FWD \ - --minimize \ - --sparse --anchored --state-size 4 \ - src/unicode/fsm/ \ - "$regex" -} - -regional_indicator() { - # For finding all occurrences of region indicators. This is used to handle - # regional indicators as a special case for the reverse grapheme iterator - # and the reverse word iterator. 
- echo "generating regional indicator DFA" - ucd-generate dfa \ - --name REGIONAL_INDICATOR_REV \ - --reverse \ - --classes --minimize --anchored --premultiply --state-size 1 \ - src/unicode/fsm/ \ - "\p{gcb=Regional_Indicator}" -} - -simple_word() { - echo "generating forward simple word DFA" - ucd-generate dfa \ - --name SIMPLE_WORD_FWD \ - --sparse --minimize --state-size 2 \ - src/unicode/fsm/ \ - "\w" -} - -whitespace() { - echo "generating forward whitespace DFA" - ucd-generate dfa \ - --name WHITESPACE_ANCHORED_FWD \ - --anchored --classes --premultiply --minimize --state-size 1 \ - src/unicode/fsm/ \ - "\s+" - - echo "generating reverse whitespace DFA" - ucd-generate dfa \ - --name WHITESPACE_ANCHORED_REV \ - --reverse \ - --anchored --classes --premultiply --minimize --state-size 2 \ - src/unicode/fsm/ \ - "\s+" -} - -main() { - if array_exists "-h" "$@" || array_exists "--help" "$@"; then - echo "Usage: $(basename "$0") [--list-commands] [] ..." >&2 - exit - fi - - commands=" - graphemes - sentences - words - regional-indicator - simple-word - whitespace - " - if array_exists "--list-commands" "$@"; then - for cmd in $commands; do - echo "$cmd" - done - exit - fi - - # ucd-generate is used to compile regexes into DFAs. - requires ucd-generate - - mkdir -p src/unicode/fsm/ - - cmds=$* - if [ $# -eq 0 ] || array_exists "all" "$@"; then - cmds=$commands - fi - for cmd in $cmds; do - if array_exists "$cmd" $commands; then - fun="$(echo "$cmd" | sed 's/-/_/g')" - eval "$fun" - else - echo "unrecognized command: $cmd" >&2 - fi - done -} - -main "$@" diff --git a/vendor/bstr/scripts/regex/grapheme.sh b/vendor/bstr/scripts/regex/grapheme.sh deleted file mode 100644 index 0b2b54daa..000000000 --- a/vendor/bstr/scripts/regex/grapheme.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/sh - -# vim: indentexpr= nosmartindent autoindent -# vim: tabstop=2 shiftwidth=2 softtabstop=2 - -# This regex was manually written, derived from the rules in UAX #29. 
-# Particularly, from Table 1c, which lays out a regex for grapheme clusters. - -CR="\p{gcb=CR}" -LF="\p{gcb=LF}" -Control="\p{gcb=Control}" -Prepend="\p{gcb=Prepend}" -L="\p{gcb=L}" -V="\p{gcb=V}" -LV="\p{gcb=LV}" -LVT="\p{gcb=LVT}" -T="\p{gcb=T}" -RI="\p{gcb=RI}" -Extend="\p{gcb=Extend}" -ZWJ="\p{gcb=ZWJ}" -SpacingMark="\p{gcb=SpacingMark}" - -Any="\p{any}" -ExtendPict="\p{Extended_Pictographic}" - -echo "(?x) -$CR $LF -| -$Control -| -$Prepend* -( - ( - ($L* ($V+ | $LV $V* | $LVT) $T*) - | - $L+ - | - $T+ - ) - | - $RI $RI - | - $ExtendPict ($Extend* $ZWJ $ExtendPict)* - | - [^$Control $CR $LF] -) -[$Extend $ZWJ $SpacingMark]* -| -$Any -" diff --git a/vendor/bstr/scripts/regex/sentence.sh b/vendor/bstr/scripts/regex/sentence.sh deleted file mode 100644 index 689d1849f..000000000 --- a/vendor/bstr/scripts/regex/sentence.sh +++ /dev/null @@ -1,176 +0,0 @@ -#!/bin/sh - -# vim: indentexpr= nosmartindent autoindent -# vim: tabstop=2 shiftwidth=2 softtabstop=2 - -# This is a regex that I reverse engineered from the sentence boundary chain -# rules in UAX #29. Unlike the grapheme regex, which is essentially provided -# for us in UAX #29, no such sentence regex exists. -# -# I looked into how ICU achieves this, since UAX #29 hints that producing -# finite state machines for grapheme/sentence/word/line breaking is possible, -# but only easy to do for graphemes. ICU does this by implementing their own -# DSL for describing the break algorithms in terms of the chaining rules -# directly. You can see an example for sentences in -# icu4c/source/data/brkitr/rules/sent.txt. ICU then builds a finite state -# machine from those rules in a mostly standard way, but implements the -# "chaining" aspect of the rules by connecting overlapping end and start -# states. 
For example, given SB7: -# -# (Upper | Lower) ATerm x Upper -# -# Then the naive way to convert this into a regex would be something like -# -# [\p{sb=Upper}\p{sb=Lower}]\p{sb=ATerm}\p{sb=Upper} -# -# Unfortunately, this is incorrect. Why? Well, consider an example like so: -# -# U.S.A. -# -# A correct implementation of the sentence breaking algorithm should not insert -# any breaks here, exactly in accordance with repeatedly applying rule SB7 as -# given above. Our regex fails to do this because it will first match `U.S` -# without breaking them---which is correct---but will then start looking for -# its next rule beginning with a full stop (in ATerm) and followed by an -# uppercase letter (A). This will wind up triggering rule SB11 (without -# matching `A`), which inserts a break. -# -# The reason why this happens is because our initial application of rule SB7 -# "consumes" the next uppercase letter (S), which we want to reuse as a prefix -# in the next rule application. A natural way to express this would be with -# look-around, although it's not clear that works in every case since you -# ultimately might want to consume that ending uppercase letter. In any case, -# we can't use look-around in our truly regular regexes, so we must fix this. -# The approach we take is to explicitly repeat rules when a suffix of a rule -# is a prefix of another rule. In the case of SB7, the end of the rule, an -# uppercase letter, also happens to match the beginning of the rule. This can -# in turn be repeated indefinitely. Thus, our actual translation to a regex is: -# -# [\p{sb=Upper}\p{sb=Lower}]\p{sb=ATerm}\p{sb=Upper}(\p{sb=ATerm}\p{sb=Upper}* -# -# It turns out that this is exactly what ICU does, but in their case, they do -# it automatically. In our case, we connect the chaining rules manually. It's -# tedious. With that said, we do no implement Unicode line breaking with this -# approach, which is a far scarier beast. 
In that case, it would probably be -# worth writing the code to do what ICU does. -# -# In the case of sentence breaks, there aren't *too* many overlaps of this -# nature. We list them out exhaustively to make this clear, because it's -# essentially impossible to easily observe this in the regex. (It took me a -# full day to figure all of this out.) Rules marked with N/A mean that they -# specify a break, and this strategy only really applies to stringing together -# non-breaks. -# -# SB1 - N/A -# SB2 - N/A -# SB3 - None -# SB4 - N/A -# SB5 - None -# SB6 - None -# SB7 - End overlaps with beginning of SB7 -# SB8 - End overlaps with beginning of SB7 -# SB8a - End overlaps with beginning of SB6, SB8, SB8a, SB9, SB10, SB11 -# SB9 - None -# SB10 - None -# SB11 - None -# SB998 - N/A -# -# SB8a is in particular quite tricky to get right without look-ahead, since it -# allows ping-ponging between match rules SB8a and SB9-11, where SB9-11 -# otherwise indicate that a break has been found. In the regex below, we tackle -# this by only permitting part of SB8a to match inside our core non-breaking -# repetition. In particular, we only allow the parts of SB8a to match that -# permit the non-breaking components to continue. If a part of SB8a matches -# that guarantees a pop out to SB9-11, (like `STerm STerm`), then we let it -# happen. This still isn't correct because an SContinue might be seen which -# would allow moving back into SB998 and thus the non-breaking repetition, so -# we handle that case as well. -# -# Finally, the last complication here is the sprinkling of $Ex* everywhere. -# This essentially corresponds to the implementation of SB5 by following -# UAX #29's recommendation in S6.2. Essentially, we use it avoid ever breaking -# in the middle of a grapheme cluster. 
- -CR="\p{sb=CR}" -LF="\p{sb=LF}" -Sep="\p{sb=Sep}" -Close="\p{sb=Close}" -Sp="\p{sb=Sp}" -STerm="\p{sb=STerm}" -ATerm="\p{sb=ATerm}" -SContinue="\p{sb=SContinue}" -Numeric="\p{sb=Numeric}" -Upper="\p{sb=Upper}" -Lower="\p{sb=Lower}" -OLetter="\p{sb=OLetter}" - -Ex="[\p{sb=Extend}\p{sb=Format}]" -ParaSep="[$Sep $CR $LF]" -SATerm="[$STerm $ATerm]" - -LetterSepTerm="[$OLetter $Upper $Lower $ParaSep $SATerm]" - -echo "(?x) -( - # SB6 - $ATerm $Ex* - $Numeric - | - # SB7 - [$Upper $Lower] $Ex* $ATerm $Ex* - $Upper $Ex* - # overlap with SB7 - ($ATerm $Ex* $Upper $Ex*)* - | - # SB8 - $ATerm $Ex* $Close* $Ex* $Sp* $Ex* - ([^$LetterSepTerm] $Ex*)* $Lower $Ex* - # overlap with SB7 - ($ATerm $Ex* $Upper $Ex*)* - | - # SB8a - $SATerm $Ex* $Close* $Ex* $Sp* $Ex* - ( - $SContinue - | - $ATerm $Ex* - # Permit repetition of SB8a - (($Close $Ex*)* ($Sp $Ex*)* $SATerm)* - # In order to continue non-breaking matching, we now must observe - # a match with a rule that keeps us in SB6-8a. Otherwise, we've entered - # one of SB9-11 and know that a break must follow. - ( - # overlap with SB6 - $Numeric - | - # overlap with SB8 - ($Close $Ex*)* ($Sp $Ex*)* - ([^$LetterSepTerm] $Ex*)* $Lower $Ex* - # overlap with SB7 - ($ATerm $Ex* $Upper $Ex*)* - | - # overlap with SB8a - ($Close $Ex*)* ($Sp $Ex*)* $SContinue - ) - | - $STerm $Ex* - # Permit repetition of SB8a - (($Close $Ex*)* ($Sp $Ex*)* $SATerm)* - # As with ATerm above, in order to continue non-breaking matching, we - # must now observe a match with a rule that keeps us out of SB9-11. - # For STerm, the only such possibility is to see an SContinue. Anything - # else will result in a break. - ($Close $Ex*)* ($Sp $Ex*)* $SContinue - ) - | - # SB998 - # The logic behind this catch-all is that if we get to this point and - # see a Sep, CR, LF, STerm or ATerm, then it has to fall into one of - # SB9, SB10 or SB11. 
In the cases of SB9-11, we always find a break since - # SB11 acts as a catch-all to induce a break following a SATerm that isn't - # handled by rules SB6-SB8a. - [^$ParaSep $SATerm] -)* -# The following collapses rules SB3, SB4, part of SB8a, SB9, SB10 and SB11. -($SATerm $Ex* ($Close $Ex*)* ($Sp $Ex*)*)* ($CR $LF | $ParaSep)? -" diff --git a/vendor/bstr/scripts/regex/word.sh b/vendor/bstr/scripts/regex/word.sh deleted file mode 100644 index 78c7a05cf..000000000 --- a/vendor/bstr/scripts/regex/word.sh +++ /dev/null @@ -1,111 +0,0 @@ -#!/bin/sh - -# vim: indentexpr= nosmartindent autoindent -# vim: tabstop=2 shiftwidth=2 softtabstop=2 - -# See the comments in regex/sentence.sh for the general approach to how this -# regex was written. -# -# Writing the regex for this was *hard*. It took me two days of hacking to get -# this far, and that was after I had finished the sentence regex, so my brain -# was fully cached on this. Unlike the sentence regex, the rules in the regex -# below don't correspond as nicely to the rules in UAX #29. In particular, the -# UAX #29 rules have a ton of overlap with each other, which requires crazy -# stuff in the regex. I'm not even sure the regex below is 100% correct or even -# minimal, however, I did compare this with the ICU word segmenter on a few -# different corpora, and it produces identical results. (In addition to of -# course passing the UCD tests.) -# -# In general, I consider this approach to be a failure. Firstly, this is -# clearly a write-only regex. Secondly, building the minimized DFA for this is -# incredibly slow. Thirdly, the DFA is itself very large (~240KB). Fourthly, -# reversing this regex (for reverse word iteration) results in a >19MB DFA. -# Yes. That's MB. Wat. And it took 5 minutes to build. -# -# I think we might consider changing our approach to this problem. The normal -# path I've seen, I think, is to decode codepoints one at a time, and then -# thread them through a state machine in the code itself. 
We could take this -# approach, or possibly combine it with a DFA that tells us which Word_Break -# value a codepoint has. I'd prefer the latter approach, but it requires adding -# RegexSet support to regex-automata. Something that should definitely be done, -# but is a fair amount of work. -# -# Gah. - -CR="\p{wb=CR}" -LF="\p{wb=LF}" -Newline="\p{wb=Newline}" -ZWJ="\p{wb=ZWJ}" -RI="\p{wb=Regional_Indicator}" -Katakana="\p{wb=Katakana}" -HebrewLet="\p{wb=HebrewLetter}" -ALetter="\p{wb=ALetter}" -SingleQuote="\p{wb=SingleQuote}" -DoubleQuote="\p{wb=DoubleQuote}" -MidNumLet="\p{wb=MidNumLet}" -MidLetter="\p{wb=MidLetter}" -MidNum="\p{wb=MidNum}" -Numeric="\p{wb=Numeric}" -ExtendNumLet="\p{wb=ExtendNumLet}" -WSegSpace="\p{wb=WSegSpace}" - -Any="\p{any}" -Ex="[\p{wb=Extend} \p{wb=Format} $ZWJ]" -ExtendPict="\p{Extended_Pictographic}" -AHLetter="[$ALetter $HebrewLet]" -MidNumLetQ="[$MidNumLet $SingleQuote]" - -AHLetterRepeat="$AHLetter $Ex* ([$MidLetter $MidNumLetQ] $Ex* $AHLetter $Ex*)*" -NumericRepeat="$Numeric $Ex* ([$MidNum $MidNumLetQ] $Ex* $Numeric $Ex*)*" - -echo "(?x) -$CR $LF -| -[$Newline $CR $LF] -| -$WSegSpace $WSegSpace+ -| -( - ([^$Newline $CR $LF]? 
$Ex* $ZWJ $ExtendPict $Ex*)+ - | - ($ExtendNumLet $Ex*)* $AHLetter $Ex* - ( - ( - ($NumericRepeat | $ExtendNumLet $Ex*)* - | - [$MidLetter $MidNumLetQ] $Ex* - ) - $AHLetter $Ex* - )+ - ($NumericRepeat | $ExtendNumLet $Ex*)* - | - ($ExtendNumLet $Ex*)* $AHLetter $Ex* ($NumericRepeat | $ExtendNumLet $Ex*)+ - | - ($ExtendNumLet $Ex*)* $Numeric $Ex* - ( - ( - ($AHLetterRepeat | $ExtendNumLet $Ex*)* - | - [$MidNum $MidNumLetQ] $Ex* - ) - $Numeric $Ex* - )+ - ($AHLetterRepeat | $ExtendNumLet $Ex*)* - | - ($ExtendNumLet $Ex*)* $Numeric $Ex* ($AHLetterRepeat | $ExtendNumLet $Ex*)+ - | - $Katakana $Ex* - (($Katakana | $ExtendNumLet) $Ex*)+ - | - $ExtendNumLet $Ex* - (($ExtendNumLet | $AHLetter | $Numeric | $Katakana) $Ex*)+ -)+ -| -$HebrewLet $Ex* $SingleQuote $Ex* -| -($HebrewLet $Ex* $DoubleQuote $Ex*)+ $HebrewLet $Ex* -| -$RI $Ex* $RI $Ex* -| -$Any $Ex* -" diff --git a/vendor/bstr/src/ext_slice.rs b/vendor/bstr/src/ext_slice.rs index 5e1801971..503e0b258 100644 --- a/vendor/bstr/src/ext_slice.rs +++ b/vendor/bstr/src/ext_slice.rs @@ -3106,8 +3106,8 @@ impl<'a> Finder<'a> { /// If this is already an owned finder, then this is a no-op. Otherwise, /// this copies the needle. /// - /// This is only available when the `std` feature is enabled. - #[cfg(feature = "std")] + /// This is only available when the `alloc` feature is enabled. + #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> Finder<'static> { Finder(self.0.into_owned()) @@ -3189,8 +3189,8 @@ impl<'a> FinderReverse<'a> { /// If this is already an owned finder, then this is a no-op. Otherwise, /// this copies the needle. /// - /// This is only available when the `std` feature is enabled. - #[cfg(feature = "std")] + /// This is only available when the `alloc` feature is enabled. 
+ #[cfg(feature = "alloc")] #[inline] pub fn into_owned(self) -> FinderReverse<'static> { FinderReverse(self.0.into_owned()) diff --git a/vendor/bstr/src/lib.rs b/vendor/bstr/src/lib.rs index 8598e4458..47c624408 100644 --- a/vendor/bstr/src/lib.rs +++ b/vendor/bstr/src/lib.rs @@ -394,20 +394,6 @@ and Unicode support. #![cfg_attr(not(any(feature = "std", test)), no_std)] #![cfg_attr(docsrs, feature(doc_auto_cfg))] -// Why do we do this? Well, in order for us to use once_cell's 'Lazy' type to -// load DFAs, it requires enabling its 'std' feature. Yet, there is really -// nothing about our 'unicode' feature that requires 'std'. We could declare -// that 'unicode = [std, ...]', which would be fine, but once regex-automata -// 0.3 is a thing, I believe we can drop once_cell altogether and thus drop -// the need for 'std' to be enabled when 'unicode' is enabled. But if we make -// 'unicode' also enable 'std', then it would be a breaking change to remove -// 'std' from that list. -// -// So, for right now, we force folks to explicitly say they want 'std' if they -// want 'unicode'. In the future, we should be able to relax this. 
-#[cfg(all(feature = "unicode", not(feature = "std")))] -compile_error!("enabling 'unicode' requires enabling 'std'"); - #[cfg(feature = "alloc")] extern crate alloc; diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa index 31f99c1f5..c4321e2c9 100644 Binary files a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa and b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa differ diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa index 3a51728bb..3b9905da9 100644 Binary files a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa and b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa differ diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs index dea4a7e3e..ccba7d952 100644 --- a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs +++ b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs @@ -1,41 +1,19 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate dfa --name GRAPHEME_BREAK_FWD --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)] +// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --shrink --rustfmt --safe GRAPHEME_BREAK_FWD src/unicode/fsm/ // -// ucd-generate 0.2.12 is available on crates.io. +// regex-cli 0.0.1 is available on crates.io. 
-#[cfg(target_endian = "big")] -pub static GRAPHEME_BREAK_FWD: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u16>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } +use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy}; - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("grapheme_break_fwd.bigendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } -}); - -#[cfg(target_endian = "little")] -pub static GRAPHEME_BREAK_FWD: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u16>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } - - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("grapheme_break_fwd.littleendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +pub static GRAPHEME_BREAK_FWD: Lazy<DFA<&'static [u8]>> = Lazy::new(|| { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = + include_bytes!("grapheme_break_fwd.bigendian.dfa"); + #[cfg(target_endian = "little")] + static BYTES: &'static [u8] = + include_bytes!("grapheme_break_fwd.littleendian.dfa"); + let (dfa, _) = + DFA::from_bytes(BYTES).expect("serialized DFA should be valid"); + dfa }); diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa index 742d2a6a2..5d7d34a01 100644 Binary files a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa and b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa differ diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa index d1937f26c..7472844d4 100644 Binary files a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa and
b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa differ diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs index 2d2cd542f..e5619b2f0 100644 --- a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs +++ b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs @@ -1,41 +1,19 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate dfa --name GRAPHEME_BREAK_REV --reverse --longest --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)] +// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --reverse --match-kind all --no-captures --shrink --rustfmt --safe GRAPHEME_BREAK_REV src/unicode/fsm/ // -// ucd-generate 0.2.12 is available on crates.io. +// regex-cli 0.0.1 is available on crates.io. -#[cfg(target_endian = "big")] -pub static GRAPHEME_BREAK_REV: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u16>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } +use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy}; - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("grapheme_break_rev.bigendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } -}); - -#[cfg(target_endian = "little")] -pub static GRAPHEME_BREAK_REV: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u16>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } - - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("grapheme_break_rev.littleendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +pub static GRAPHEME_BREAK_REV: Lazy<DFA<&'static [u8]>> = Lazy::new(|| { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = + include_bytes!("grapheme_break_rev.bigendian.dfa"); +
#[cfg(target_endian = "little")] + static BYTES: &'static [u8] = + include_bytes!("grapheme_break_rev.littleendian.dfa"); + let (dfa, _) = + DFA::from_bytes(BYTES).expect("serialized DFA should be valid"); + dfa }); diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa index 1a3357f71..1f830009a 100644 Binary files a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa and b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa differ diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa index e437aae3a..b0db99dd1 100644 Binary files a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa and b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa differ diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs index db7a40fcd..af1c73051 100644 --- a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs +++ b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs @@ -1,41 +1,24 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate dfa --name REGIONAL_INDICATOR_REV --reverse --classes --minimize --anchored --premultiply --state-size 1 src/unicode/fsm/ \p{gcb=Regional_Indicator} +// regex-cli generate serialize dense dfa --minimize --start-kind anchored --reverse --no-captures --shrink --rustfmt --safe REGIONAL_INDICATOR_REV src/unicode/fsm/ \p{gcb=Regional_Indicator} // -// ucd-generate 0.2.12 is available on crates.io. +// regex-cli 0.0.1 is available on crates.io. 
-#[cfg(target_endian = "big")] -pub static REGIONAL_INDICATOR_REV: ::once_cell::sync::Lazy< - ::regex_automata::DenseDFA<&'static [u8], u8>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } +use regex_automata::{ + dfa::dense::DFA, + util::{lazy::Lazy, wire::AlignAs}, +}; - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("regional_indicator_rev.bigendian.dfa"), - }; - - unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } -}); - -#[cfg(target_endian = "little")] -pub static REGIONAL_INDICATOR_REV: ::once_cell::sync::Lazy< - ::regex_automata::DenseDFA<&'static [u8], u8>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } - - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("regional_indicator_rev.littleendian.dfa"), - }; - - unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } -}); +pub static REGIONAL_INDICATOR_REV: Lazy<DFA<&'static [u32]>> = + Lazy::new(|| { + static ALIGNED: &AlignAs<[u8], u32> = &AlignAs { + _align: [], + #[cfg(target_endian = "big")] + bytes: *include_bytes!("regional_indicator_rev.bigendian.dfa"), + #[cfg(target_endian = "little")] + bytes: *include_bytes!("regional_indicator_rev.littleendian.dfa"), + }; + let (dfa, _) = DFA::from_bytes(&ALIGNED.bytes) + .expect("serialized DFA should be valid"); + dfa + }); diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa index 1abdae880..d4bd841e5 100644 Binary files a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa and b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa differ diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa index 2f8aadd30..df1e9521d 100644 Binary files
a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa and b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa differ diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs index 97dd658e4..0f22eeefb 100644 --- a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs +++ b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs @@ -1,41 +1,19 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate dfa --name SENTENCE_BREAK_FWD --minimize --sparse --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)] +// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --shrink --rustfmt --safe SENTENCE_BREAK_FWD src/unicode/fsm/ // -// ucd-generate 0.2.12 is available on crates.io. +// regex-cli 0.0.1 is available on crates.io. -#[cfg(target_endian = "big")] -pub static SENTENCE_BREAK_FWD: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u32>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } +use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy}; - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("sentence_break_fwd.bigendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } -}); - -#[cfg(target_endian = "little")] -pub static SENTENCE_BREAK_FWD: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u32>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } - - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("sentence_break_fwd.littleendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +pub static SENTENCE_BREAK_FWD: Lazy<DFA<&'static [u8]>> = Lazy::new(|| { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = +
include_bytes!("sentence_break_fwd.bigendian.dfa"); + #[cfg(target_endian = "little")] + static BYTES: &'static [u8] = + include_bytes!("sentence_break_fwd.littleendian.dfa"); + let (dfa, _) = + DFA::from_bytes(BYTES).expect("serialized DFA should be valid"); + dfa }); diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa index 888e46599..08981bdd0 100644 Binary files a/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa and b/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa differ diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa index a1d527c74..daf01d60c 100644 Binary files a/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa and b/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa differ diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs b/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs index 32b69b611..97bd006fb 100644 --- a/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs +++ b/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs @@ -1,41 +1,19 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate dfa --name SIMPLE_WORD_FWD --sparse --minimize --state-size 2 src/unicode/fsm/ \w +// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --shrink --rustfmt --safe SIMPLE_WORD_FWD src/unicode/fsm/ \w // -// ucd-generate 0.2.12 is available on crates.io. +// regex-cli 0.0.1 is available on crates.io. 
-#[cfg(target_endian = "big")] -pub static SIMPLE_WORD_FWD: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u16>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } +use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy}; - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("simple_word_fwd.bigendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } -}); - -#[cfg(target_endian = "little")] -pub static SIMPLE_WORD_FWD: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u16>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } - - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("simple_word_fwd.littleendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +pub static SIMPLE_WORD_FWD: Lazy<DFA<&'static [u8]>> = Lazy::new(|| { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = + include_bytes!("simple_word_fwd.bigendian.dfa"); + #[cfg(target_endian = "little")] + static BYTES: &'static [u8] = + include_bytes!("simple_word_fwd.littleendian.dfa"); + let (dfa, _) = + DFA::from_bytes(BYTES).expect("serialized DFA should be valid"); + dfa }); diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa index bcfc4e9a1..fe32b0d98 100644 Binary files a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa and b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa differ diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa index d534a464a..0179b66f7 100644 Binary files a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa and
b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa differ diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs index 0780412ae..193deb0ca 100644 --- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs +++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs @@ -1,41 +1,24 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate dfa --name WHITESPACE_ANCHORED_FWD --anchored --classes --premultiply --minimize --state-size 1 src/unicode/fsm/ \s+ +// regex-cli generate serialize dense dfa --minimize --start-kind anchored --shrink --rustfmt --safe WHITESPACE_ANCHORED_FWD src/unicode/fsm/ \s+ // -// ucd-generate 0.2.12 is available on crates.io. +// regex-cli 0.0.1 is available on crates.io. -#[cfg(target_endian = "big")] -pub static WHITESPACE_ANCHORED_FWD: ::once_cell::sync::Lazy< - ::regex_automata::DenseDFA<&'static [u8], u8>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } +use regex_automata::{ + dfa::dense::DFA, + util::{lazy::Lazy, wire::AlignAs}, +}; - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("whitespace_anchored_fwd.bigendian.dfa"), - }; - - unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } -}); - -#[cfg(target_endian = "little")] -pub static WHITESPACE_ANCHORED_FWD: ::once_cell::sync::Lazy< - ::regex_automata::DenseDFA<&'static [u8], u8>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } - - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("whitespace_anchored_fwd.littleendian.dfa"), - }; - - unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } -}); +pub static WHITESPACE_ANCHORED_FWD: Lazy<DFA<&'static [u32]>> = + Lazy::new(|| { + static ALIGNED: &AlignAs<[u8], u32> = &AlignAs { + _align: [], + #[cfg(target_endian =
"big")] + bytes: *include_bytes!("whitespace_anchored_fwd.bigendian.dfa"), + #[cfg(target_endian = "little")] + bytes: *include_bytes!("whitespace_anchored_fwd.littleendian.dfa"), + }; + let (dfa, _) = DFA::from_bytes(&ALIGNED.bytes) + .expect("serialized DFA should be valid"); + dfa + }); diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa index 427d3a922..cf8ef736b 100644 Binary files a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa and b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa differ diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa index 7cc3a0a99..740fcd040 100644 Binary files a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa and b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa differ diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs index 3d0d7a661..2eb98c0bd 100644 --- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs +++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs @@ -1,41 +1,24 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate dfa --name WHITESPACE_ANCHORED_REV --reverse --anchored --classes --premultiply --minimize --state-size 2 src/unicode/fsm/ \s+ +// regex-cli generate serialize dense dfa --minimize --start-kind anchored --reverse --no-captures --shrink --rustfmt --safe WHITESPACE_ANCHORED_REV src/unicode/fsm/ \s+ // -// ucd-generate 0.2.12 is available on crates.io. +// regex-cli 0.0.1 is available on crates.io. 
-#[cfg(target_endian = "big")] -pub static WHITESPACE_ANCHORED_REV: ::once_cell::sync::Lazy< - ::regex_automata::DenseDFA<&'static [u16], u16>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u16; 0], - bytes: B, - } +use regex_automata::{ + dfa::dense::DFA, + util::{lazy::Lazy, wire::AlignAs}, +}; - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("whitespace_anchored_rev.bigendian.dfa"), - }; - - unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } -}); - -#[cfg(target_endian = "little")] -pub static WHITESPACE_ANCHORED_REV: ::once_cell::sync::Lazy< - ::regex_automata::DenseDFA<&'static [u16], u16>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u16; 0], - bytes: B, - } - - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("whitespace_anchored_rev.littleendian.dfa"), - }; - - unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } -}); +pub static WHITESPACE_ANCHORED_REV: Lazy<DFA<&'static [u32]>> = + Lazy::new(|| { + static ALIGNED: &AlignAs<[u8], u32> = &AlignAs { + _align: [], + #[cfg(target_endian = "big")] + bytes: *include_bytes!("whitespace_anchored_rev.bigendian.dfa"), + #[cfg(target_endian = "little")] + bytes: *include_bytes!("whitespace_anchored_rev.littleendian.dfa"), + }; + let (dfa, _) = DFA::from_bytes(&ALIGNED.bytes) + .expect("serialized DFA should be valid"); + dfa + }); diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa index efb9c8198..6cca67ff6 100644 Binary files a/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa and b/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa differ diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa index 9a716d060..d8bfd0126 100644 Binary files
a/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa and b/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa differ diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.rs b/vendor/bstr/src/unicode/fsm/word_break_fwd.rs index dcb5f6bce..825782f1c 100644 --- a/vendor/bstr/src/unicode/fsm/word_break_fwd.rs +++ b/vendor/bstr/src/unicode/fsm/word_break_fwd.rs @@ -1,41 +1,19 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate dfa --name WORD_BREAK_FWD --sparse --minimize --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)] +// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --shrink --rustfmt --safe WORD_BREAK_FWD src/unicode/fsm/ // -// ucd-generate 0.2.12 is available on crates.io. +// regex-cli 0.0.1 is available on crates.io. -#[cfg(target_endian = "big")] -pub static WORD_BREAK_FWD: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u32>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } +use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy}; - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("word_break_fwd.bigendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } -}); - -#[cfg(target_endian = "little")] -pub static WORD_BREAK_FWD: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u32>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } - - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("word_break_fwd.littleendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +pub static WORD_BREAK_FWD: Lazy<DFA<&'static [u8]>> = Lazy::new(|| { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = + include_bytes!("word_break_fwd.bigendian.dfa"); + #[cfg(target_endian = "little")] + static
BYTES: &'static [u8] = + include_bytes!("word_break_fwd.littleendian.dfa"); + let (dfa, _) = + DFA::from_bytes(BYTES).expect("serialized DFA should be valid"); + dfa }); diff --git a/vendor/bstr/src/unicode/grapheme.rs b/vendor/bstr/src/unicode/grapheme.rs index 13b730c48..8a701be98 100644 --- a/vendor/bstr/src/unicode/grapheme.rs +++ b/vendor/bstr/src/unicode/grapheme.rs @@ -1,4 +1,4 @@ -use regex_automata::DFA; +use regex_automata::{dfa::Automaton, Anchored, Input}; use crate::{ ext_slice::ByteSlice, @@ -211,9 +211,12 @@ pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) { // Safe because all ASCII bytes are valid UTF-8. let grapheme = unsafe { bs[..1].to_str_unchecked() }; (grapheme, 1) - } else if let Some(end) = GRAPHEME_BREAK_FWD.find(bs) { + } else if let Some(hm) = { + let input = Input::new(bs).anchored(Anchored::Yes); + GRAPHEME_BREAK_FWD.try_search_fwd(&input).unwrap() + } { // Safe because a match can only occur for valid UTF-8. - let grapheme = unsafe { bs[..end].to_str_unchecked() }; + let grapheme = unsafe { bs[..hm.offset()].to_str_unchecked() }; (grapheme, grapheme.len()) } else { const INVALID: &'static str = "\u{FFFD}"; @@ -226,8 +229,11 @@ pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) { fn decode_last_grapheme(bs: &[u8]) -> (&str, usize) { if bs.is_empty() { ("", 0) - } else if let Some(mut start) = GRAPHEME_BREAK_REV.rfind(bs) { - start = adjust_rev_for_regional_indicator(bs, start); + } else if let Some(hm) = { + let input = Input::new(bs).anchored(Anchored::Yes); + GRAPHEME_BREAK_REV.try_search_rev(&input).unwrap() + } { + let start = adjust_rev_for_regional_indicator(bs, hm.offset()); // Safe because a match can only occur for valid UTF-8. let grapheme = unsafe { bs[start..].to_str_unchecked() }; (grapheme, grapheme.len()) @@ -266,8 +272,11 @@ fn adjust_rev_for_regional_indicator(mut bs: &[u8], i: usize) -> usize { // regional indicator codepoints. 
A fix probably requires refactoring this // code a bit such that we don't rescan regional indicators. let mut count = 0; - while let Some(start) = REGIONAL_INDICATOR_REV.rfind(bs) { - bs = &bs[..start]; + while let Some(hm) = { + let input = Input::new(bs).anchored(Anchored::Yes); + REGIONAL_INDICATOR_REV.try_search_rev(&input).unwrap() + } { + bs = &bs[..hm.offset()]; count += 1; } if count % 2 == 0 { diff --git a/vendor/bstr/src/unicode/sentence.rs b/vendor/bstr/src/unicode/sentence.rs index ff29c7e25..0baf4dfeb 100644 --- a/vendor/bstr/src/unicode/sentence.rs +++ b/vendor/bstr/src/unicode/sentence.rs @@ -1,4 +1,4 @@ -use regex_automata::DFA; +use regex_automata::{dfa::Automaton, Anchored, Input}; use crate::{ ext_slice::ByteSlice, @@ -145,9 +145,12 @@ impl<'a> Iterator for SentenceIndices<'a> { fn decode_sentence(bs: &[u8]) -> (&str, usize) { if bs.is_empty() { ("", 0) - } else if let Some(end) = SENTENCE_BREAK_FWD.find(bs) { + } else if let Some(hm) = { + let input = Input::new(bs).anchored(Anchored::Yes); + SENTENCE_BREAK_FWD.try_search_fwd(&input).unwrap() + } { // Safe because a match can only occur for valid UTF-8. - let sentence = unsafe { bs[..end].to_str_unchecked() }; + let sentence = unsafe { bs[..hm.offset()].to_str_unchecked() }; (sentence, sentence.len()) } else { const INVALID: &'static str = "\u{FFFD}"; diff --git a/vendor/bstr/src/unicode/whitespace.rs b/vendor/bstr/src/unicode/whitespace.rs index b5eff300e..bf1f47c8e 100644 --- a/vendor/bstr/src/unicode/whitespace.rs +++ b/vendor/bstr/src/unicode/whitespace.rs @@ -1,4 +1,4 @@ -use regex_automata::DFA; +use regex_automata::{dfa::Automaton, Anchored, Input}; use crate::unicode::fsm::{ whitespace_anchored_fwd::WHITESPACE_ANCHORED_FWD, @@ -7,10 +7,18 @@ use crate::unicode::fsm::{ /// Return the first position of a non-whitespace character. 
pub fn whitespace_len_fwd(slice: &[u8]) -> usize { - WHITESPACE_ANCHORED_FWD.find(slice).unwrap_or(0) + let input = Input::new(slice).anchored(Anchored::Yes); + WHITESPACE_ANCHORED_FWD + .try_search_fwd(&input) + .unwrap() + .map_or(0, |hm| hm.offset()) } /// Return the last position of a non-whitespace character. pub fn whitespace_len_rev(slice: &[u8]) -> usize { - WHITESPACE_ANCHORED_REV.rfind(slice).unwrap_or(slice.len()) + let input = Input::new(slice).anchored(Anchored::Yes); + WHITESPACE_ANCHORED_REV + .try_search_rev(&input) + .unwrap() + .map_or(slice.len(), |hm| hm.offset()) } diff --git a/vendor/bstr/src/unicode/word.rs b/vendor/bstr/src/unicode/word.rs index 849f0c8e2..d6bf0f6f3 100644 --- a/vendor/bstr/src/unicode/word.rs +++ b/vendor/bstr/src/unicode/word.rs @@ -1,4 +1,4 @@ -use regex_automata::DFA; +use regex_automata::{dfa::Automaton, Anchored, Input}; use crate::{ ext_slice::ByteSlice, @@ -67,7 +67,9 @@ impl<'a> Iterator for Words<'a> { #[inline] fn next(&mut self) -> Option<&'a str> { while let Some(word) = self.0.next() { - if SIMPLE_WORD_FWD.is_match(word.as_bytes()) { + let input = + Input::new(word).anchored(Anchored::Yes).earliest(true); + if SIMPLE_WORD_FWD.try_search_fwd(&input).unwrap().is_some() { return Some(word); } } @@ -143,7 +145,9 @@ impl<'a> Iterator for WordIndices<'a> { #[inline] fn next(&mut self) -> Option<(usize, usize, &'a str)> { while let Some((start, end, word)) = self.0.next() { - if SIMPLE_WORD_FWD.is_match(word.as_bytes()) { + let input = + Input::new(word).anchored(Anchored::Yes).earliest(true); + if SIMPLE_WORD_FWD.try_search_fwd(&input).unwrap().is_some() { return Some((start, end, word)); } } @@ -307,9 +311,12 @@ impl<'a> Iterator for WordsWithBreakIndices<'a> { fn decode_word(bs: &[u8]) -> (&str, usize) { if bs.is_empty() { ("", 0) - } else if let Some(end) = WORD_BREAK_FWD.find(bs) { + } else if let Some(hm) = { + let input = Input::new(bs).anchored(Anchored::Yes); + WORD_BREAK_FWD.try_search_fwd(&input).unwrap() + 
} { // Safe because a match can only occur for valid UTF-8. - let word = unsafe { bs[..end].to_str_unchecked() }; + let word = unsafe { bs[..hm.offset()].to_str_unchecked() }; (word, word.len()) } else { const INVALID: &'static str = "\u{FFFD}"; -- cgit v1.2.3