diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 18:31:44 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-30 18:31:44 +0000 |
commit | c23a457e72abe608715ac76f076f47dc42af07a5 (patch) | |
tree | 2772049aaf84b5c9d0ed12ec8d86812f7a7904b6 /vendor/bstr/src/unicode | |
parent | Releasing progress-linux version 1.73.0+dfsg1-1~progress7.99u1. (diff) | |
download | rustc-c23a457e72abe608715ac76f076f47dc42af07a5.tar.xz rustc-c23a457e72abe608715ac76f076f47dc42af07a5.zip |
Merging upstream version 1.74.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/bstr/src/unicode')
28 files changed, 167 insertions, 301 deletions
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa Binary files differindex 31f99c1f5..c4321e2c9 100644 --- a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa +++ b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa Binary files differindex 3a51728bb..3b9905da9 100644 --- a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa +++ b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs index dea4a7e3e..ccba7d952 100644 --- a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs +++ b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs @@ -1,41 +1,19 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate dfa --name GRAPHEME_BREAK_FWD --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)] +// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --shrink --rustfmt --safe GRAPHEME_BREAK_FWD src/unicode/fsm/ <snip: arg too long> // -// ucd-generate 0.2.12 is available on crates.io. +// regex-cli 0.0.1 is available on crates.io. -#[cfg(target_endian = "big")] -pub static GRAPHEME_BREAK_FWD: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u16>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } +use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy}; - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("grapheme_break_fwd.bigendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } -}); - -#[cfg(target_endian = "little")] -pub static GRAPHEME_BREAK_FWD: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u16>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } - - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("grapheme_break_fwd.littleendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +pub static GRAPHEME_BREAK_FWD: Lazy<DFA<&'static [u8]>> = Lazy::new(|| { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = + include_bytes!("grapheme_break_fwd.bigendian.dfa"); + #[cfg(target_endian = "little")] + static BYTES: &'static [u8] = + include_bytes!("grapheme_break_fwd.littleendian.dfa"); + let (dfa, _) = + DFA::from_bytes(BYTES).expect("serialized DFA should be valid"); + dfa }); diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa Binary files differindex 742d2a6a2..5d7d34a01 100644 --- a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa +++ b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa Binary files differindex d1937f26c..7472844d4 100644 --- a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa +++ b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs index 2d2cd542f..e5619b2f0 100644 --- a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs +++ b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs @@ -1,41 +1,19 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate dfa --name GRAPHEME_BREAK_REV --reverse --longest --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)] +// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --reverse --match-kind all --no-captures --shrink --rustfmt --safe GRAPHEME_BREAK_REV src/unicode/fsm/ <snip: arg too long> // -// ucd-generate 0.2.12 is available on crates.io. +// regex-cli 0.0.1 is available on crates.io. -#[cfg(target_endian = "big")] -pub static GRAPHEME_BREAK_REV: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u16>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } +use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy}; - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("grapheme_break_rev.bigendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } -}); - -#[cfg(target_endian = "little")] -pub static GRAPHEME_BREAK_REV: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u16>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } - - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("grapheme_break_rev.littleendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +pub static GRAPHEME_BREAK_REV: Lazy<DFA<&'static [u8]>> = Lazy::new(|| { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = + include_bytes!("grapheme_break_rev.bigendian.dfa"); + #[cfg(target_endian = "little")] + static BYTES: &'static [u8] = + include_bytes!("grapheme_break_rev.littleendian.dfa"); + let (dfa, _) = + DFA::from_bytes(BYTES).expect("serialized DFA should be valid"); + dfa }); diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa Binary files differindex 1a3357f71..1f830009a 100644 --- a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa +++ b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa Binary files differindex e437aae3a..b0db99dd1 100644 --- a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa +++ b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs index db7a40fcd..af1c73051 100644 --- a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs +++ b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs @@ -1,41 +1,24 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate dfa --name REGIONAL_INDICATOR_REV --reverse --classes --minimize --anchored --premultiply --state-size 1 src/unicode/fsm/ \p{gcb=Regional_Indicator} +// regex-cli generate serialize dense dfa --minimize --start-kind anchored --reverse --no-captures --shrink --rustfmt --safe REGIONAL_INDICATOR_REV src/unicode/fsm/ \p{gcb=Regional_Indicator} // -// ucd-generate 0.2.12 is available on crates.io. +// regex-cli 0.0.1 is available on crates.io. -#[cfg(target_endian = "big")] -pub static REGIONAL_INDICATOR_REV: ::once_cell::sync::Lazy< - ::regex_automata::DenseDFA<&'static [u8], u8>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } +use regex_automata::{ + dfa::dense::DFA, + util::{lazy::Lazy, wire::AlignAs}, +}; - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("regional_indicator_rev.bigendian.dfa"), - }; - - unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } -}); - -#[cfg(target_endian = "little")] -pub static REGIONAL_INDICATOR_REV: ::once_cell::sync::Lazy< - ::regex_automata::DenseDFA<&'static [u8], u8>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } - - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("regional_indicator_rev.littleendian.dfa"), - }; - - unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } -}); +pub static REGIONAL_INDICATOR_REV: Lazy<DFA<&'static [u32]>> = + Lazy::new(|| { + static ALIGNED: &AlignAs<[u8], u32> = &AlignAs { + _align: [], + #[cfg(target_endian = "big")] + bytes: *include_bytes!("regional_indicator_rev.bigendian.dfa"), + #[cfg(target_endian = "little")] + bytes: *include_bytes!("regional_indicator_rev.littleendian.dfa"), + }; + let (dfa, _) = DFA::from_bytes(&ALIGNED.bytes) + .expect("serialized DFA should be valid"); + dfa + }); diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa Binary files differindex 1abdae880..d4bd841e5 100644 --- a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa +++ b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa Binary files differindex 2f8aadd30..df1e9521d 100644 --- a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa +++ b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs index 97dd658e4..0f22eeefb 100644 --- a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs +++ b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs @@ -1,41 +1,19 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate dfa --name SENTENCE_BREAK_FWD --minimize --sparse --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)] +// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --shrink --rustfmt --safe SENTENCE_BREAK_FWD src/unicode/fsm/ <snip: arg too long> // -// ucd-generate 0.2.12 is available on crates.io. +// regex-cli 0.0.1 is available on crates.io. -#[cfg(target_endian = "big")] -pub static SENTENCE_BREAK_FWD: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u32>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } +use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy}; - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("sentence_break_fwd.bigendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } -}); - -#[cfg(target_endian = "little")] -pub static SENTENCE_BREAK_FWD: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u32>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } - - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("sentence_break_fwd.littleendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +pub static SENTENCE_BREAK_FWD: Lazy<DFA<&'static [u8]>> = Lazy::new(|| { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = + include_bytes!("sentence_break_fwd.bigendian.dfa"); + #[cfg(target_endian = "little")] + static BYTES: &'static [u8] = + include_bytes!("sentence_break_fwd.littleendian.dfa"); + let (dfa, _) = + DFA::from_bytes(BYTES).expect("serialized DFA should be valid"); + dfa }); diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa Binary files differindex 888e46599..08981bdd0 100644 --- a/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa +++ b/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa Binary files differindex a1d527c74..daf01d60c 100644 --- a/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa +++ b/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs b/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs index 32b69b611..97bd006fb 100644 --- a/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs +++ b/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs @@ -1,41 +1,19 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate dfa --name SIMPLE_WORD_FWD --sparse --minimize --state-size 2 src/unicode/fsm/ \w +// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --shrink --rustfmt --safe SIMPLE_WORD_FWD src/unicode/fsm/ \w // -// ucd-generate 0.2.12 is available on crates.io. +// regex-cli 0.0.1 is available on crates.io. -#[cfg(target_endian = "big")] -pub static SIMPLE_WORD_FWD: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u16>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } +use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy}; - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("simple_word_fwd.bigendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } -}); - -#[cfg(target_endian = "little")] -pub static SIMPLE_WORD_FWD: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u16>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } - - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("simple_word_fwd.littleendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +pub static SIMPLE_WORD_FWD: Lazy<DFA<&'static [u8]>> = Lazy::new(|| { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = + include_bytes!("simple_word_fwd.bigendian.dfa"); + #[cfg(target_endian = "little")] + static BYTES: &'static [u8] = + include_bytes!("simple_word_fwd.littleendian.dfa"); + let (dfa, _) = + DFA::from_bytes(BYTES).expect("serialized DFA should be valid"); + dfa }); diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa Binary files differindex bcfc4e9a1..fe32b0d98 100644 --- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa +++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa Binary files differindex d534a464a..0179b66f7 100644 --- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa +++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs index 0780412ae..193deb0ca 100644 --- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs +++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs @@ -1,41 +1,24 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate dfa --name WHITESPACE_ANCHORED_FWD --anchored --classes --premultiply --minimize --state-size 1 src/unicode/fsm/ \s+ +// regex-cli generate serialize dense dfa --minimize --start-kind anchored --shrink --rustfmt --safe WHITESPACE_ANCHORED_FWD src/unicode/fsm/ \s+ // -// ucd-generate 0.2.12 is available on crates.io. +// regex-cli 0.0.1 is available on crates.io. -#[cfg(target_endian = "big")] -pub static WHITESPACE_ANCHORED_FWD: ::once_cell::sync::Lazy< - ::regex_automata::DenseDFA<&'static [u8], u8>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } +use regex_automata::{ + dfa::dense::DFA, + util::{lazy::Lazy, wire::AlignAs}, +}; - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("whitespace_anchored_fwd.bigendian.dfa"), - }; - - unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } -}); - -#[cfg(target_endian = "little")] -pub static WHITESPACE_ANCHORED_FWD: ::once_cell::sync::Lazy< - ::regex_automata::DenseDFA<&'static [u8], u8>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } - - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("whitespace_anchored_fwd.littleendian.dfa"), - }; - - unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } -}); +pub static WHITESPACE_ANCHORED_FWD: Lazy<DFA<&'static [u32]>> = + Lazy::new(|| { + static ALIGNED: &AlignAs<[u8], u32> = &AlignAs { + _align: [], + #[cfg(target_endian = "big")] + bytes: *include_bytes!("whitespace_anchored_fwd.bigendian.dfa"), + #[cfg(target_endian = "little")] + bytes: *include_bytes!("whitespace_anchored_fwd.littleendian.dfa"), + }; + let (dfa, _) = DFA::from_bytes(&ALIGNED.bytes) + .expect("serialized DFA should be valid"); + dfa + }); diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa Binary files differindex 427d3a922..cf8ef736b 100644 --- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa +++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa Binary files differindex 7cc3a0a99..740fcd040 100644 --- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa +++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs index 3d0d7a661..2eb98c0bd 100644 --- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs +++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs @@ -1,41 +1,24 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate dfa --name WHITESPACE_ANCHORED_REV --reverse --anchored --classes --premultiply --minimize --state-size 2 src/unicode/fsm/ \s+ +// regex-cli generate serialize dense dfa --minimize --start-kind anchored --reverse --no-captures --shrink --rustfmt --safe WHITESPACE_ANCHORED_REV src/unicode/fsm/ \s+ // -// ucd-generate 0.2.12 is available on crates.io. +// regex-cli 0.0.1 is available on crates.io. -#[cfg(target_endian = "big")] -pub static WHITESPACE_ANCHORED_REV: ::once_cell::sync::Lazy< - ::regex_automata::DenseDFA<&'static [u16], u16>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u16; 0], - bytes: B, - } +use regex_automata::{ + dfa::dense::DFA, + util::{lazy::Lazy, wire::AlignAs}, +}; - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("whitespace_anchored_rev.bigendian.dfa"), - }; - - unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } -}); - -#[cfg(target_endian = "little")] -pub static WHITESPACE_ANCHORED_REV: ::once_cell::sync::Lazy< - ::regex_automata::DenseDFA<&'static [u16], u16>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u16; 0], - bytes: B, - } - - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("whitespace_anchored_rev.littleendian.dfa"), - }; - - unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } -}); +pub static WHITESPACE_ANCHORED_REV: Lazy<DFA<&'static [u32]>> = + Lazy::new(|| { + static ALIGNED: &AlignAs<[u8], u32> = &AlignAs { + _align: [], + #[cfg(target_endian = "big")] + bytes: *include_bytes!("whitespace_anchored_rev.bigendian.dfa"), + #[cfg(target_endian = "little")] + bytes: *include_bytes!("whitespace_anchored_rev.littleendian.dfa"), + }; + let (dfa, _) = DFA::from_bytes(&ALIGNED.bytes) + .expect("serialized DFA should be valid"); + dfa + }); diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa Binary files differindex efb9c8198..6cca67ff6 100644 --- a/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa +++ b/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa Binary files differindex 9a716d060..d8bfd0126 100644 --- a/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa +++ b/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.rs b/vendor/bstr/src/unicode/fsm/word_break_fwd.rs index dcb5f6bce..825782f1c 100644 --- a/vendor/bstr/src/unicode/fsm/word_break_fwd.rs +++ b/vendor/bstr/src/unicode/fsm/word_break_fwd.rs @@ -1,41 +1,19 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate dfa --name WORD_BREAK_FWD --sparse --minimize --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)] +// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --shrink --rustfmt --safe WORD_BREAK_FWD src/unicode/fsm/ <snip: arg too long> // -// ucd-generate 0.2.12 is available on crates.io. +// regex-cli 0.0.1 is available on crates.io. -#[cfg(target_endian = "big")] -pub static WORD_BREAK_FWD: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u32>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } +use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy}; - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("word_break_fwd.bigendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } -}); - -#[cfg(target_endian = "little")] -pub static WORD_BREAK_FWD: ::once_cell::sync::Lazy< - ::regex_automata::SparseDFA<&'static [u8], u32>, -> = ::once_cell::sync::Lazy::new(|| { - #[repr(C)] - struct Aligned<B: ?Sized> { - _align: [u8; 0], - bytes: B, - } - - static ALIGNED: &'static Aligned<[u8]> = &Aligned { - _align: [], - bytes: *include_bytes!("word_break_fwd.littleendian.dfa"), - }; - - unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } +pub static WORD_BREAK_FWD: Lazy<DFA<&'static [u8]>> = Lazy::new(|| { + #[cfg(target_endian = "big")] + static BYTES: &'static [u8] = + include_bytes!("word_break_fwd.bigendian.dfa"); + #[cfg(target_endian = "little")] + static BYTES: &'static [u8] = + include_bytes!("word_break_fwd.littleendian.dfa"); + let (dfa, _) = + DFA::from_bytes(BYTES).expect("serialized DFA should be valid"); + dfa }); diff --git a/vendor/bstr/src/unicode/grapheme.rs b/vendor/bstr/src/unicode/grapheme.rs index 13b730c48..8a701be98 100644 --- a/vendor/bstr/src/unicode/grapheme.rs +++ b/vendor/bstr/src/unicode/grapheme.rs @@ -1,4 +1,4 @@ -use regex_automata::DFA; +use regex_automata::{dfa::Automaton, Anchored, Input}; use crate::{ ext_slice::ByteSlice, @@ -211,9 +211,12 @@ pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) { // Safe because all ASCII bytes are valid UTF-8. let grapheme = unsafe { bs[..1].to_str_unchecked() }; (grapheme, 1) - } else if let Some(end) = GRAPHEME_BREAK_FWD.find(bs) { + } else if let Some(hm) = { + let input = Input::new(bs).anchored(Anchored::Yes); + GRAPHEME_BREAK_FWD.try_search_fwd(&input).unwrap() + } { // Safe because a match can only occur for valid UTF-8. - let grapheme = unsafe { bs[..end].to_str_unchecked() }; + let grapheme = unsafe { bs[..hm.offset()].to_str_unchecked() }; (grapheme, grapheme.len()) } else { const INVALID: &'static str = "\u{FFFD}"; @@ -226,8 +229,11 @@ pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) { fn decode_last_grapheme(bs: &[u8]) -> (&str, usize) { if bs.is_empty() { ("", 0) - } else if let Some(mut start) = GRAPHEME_BREAK_REV.rfind(bs) { - start = adjust_rev_for_regional_indicator(bs, start); + } else if let Some(hm) = { + let input = Input::new(bs).anchored(Anchored::Yes); + GRAPHEME_BREAK_REV.try_search_rev(&input).unwrap() + } { + let start = adjust_rev_for_regional_indicator(bs, hm.offset()); // Safe because a match can only occur for valid UTF-8. let grapheme = unsafe { bs[start..].to_str_unchecked() }; (grapheme, grapheme.len()) @@ -266,8 +272,11 @@ fn adjust_rev_for_regional_indicator(mut bs: &[u8], i: usize) -> usize { // regional indicator codepoints. A fix probably requires refactoring this // code a bit such that we don't rescan regional indicators. let mut count = 0; - while let Some(start) = REGIONAL_INDICATOR_REV.rfind(bs) { - bs = &bs[..start]; + while let Some(hm) = { + let input = Input::new(bs).anchored(Anchored::Yes); + REGIONAL_INDICATOR_REV.try_search_rev(&input).unwrap() + } { + bs = &bs[..hm.offset()]; count += 1; } if count % 2 == 0 { diff --git a/vendor/bstr/src/unicode/sentence.rs b/vendor/bstr/src/unicode/sentence.rs index ff29c7e25..0baf4dfeb 100644 --- a/vendor/bstr/src/unicode/sentence.rs +++ b/vendor/bstr/src/unicode/sentence.rs @@ -1,4 +1,4 @@ -use regex_automata::DFA; +use regex_automata::{dfa::Automaton, Anchored, Input}; use crate::{ ext_slice::ByteSlice, @@ -145,9 +145,12 @@ impl<'a> Iterator for SentenceIndices<'a> { fn decode_sentence(bs: &[u8]) -> (&str, usize) { if bs.is_empty() { ("", 0) - } else if let Some(end) = SENTENCE_BREAK_FWD.find(bs) { + } else if let Some(hm) = { + let input = Input::new(bs).anchored(Anchored::Yes); + SENTENCE_BREAK_FWD.try_search_fwd(&input).unwrap() + } { // Safe because a match can only occur for valid UTF-8. - let sentence = unsafe { bs[..end].to_str_unchecked() }; + let sentence = unsafe { bs[..hm.offset()].to_str_unchecked() }; (sentence, sentence.len()) } else { const INVALID: &'static str = "\u{FFFD}"; diff --git a/vendor/bstr/src/unicode/whitespace.rs b/vendor/bstr/src/unicode/whitespace.rs index b5eff300e..bf1f47c8e 100644 --- a/vendor/bstr/src/unicode/whitespace.rs +++ b/vendor/bstr/src/unicode/whitespace.rs @@ -1,4 +1,4 @@ -use regex_automata::DFA; +use regex_automata::{dfa::Automaton, Anchored, Input}; use crate::unicode::fsm::{ whitespace_anchored_fwd::WHITESPACE_ANCHORED_FWD, @@ -7,10 +7,18 @@ use crate::unicode::fsm::{ /// Return the first position of a non-whitespace character. pub fn whitespace_len_fwd(slice: &[u8]) -> usize { - WHITESPACE_ANCHORED_FWD.find(slice).unwrap_or(0) + let input = Input::new(slice).anchored(Anchored::Yes); + WHITESPACE_ANCHORED_FWD + .try_search_fwd(&input) + .unwrap() + .map_or(0, |hm| hm.offset()) } /// Return the last position of a non-whitespace character. pub fn whitespace_len_rev(slice: &[u8]) -> usize { - WHITESPACE_ANCHORED_REV.rfind(slice).unwrap_or(slice.len()) + let input = Input::new(slice).anchored(Anchored::Yes); + WHITESPACE_ANCHORED_REV + .try_search_rev(&input) + .unwrap() + .map_or(slice.len(), |hm| hm.offset()) } diff --git a/vendor/bstr/src/unicode/word.rs b/vendor/bstr/src/unicode/word.rs index 849f0c8e2..d6bf0f6f3 100644 --- a/vendor/bstr/src/unicode/word.rs +++ b/vendor/bstr/src/unicode/word.rs @@ -1,4 +1,4 @@ -use regex_automata::DFA; +use regex_automata::{dfa::Automaton, Anchored, Input}; use crate::{ ext_slice::ByteSlice, @@ -67,7 +67,9 @@ impl<'a> Iterator for Words<'a> { #[inline] fn next(&mut self) -> Option<&'a str> { while let Some(word) = self.0.next() { - if SIMPLE_WORD_FWD.is_match(word.as_bytes()) { + let input = + Input::new(word).anchored(Anchored::Yes).earliest(true); + if SIMPLE_WORD_FWD.try_search_fwd(&input).unwrap().is_some() { return Some(word); } } @@ -143,7 +145,9 @@ impl<'a> Iterator for WordIndices<'a> { #[inline] fn next(&mut self) -> Option<(usize, usize, &'a str)> { while let Some((start, end, word)) = self.0.next() { - if SIMPLE_WORD_FWD.is_match(word.as_bytes()) { + let input = + Input::new(word).anchored(Anchored::Yes).earliest(true); + if SIMPLE_WORD_FWD.try_search_fwd(&input).unwrap().is_some() { return Some((start, end, word)); } } @@ -307,9 +311,12 @@ impl<'a> Iterator for WordsWithBreakIndices<'a> { fn decode_word(bs: &[u8]) -> (&str, usize) { if bs.is_empty() { ("", 0) - } else if let Some(end) = WORD_BREAK_FWD.find(bs) { + } else if let Some(hm) = { + let input = Input::new(bs).anchored(Anchored::Yes); + WORD_BREAK_FWD.try_search_fwd(&input).unwrap() + } { // Safe because a match can only occur for valid UTF-8. - let word = unsafe { bs[..end].to_str_unchecked() }; + let word = unsafe { bs[..hm.offset()].to_str_unchecked() }; (word, word.len()) } else { const INVALID: &'static str = "\u{FFFD}"; |