summary refs log tree commit diff stats
path: root/vendor/bstr/src/unicode
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 18:31:44 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 18:31:44 +0000
commit c23a457e72abe608715ac76f076f47dc42af07a5 (patch)
tree 2772049aaf84b5c9d0ed12ec8d86812f7a7904b6 /vendor/bstr/src/unicode
parent Releasing progress-linux version 1.73.0+dfsg1-1~progress7.99u1. (diff)
download rustc-c23a457e72abe608715ac76f076f47dc42af07a5.tar.xz
rustc-c23a457e72abe608715ac76f076f47dc42af07a5.zip
Merging upstream version 1.74.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/bstr/src/unicode')
-rw-r--r-- vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa bin 10781 -> 22420 bytes
-rw-r--r-- vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa bin 10781 -> 22420 bytes
-rw-r--r-- vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs 48
-rw-r--r-- vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa bin 55271 -> 90997 bytes
-rw-r--r-- vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa bin 55271 -> 90997 bytes
-rw-r--r-- vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs 48
-rw-r--r-- vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa bin 366 -> 1240 bytes
-rw-r--r-- vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa bin 366 -> 1240 bytes
-rw-r--r-- vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs 55
-rw-r--r-- vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa bin 153619 -> 200879 bytes
-rw-r--r-- vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa bin 153619 -> 200879 bytes
-rw-r--r-- vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs 48
-rw-r--r-- vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa bin 9237 -> 11095 bytes
-rw-r--r-- vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa bin 9237 -> 11095 bytes
-rw-r--r-- vendor/bstr/src/unicode/fsm/simple_word_fwd.rs 48
-rw-r--r-- vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa bin 572 -> 2964 bytes
-rw-r--r-- vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa bin 572 -> 2964 bytes
-rw-r--r-- vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs 55
-rw-r--r-- vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa bin 884 -> 3232 bytes
-rw-r--r-- vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa bin 884 -> 3232 bytes
-rw-r--r-- vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs 55
-rw-r--r-- vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa bin 236309 -> 299263 bytes
-rw-r--r-- vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa bin 236309 -> 299263 bytes
-rw-r--r-- vendor/bstr/src/unicode/fsm/word_break_fwd.rs 48
-rw-r--r-- vendor/bstr/src/unicode/grapheme.rs 23
-rw-r--r-- vendor/bstr/src/unicode/sentence.rs 9
-rw-r--r-- vendor/bstr/src/unicode/whitespace.rs 14
-rw-r--r-- vendor/bstr/src/unicode/word.rs 17
28 files changed, 167 insertions, 301 deletions
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa
index 31f99c1f5..c4321e2c9 100644
--- a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa
index 3a51728bb..3b9905da9 100644
--- a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs
index dea4a7e3e..ccba7d952 100644
--- a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs
@@ -1,41 +1,19 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
-// ucd-generate dfa --name GRAPHEME_BREAK_FWD --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)]
+// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --shrink --rustfmt --safe GRAPHEME_BREAK_FWD src/unicode/fsm/ <snip: arg too long>
//
-// ucd-generate 0.2.12 is available on crates.io.
+// regex-cli 0.0.1 is available on crates.io.
-#[cfg(target_endian = "big")]
-pub static GRAPHEME_BREAK_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u16>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
+use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy};
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("grapheme_break_fwd.bigendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
-});
-
-#[cfg(target_endian = "little")]
-pub static GRAPHEME_BREAK_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u16>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
-
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("grapheme_break_fwd.littleendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+pub static GRAPHEME_BREAK_FWD: Lazy<DFA<&'static [u8]>> = Lazy::new(|| {
+ #[cfg(target_endian = "big")]
+ static BYTES: &'static [u8] =
+ include_bytes!("grapheme_break_fwd.bigendian.dfa");
+ #[cfg(target_endian = "little")]
+ static BYTES: &'static [u8] =
+ include_bytes!("grapheme_break_fwd.littleendian.dfa");
+ let (dfa, _) =
+ DFA::from_bytes(BYTES).expect("serialized DFA should be valid");
+ dfa
});
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa
index 742d2a6a2..5d7d34a01 100644
--- a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa
index d1937f26c..7472844d4 100644
--- a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs
index 2d2cd542f..e5619b2f0 100644
--- a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs
@@ -1,41 +1,19 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
-// ucd-generate dfa --name GRAPHEME_BREAK_REV --reverse --longest --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)]
+// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --reverse --match-kind all --no-captures --shrink --rustfmt --safe GRAPHEME_BREAK_REV src/unicode/fsm/ <snip: arg too long>
//
-// ucd-generate 0.2.12 is available on crates.io.
+// regex-cli 0.0.1 is available on crates.io.
-#[cfg(target_endian = "big")]
-pub static GRAPHEME_BREAK_REV: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u16>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
+use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy};
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("grapheme_break_rev.bigendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
-});
-
-#[cfg(target_endian = "little")]
-pub static GRAPHEME_BREAK_REV: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u16>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
-
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("grapheme_break_rev.littleendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+pub static GRAPHEME_BREAK_REV: Lazy<DFA<&'static [u8]>> = Lazy::new(|| {
+ #[cfg(target_endian = "big")]
+ static BYTES: &'static [u8] =
+ include_bytes!("grapheme_break_rev.bigendian.dfa");
+ #[cfg(target_endian = "little")]
+ static BYTES: &'static [u8] =
+ include_bytes!("grapheme_break_rev.littleendian.dfa");
+ let (dfa, _) =
+ DFA::from_bytes(BYTES).expect("serialized DFA should be valid");
+ dfa
});
diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa
index 1a3357f71..1f830009a 100644
--- a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa
index e437aae3a..b0db99dd1 100644
--- a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs
index db7a40fcd..af1c73051 100644
--- a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs
+++ b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs
@@ -1,41 +1,24 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
-// ucd-generate dfa --name REGIONAL_INDICATOR_REV --reverse --classes --minimize --anchored --premultiply --state-size 1 src/unicode/fsm/ \p{gcb=Regional_Indicator}
+// regex-cli generate serialize dense dfa --minimize --start-kind anchored --reverse --no-captures --shrink --rustfmt --safe REGIONAL_INDICATOR_REV src/unicode/fsm/ \p{gcb=Regional_Indicator}
//
-// ucd-generate 0.2.12 is available on crates.io.
+// regex-cli 0.0.1 is available on crates.io.
-#[cfg(target_endian = "big")]
-pub static REGIONAL_INDICATOR_REV: ::once_cell::sync::Lazy<
- ::regex_automata::DenseDFA<&'static [u8], u8>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
+use regex_automata::{
+ dfa::dense::DFA,
+ util::{lazy::Lazy, wire::AlignAs},
+};
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("regional_indicator_rev.bigendian.dfa"),
- };
-
- unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
-});
-
-#[cfg(target_endian = "little")]
-pub static REGIONAL_INDICATOR_REV: ::once_cell::sync::Lazy<
- ::regex_automata::DenseDFA<&'static [u8], u8>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
-
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("regional_indicator_rev.littleendian.dfa"),
- };
-
- unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
-});
+pub static REGIONAL_INDICATOR_REV: Lazy<DFA<&'static [u32]>> =
+ Lazy::new(|| {
+ static ALIGNED: &AlignAs<[u8], u32> = &AlignAs {
+ _align: [],
+ #[cfg(target_endian = "big")]
+ bytes: *include_bytes!("regional_indicator_rev.bigendian.dfa"),
+ #[cfg(target_endian = "little")]
+ bytes: *include_bytes!("regional_indicator_rev.littleendian.dfa"),
+ };
+ let (dfa, _) = DFA::from_bytes(&ALIGNED.bytes)
+ .expect("serialized DFA should be valid");
+ dfa
+ });
diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa
index 1abdae880..d4bd841e5 100644
--- a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa
index 2f8aadd30..df1e9521d 100644
--- a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs
index 97dd658e4..0f22eeefb 100644
--- a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs
+++ b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs
@@ -1,41 +1,19 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
-// ucd-generate dfa --name SENTENCE_BREAK_FWD --minimize --sparse --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)]
+// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --shrink --rustfmt --safe SENTENCE_BREAK_FWD src/unicode/fsm/ <snip: arg too long>
//
-// ucd-generate 0.2.12 is available on crates.io.
+// regex-cli 0.0.1 is available on crates.io.
-#[cfg(target_endian = "big")]
-pub static SENTENCE_BREAK_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u32>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
+use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy};
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("sentence_break_fwd.bigendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
-});
-
-#[cfg(target_endian = "little")]
-pub static SENTENCE_BREAK_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u32>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
-
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("sentence_break_fwd.littleendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+pub static SENTENCE_BREAK_FWD: Lazy<DFA<&'static [u8]>> = Lazy::new(|| {
+ #[cfg(target_endian = "big")]
+ static BYTES: &'static [u8] =
+ include_bytes!("sentence_break_fwd.bigendian.dfa");
+ #[cfg(target_endian = "little")]
+ static BYTES: &'static [u8] =
+ include_bytes!("sentence_break_fwd.littleendian.dfa");
+ let (dfa, _) =
+ DFA::from_bytes(BYTES).expect("serialized DFA should be valid");
+ dfa
});
diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa
index 888e46599..08981bdd0 100644
--- a/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa
index a1d527c74..daf01d60c 100644
--- a/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs b/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs
index 32b69b611..97bd006fb 100644
--- a/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs
+++ b/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs
@@ -1,41 +1,19 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
-// ucd-generate dfa --name SIMPLE_WORD_FWD --sparse --minimize --state-size 2 src/unicode/fsm/ \w
+// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --shrink --rustfmt --safe SIMPLE_WORD_FWD src/unicode/fsm/ \w
//
-// ucd-generate 0.2.12 is available on crates.io.
+// regex-cli 0.0.1 is available on crates.io.
-#[cfg(target_endian = "big")]
-pub static SIMPLE_WORD_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u16>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
+use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy};
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("simple_word_fwd.bigendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
-});
-
-#[cfg(target_endian = "little")]
-pub static SIMPLE_WORD_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u16>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
-
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("simple_word_fwd.littleendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+pub static SIMPLE_WORD_FWD: Lazy<DFA<&'static [u8]>> = Lazy::new(|| {
+ #[cfg(target_endian = "big")]
+ static BYTES: &'static [u8] =
+ include_bytes!("simple_word_fwd.bigendian.dfa");
+ #[cfg(target_endian = "little")]
+ static BYTES: &'static [u8] =
+ include_bytes!("simple_word_fwd.littleendian.dfa");
+ let (dfa, _) =
+ DFA::from_bytes(BYTES).expect("serialized DFA should be valid");
+ dfa
});
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa
index bcfc4e9a1..fe32b0d98 100644
--- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa
index d534a464a..0179b66f7 100644
--- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs
index 0780412ae..193deb0ca 100644
--- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs
@@ -1,41 +1,24 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
-// ucd-generate dfa --name WHITESPACE_ANCHORED_FWD --anchored --classes --premultiply --minimize --state-size 1 src/unicode/fsm/ \s+
+// regex-cli generate serialize dense dfa --minimize --start-kind anchored --shrink --rustfmt --safe WHITESPACE_ANCHORED_FWD src/unicode/fsm/ \s+
//
-// ucd-generate 0.2.12 is available on crates.io.
+// regex-cli 0.0.1 is available on crates.io.
-#[cfg(target_endian = "big")]
-pub static WHITESPACE_ANCHORED_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::DenseDFA<&'static [u8], u8>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
+use regex_automata::{
+ dfa::dense::DFA,
+ util::{lazy::Lazy, wire::AlignAs},
+};
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("whitespace_anchored_fwd.bigendian.dfa"),
- };
-
- unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
-});
-
-#[cfg(target_endian = "little")]
-pub static WHITESPACE_ANCHORED_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::DenseDFA<&'static [u8], u8>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
-
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("whitespace_anchored_fwd.littleendian.dfa"),
- };
-
- unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
-});
+pub static WHITESPACE_ANCHORED_FWD: Lazy<DFA<&'static [u32]>> =
+ Lazy::new(|| {
+ static ALIGNED: &AlignAs<[u8], u32> = &AlignAs {
+ _align: [],
+ #[cfg(target_endian = "big")]
+ bytes: *include_bytes!("whitespace_anchored_fwd.bigendian.dfa"),
+ #[cfg(target_endian = "little")]
+ bytes: *include_bytes!("whitespace_anchored_fwd.littleendian.dfa"),
+ };
+ let (dfa, _) = DFA::from_bytes(&ALIGNED.bytes)
+ .expect("serialized DFA should be valid");
+ dfa
+ });
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa
index 427d3a922..cf8ef736b 100644
--- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa
index 7cc3a0a99..740fcd040 100644
--- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs
index 3d0d7a661..2eb98c0bd 100644
--- a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs
@@ -1,41 +1,24 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
-// ucd-generate dfa --name WHITESPACE_ANCHORED_REV --reverse --anchored --classes --premultiply --minimize --state-size 2 src/unicode/fsm/ \s+
+// regex-cli generate serialize dense dfa --minimize --start-kind anchored --reverse --no-captures --shrink --rustfmt --safe WHITESPACE_ANCHORED_REV src/unicode/fsm/ \s+
//
-// ucd-generate 0.2.12 is available on crates.io.
+// regex-cli 0.0.1 is available on crates.io.
-#[cfg(target_endian = "big")]
-pub static WHITESPACE_ANCHORED_REV: ::once_cell::sync::Lazy<
- ::regex_automata::DenseDFA<&'static [u16], u16>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u16; 0],
- bytes: B,
- }
+use regex_automata::{
+ dfa::dense::DFA,
+ util::{lazy::Lazy, wire::AlignAs},
+};
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("whitespace_anchored_rev.bigendian.dfa"),
- };
-
- unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
-});
-
-#[cfg(target_endian = "little")]
-pub static WHITESPACE_ANCHORED_REV: ::once_cell::sync::Lazy<
- ::regex_automata::DenseDFA<&'static [u16], u16>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u16; 0],
- bytes: B,
- }
-
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("whitespace_anchored_rev.littleendian.dfa"),
- };
-
- unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
-});
+pub static WHITESPACE_ANCHORED_REV: Lazy<DFA<&'static [u32]>> =
+ Lazy::new(|| {
+ static ALIGNED: &AlignAs<[u8], u32> = &AlignAs {
+ _align: [],
+ #[cfg(target_endian = "big")]
+ bytes: *include_bytes!("whitespace_anchored_rev.bigendian.dfa"),
+ #[cfg(target_endian = "little")]
+ bytes: *include_bytes!("whitespace_anchored_rev.littleendian.dfa"),
+ };
+ let (dfa, _) = DFA::from_bytes(&ALIGNED.bytes)
+ .expect("serialized DFA should be valid");
+ dfa
+ });
diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa
index efb9c8198..6cca67ff6 100644
--- a/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa
index 9a716d060..d8bfd0126 100644
--- a/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa
+++ b/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.rs b/vendor/bstr/src/unicode/fsm/word_break_fwd.rs
index dcb5f6bce..825782f1c 100644
--- a/vendor/bstr/src/unicode/fsm/word_break_fwd.rs
+++ b/vendor/bstr/src/unicode/fsm/word_break_fwd.rs
@@ -1,41 +1,19 @@
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
//
-// ucd-generate dfa --name WORD_BREAK_FWD --sparse --minimize --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)]
+// regex-cli generate serialize sparse dfa --minimize --start-kind anchored --shrink --rustfmt --safe WORD_BREAK_FWD src/unicode/fsm/ <snip: arg too long>
//
-// ucd-generate 0.2.12 is available on crates.io.
+// regex-cli 0.0.1 is available on crates.io.
-#[cfg(target_endian = "big")]
-pub static WORD_BREAK_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u32>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
+use regex_automata::{dfa::sparse::DFA, util::lazy::Lazy};
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("word_break_fwd.bigendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
-});
-
-#[cfg(target_endian = "little")]
-pub static WORD_BREAK_FWD: ::once_cell::sync::Lazy<
- ::regex_automata::SparseDFA<&'static [u8], u32>,
-> = ::once_cell::sync::Lazy::new(|| {
- #[repr(C)]
- struct Aligned<B: ?Sized> {
- _align: [u8; 0],
- bytes: B,
- }
-
- static ALIGNED: &'static Aligned<[u8]> = &Aligned {
- _align: [],
- bytes: *include_bytes!("word_break_fwd.littleendian.dfa"),
- };
-
- unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+pub static WORD_BREAK_FWD: Lazy<DFA<&'static [u8]>> = Lazy::new(|| {
+ #[cfg(target_endian = "big")]
+ static BYTES: &'static [u8] =
+ include_bytes!("word_break_fwd.bigendian.dfa");
+ #[cfg(target_endian = "little")]
+ static BYTES: &'static [u8] =
+ include_bytes!("word_break_fwd.littleendian.dfa");
+ let (dfa, _) =
+ DFA::from_bytes(BYTES).expect("serialized DFA should be valid");
+ dfa
});
diff --git a/vendor/bstr/src/unicode/grapheme.rs b/vendor/bstr/src/unicode/grapheme.rs
index 13b730c48..8a701be98 100644
--- a/vendor/bstr/src/unicode/grapheme.rs
+++ b/vendor/bstr/src/unicode/grapheme.rs
@@ -1,4 +1,4 @@
-use regex_automata::DFA;
+use regex_automata::{dfa::Automaton, Anchored, Input};
use crate::{
ext_slice::ByteSlice,
@@ -211,9 +211,12 @@ pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) {
// Safe because all ASCII bytes are valid UTF-8.
let grapheme = unsafe { bs[..1].to_str_unchecked() };
(grapheme, 1)
- } else if let Some(end) = GRAPHEME_BREAK_FWD.find(bs) {
+ } else if let Some(hm) = {
+ let input = Input::new(bs).anchored(Anchored::Yes);
+ GRAPHEME_BREAK_FWD.try_search_fwd(&input).unwrap()
+ } {
// Safe because a match can only occur for valid UTF-8.
- let grapheme = unsafe { bs[..end].to_str_unchecked() };
+ let grapheme = unsafe { bs[..hm.offset()].to_str_unchecked() };
(grapheme, grapheme.len())
} else {
const INVALID: &'static str = "\u{FFFD}";
@@ -226,8 +229,11 @@ pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) {
fn decode_last_grapheme(bs: &[u8]) -> (&str, usize) {
if bs.is_empty() {
("", 0)
- } else if let Some(mut start) = GRAPHEME_BREAK_REV.rfind(bs) {
- start = adjust_rev_for_regional_indicator(bs, start);
+ } else if let Some(hm) = {
+ let input = Input::new(bs).anchored(Anchored::Yes);
+ GRAPHEME_BREAK_REV.try_search_rev(&input).unwrap()
+ } {
+ let start = adjust_rev_for_regional_indicator(bs, hm.offset());
// Safe because a match can only occur for valid UTF-8.
let grapheme = unsafe { bs[start..].to_str_unchecked() };
(grapheme, grapheme.len())
@@ -266,8 +272,11 @@ fn adjust_rev_for_regional_indicator(mut bs: &[u8], i: usize) -> usize {
// regional indicator codepoints. A fix probably requires refactoring this
// code a bit such that we don't rescan regional indicators.
let mut count = 0;
- while let Some(start) = REGIONAL_INDICATOR_REV.rfind(bs) {
- bs = &bs[..start];
+ while let Some(hm) = {
+ let input = Input::new(bs).anchored(Anchored::Yes);
+ REGIONAL_INDICATOR_REV.try_search_rev(&input).unwrap()
+ } {
+ bs = &bs[..hm.offset()];
count += 1;
}
if count % 2 == 0 {
diff --git a/vendor/bstr/src/unicode/sentence.rs b/vendor/bstr/src/unicode/sentence.rs
index ff29c7e25..0baf4dfeb 100644
--- a/vendor/bstr/src/unicode/sentence.rs
+++ b/vendor/bstr/src/unicode/sentence.rs
@@ -1,4 +1,4 @@
-use regex_automata::DFA;
+use regex_automata::{dfa::Automaton, Anchored, Input};
use crate::{
ext_slice::ByteSlice,
@@ -145,9 +145,12 @@ impl<'a> Iterator for SentenceIndices<'a> {
fn decode_sentence(bs: &[u8]) -> (&str, usize) {
if bs.is_empty() {
("", 0)
- } else if let Some(end) = SENTENCE_BREAK_FWD.find(bs) {
+ } else if let Some(hm) = {
+ let input = Input::new(bs).anchored(Anchored::Yes);
+ SENTENCE_BREAK_FWD.try_search_fwd(&input).unwrap()
+ } {
// Safe because a match can only occur for valid UTF-8.
- let sentence = unsafe { bs[..end].to_str_unchecked() };
+ let sentence = unsafe { bs[..hm.offset()].to_str_unchecked() };
(sentence, sentence.len())
} else {
const INVALID: &'static str = "\u{FFFD}";
diff --git a/vendor/bstr/src/unicode/whitespace.rs b/vendor/bstr/src/unicode/whitespace.rs
index b5eff300e..bf1f47c8e 100644
--- a/vendor/bstr/src/unicode/whitespace.rs
+++ b/vendor/bstr/src/unicode/whitespace.rs
@@ -1,4 +1,4 @@
-use regex_automata::DFA;
+use regex_automata::{dfa::Automaton, Anchored, Input};
use crate::unicode::fsm::{
whitespace_anchored_fwd::WHITESPACE_ANCHORED_FWD,
@@ -7,10 +7,18 @@ use crate::unicode::fsm::{
/// Return the first position of a non-whitespace character.
pub fn whitespace_len_fwd(slice: &[u8]) -> usize {
- WHITESPACE_ANCHORED_FWD.find(slice).unwrap_or(0)
+ let input = Input::new(slice).anchored(Anchored::Yes);
+ WHITESPACE_ANCHORED_FWD
+ .try_search_fwd(&input)
+ .unwrap()
+ .map_or(0, |hm| hm.offset())
}
/// Return the last position of a non-whitespace character.
pub fn whitespace_len_rev(slice: &[u8]) -> usize {
- WHITESPACE_ANCHORED_REV.rfind(slice).unwrap_or(slice.len())
+ let input = Input::new(slice).anchored(Anchored::Yes);
+ WHITESPACE_ANCHORED_REV
+ .try_search_rev(&input)
+ .unwrap()
+ .map_or(slice.len(), |hm| hm.offset())
}
diff --git a/vendor/bstr/src/unicode/word.rs b/vendor/bstr/src/unicode/word.rs
index 849f0c8e2..d6bf0f6f3 100644
--- a/vendor/bstr/src/unicode/word.rs
+++ b/vendor/bstr/src/unicode/word.rs
@@ -1,4 +1,4 @@
-use regex_automata::DFA;
+use regex_automata::{dfa::Automaton, Anchored, Input};
use crate::{
ext_slice::ByteSlice,
@@ -67,7 +67,9 @@ impl<'a> Iterator for Words<'a> {
#[inline]
fn next(&mut self) -> Option<&'a str> {
while let Some(word) = self.0.next() {
- if SIMPLE_WORD_FWD.is_match(word.as_bytes()) {
+ let input =
+ Input::new(word).anchored(Anchored::Yes).earliest(true);
+ if SIMPLE_WORD_FWD.try_search_fwd(&input).unwrap().is_some() {
return Some(word);
}
}
@@ -143,7 +145,9 @@ impl<'a> Iterator for WordIndices<'a> {
#[inline]
fn next(&mut self) -> Option<(usize, usize, &'a str)> {
while let Some((start, end, word)) = self.0.next() {
- if SIMPLE_WORD_FWD.is_match(word.as_bytes()) {
+ let input =
+ Input::new(word).anchored(Anchored::Yes).earliest(true);
+ if SIMPLE_WORD_FWD.try_search_fwd(&input).unwrap().is_some() {
return Some((start, end, word));
}
}
@@ -307,9 +311,12 @@ impl<'a> Iterator for WordsWithBreakIndices<'a> {
fn decode_word(bs: &[u8]) -> (&str, usize) {
if bs.is_empty() {
("", 0)
- } else if let Some(end) = WORD_BREAK_FWD.find(bs) {
+ } else if let Some(hm) = {
+ let input = Input::new(bs).anchored(Anchored::Yes);
+ WORD_BREAK_FWD.try_search_fwd(&input).unwrap()
+ } {
// Safe because a match can only occur for valid UTF-8.
- let word = unsafe { bs[..end].to_str_unchecked() };
+ let word = unsafe { bs[..hm.offset()].to_str_unchecked() };
(word, word.len())
} else {
const INVALID: &'static str = "\u{FFFD}";