diff options
Diffstat (limited to 'vendor/bstr/src/unicode/grapheme.rs')
-rw-r--r-- | vendor/bstr/src/unicode/grapheme.rs | 23 |
1 files changed, 16 insertions, 7 deletions
diff --git a/vendor/bstr/src/unicode/grapheme.rs b/vendor/bstr/src/unicode/grapheme.rs index 13b730c48..8a701be98 100644 --- a/vendor/bstr/src/unicode/grapheme.rs +++ b/vendor/bstr/src/unicode/grapheme.rs @@ -1,4 +1,4 @@ -use regex_automata::DFA; +use regex_automata::{dfa::Automaton, Anchored, Input}; use crate::{ ext_slice::ByteSlice, @@ -211,9 +211,12 @@ pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) { // Safe because all ASCII bytes are valid UTF-8. let grapheme = unsafe { bs[..1].to_str_unchecked() }; (grapheme, 1) - } else if let Some(end) = GRAPHEME_BREAK_FWD.find(bs) { + } else if let Some(hm) = { + let input = Input::new(bs).anchored(Anchored::Yes); + GRAPHEME_BREAK_FWD.try_search_fwd(&input).unwrap() + } { // Safe because a match can only occur for valid UTF-8. - let grapheme = unsafe { bs[..end].to_str_unchecked() }; + let grapheme = unsafe { bs[..hm.offset()].to_str_unchecked() }; (grapheme, grapheme.len()) } else { const INVALID: &'static str = "\u{FFFD}"; @@ -226,8 +229,11 @@ pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) { fn decode_last_grapheme(bs: &[u8]) -> (&str, usize) { if bs.is_empty() { ("", 0) - } else if let Some(mut start) = GRAPHEME_BREAK_REV.rfind(bs) { - start = adjust_rev_for_regional_indicator(bs, start); + } else if let Some(hm) = { + let input = Input::new(bs).anchored(Anchored::Yes); + GRAPHEME_BREAK_REV.try_search_rev(&input).unwrap() + } { + let start = adjust_rev_for_regional_indicator(bs, hm.offset()); // Safe because a match can only occur for valid UTF-8. let grapheme = unsafe { bs[start..].to_str_unchecked() }; (grapheme, grapheme.len()) @@ -266,8 +272,11 @@ fn adjust_rev_for_regional_indicator(mut bs: &[u8], i: usize) -> usize { // regional indicator codepoints. A fix probably requires refactoring this // code a bit such that we don't rescan regional indicators. let mut count = 0; - while let Some(start) = REGIONAL_INDICATOR_REV.rfind(bs) { - bs = &bs[..start]; + while let Some(hm) = { + let input = Input::new(bs).anchored(Anchored::Yes); + REGIONAL_INDICATOR_REV.try_search_rev(&input).unwrap() + } { + bs = &bs[..hm.offset()]; count += 1; } if count % 2 == 0 { |