summaryrefslogtreecommitdiffstats
path: root/vendor/bstr/src/unicode/grapheme.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/bstr/src/unicode/grapheme.rs')
-rw-r--r--vendor/bstr/src/unicode/grapheme.rs23
1 files changed, 16 insertions, 7 deletions
diff --git a/vendor/bstr/src/unicode/grapheme.rs b/vendor/bstr/src/unicode/grapheme.rs
index 13b730c48..8a701be98 100644
--- a/vendor/bstr/src/unicode/grapheme.rs
+++ b/vendor/bstr/src/unicode/grapheme.rs
@@ -1,4 +1,4 @@
-use regex_automata::DFA;
+use regex_automata::{dfa::Automaton, Anchored, Input};
use crate::{
ext_slice::ByteSlice,
@@ -211,9 +211,12 @@ pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) {
// Safe because all ASCII bytes are valid UTF-8.
let grapheme = unsafe { bs[..1].to_str_unchecked() };
(grapheme, 1)
- } else if let Some(end) = GRAPHEME_BREAK_FWD.find(bs) {
+ } else if let Some(hm) = {
+ let input = Input::new(bs).anchored(Anchored::Yes);
+ GRAPHEME_BREAK_FWD.try_search_fwd(&input).unwrap()
+ } {
// Safe because a match can only occur for valid UTF-8.
- let grapheme = unsafe { bs[..end].to_str_unchecked() };
+ let grapheme = unsafe { bs[..hm.offset()].to_str_unchecked() };
(grapheme, grapheme.len())
} else {
const INVALID: &'static str = "\u{FFFD}";
@@ -226,8 +229,11 @@ pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) {
fn decode_last_grapheme(bs: &[u8]) -> (&str, usize) {
if bs.is_empty() {
("", 0)
- } else if let Some(mut start) = GRAPHEME_BREAK_REV.rfind(bs) {
- start = adjust_rev_for_regional_indicator(bs, start);
+ } else if let Some(hm) = {
+ let input = Input::new(bs).anchored(Anchored::Yes);
+ GRAPHEME_BREAK_REV.try_search_rev(&input).unwrap()
+ } {
+ let start = adjust_rev_for_regional_indicator(bs, hm.offset());
// Safe because a match can only occur for valid UTF-8.
let grapheme = unsafe { bs[start..].to_str_unchecked() };
(grapheme, grapheme.len())
@@ -266,8 +272,11 @@ fn adjust_rev_for_regional_indicator(mut bs: &[u8], i: usize) -> usize {
// regional indicator codepoints. A fix probably requires refactoring this
// code a bit such that we don't rescan regional indicators.
let mut count = 0;
- while let Some(start) = REGIONAL_INDICATOR_REV.rfind(bs) {
- bs = &bs[..start];
+ while let Some(hm) = {
+ let input = Input::new(bs).anchored(Anchored::Yes);
+ REGIONAL_INDICATOR_REV.try_search_rev(&input).unwrap()
+ } {
+ bs = &bs[..hm.offset()];
count += 1;
}
if count % 2 == 0 {