From 43a97878ce14b72f0981164f87f2e35e14151312 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 11:22:09 +0200 Subject: Adding upstream version 110.0.1. Signed-off-by: Daniel Baumann --- .../rust/unicode-normalization/src/recompose.rs | 154 +++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 third_party/rust/unicode-normalization/src/recompose.rs (limited to 'third_party/rust/unicode-normalization/src/recompose.rs') diff --git a/third_party/rust/unicode-normalization/src/recompose.rs b/third_party/rust/unicode-normalization/src/recompose.rs new file mode 100644 index 0000000000..2a1960afc8 --- /dev/null +++ b/third_party/rust/unicode-normalization/src/recompose.rs @@ -0,0 +1,154 @@ +// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use crate::decompose::Decompositions; +use core::fmt::{self, Write}; +use tinyvec::TinyVec; + +#[derive(Clone)] +enum RecompositionState { + Composing, + Purging(usize), + Finished(usize), +} + +/// External iterator for a string recomposition's characters. +#[derive(Clone)] +pub struct Recompositions { + iter: Decompositions, + state: RecompositionState, + buffer: TinyVec<[char; 4]>, + composee: Option, + last_ccc: Option, +} + +#[inline] +pub fn new_canonical>(iter: I) -> Recompositions { + Recompositions { + iter: super::decompose::new_canonical(iter), + state: self::RecompositionState::Composing, + buffer: TinyVec::new(), + composee: None, + last_ccc: None, + } +} + +#[inline] +pub fn new_compatible>(iter: I) -> Recompositions { + Recompositions { + iter: super::decompose::new_compatible(iter), + state: self::RecompositionState::Composing, + buffer: TinyVec::new(), + composee: None, + last_ccc: None, + } +} + +impl> Iterator for Recompositions { + type Item = char; + + #[inline] + fn next(&mut self) -> Option { + use self::RecompositionState::*; + + loop { + match self.state { + Composing => { + for ch in self.iter.by_ref() { + let ch_class = super::char::canonical_combining_class(ch); + let k = match self.composee { + None => { + if ch_class != 0 { + return Some(ch); + } + self.composee = Some(ch); + continue; + } + Some(k) => k, + }; + match self.last_ccc { + None => match super::char::compose(k, ch) { + Some(r) => { + self.composee = Some(r); + continue; + } + None => { + if ch_class == 0 { + self.composee = Some(ch); + return Some(k); + } + self.buffer.push(ch); + self.last_ccc = Some(ch_class); + } + }, + Some(l_class) => { + if l_class >= ch_class { + // `ch` is blocked from `composee` + if ch_class == 0 { + self.composee = Some(ch); + self.last_ccc = None; + self.state = Purging(0); + return Some(k); + } + self.buffer.push(ch); + self.last_ccc = Some(ch_class); + continue; + } + match super::char::compose(k, ch) { + Some(r) => { + self.composee = Some(r); + continue; + } + None => { + self.buffer.push(ch); + self.last_ccc = Some(ch_class); + } + } + } + } + } + self.state = Finished(0); + if self.composee.is_some() { + return self.composee.take(); + } + } + Purging(next) => match self.buffer.get(next).cloned() { + None => { + self.buffer.clear(); + self.state = Composing; + } + s => { + self.state = Purging(next + 1); + return s; + } + }, + Finished(next) => match self.buffer.get(next).cloned() { + None => { + self.buffer.clear(); + return self.composee.take(); + } + s => { + self.state = Finished(next + 1); + return s; + } + }, + } + } + } +} + +impl + Clone> fmt::Display for Recompositions { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for c in self.clone() { + f.write_char(c)?; + } + Ok(()) + } +} -- cgit v1.2.3