summaryrefslogtreecommitdiffstats
path: root/vendor/bstr/src
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/bstr/src')
-rw-r--r--vendor/bstr/src/ascii.rs339
-rw-r--r--vendor/bstr/src/bstr.rs100
-rw-r--r--vendor/bstr/src/bstring.rs103
-rw-r--r--vendor/bstr/src/byteset/mod.rs115
-rw-r--r--vendor/bstr/src/byteset/scalar.rs309
-rw-r--r--vendor/bstr/src/ext_slice.rs3828
-rw-r--r--vendor/bstr/src/ext_vec.rs1124
-rw-r--r--vendor/bstr/src/impls.rs1125
-rw-r--r--vendor/bstr/src/io.rs515
-rw-r--r--vendor/bstr/src/lib.rs482
-rw-r--r--vendor/bstr/src/tests.rs32
-rw-r--r--vendor/bstr/src/unicode/data/GraphemeBreakTest.txt630
-rw-r--r--vendor/bstr/src/unicode/data/LICENSE-UNICODE45
-rw-r--r--vendor/bstr/src/unicode/data/SentenceBreakTest.txt530
-rw-r--r--vendor/bstr/src/unicode/data/WordBreakTest.txt1851
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfabin0 -> 10781 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfabin0 -> 10781 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs41
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfabin0 -> 55271 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfabin0 -> 55271 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs41
-rw-r--r--vendor/bstr/src/unicode/fsm/mod.rs8
-rw-r--r--vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfabin0 -> 366 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfabin0 -> 366 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs41
-rw-r--r--vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfabin0 -> 153619 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfabin0 -> 153619 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs41
-rw-r--r--vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfabin0 -> 9237 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfabin0 -> 9237 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/simple_word_fwd.rs41
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfabin0 -> 572 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfabin0 -> 572 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs41
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfabin0 -> 884 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfabin0 -> 884 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs41
-rw-r--r--vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfabin0 -> 236309 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfabin0 -> 236309 bytes
-rw-r--r--vendor/bstr/src/unicode/fsm/word_break_fwd.rs41
-rw-r--r--vendor/bstr/src/unicode/grapheme.rs381
-rw-r--r--vendor/bstr/src/unicode/mod.rs12
-rw-r--r--vendor/bstr/src/unicode/sentence.rs225
-rw-r--r--vendor/bstr/src/unicode/whitespace.rs16
-rw-r--r--vendor/bstr/src/unicode/word.rs420
-rw-r--r--vendor/bstr/src/utf8.rs1369
46 files changed, 13887 insertions, 0 deletions
diff --git a/vendor/bstr/src/ascii.rs b/vendor/bstr/src/ascii.rs
new file mode 100644
index 000000000..259d41fe7
--- /dev/null
+++ b/vendor/bstr/src/ascii.rs
@@ -0,0 +1,339 @@
+use core::mem;
+
+// The following ~400 lines of code exist for exactly one purpose, which is
+// to optimize this code:
+//
+// byte_slice.iter().position(|&b| b > 0x7F).unwrap_or(byte_slice.len())
+//
+// Yes... Overengineered is a word that comes to mind, but this is effectively
+// a very similar problem to memchr, and virtually nobody has been able to
+// resist optimizing the crap out of that (except for perhaps the BSD and MUSL
+// folks). In particular, this routine makes a very common case (ASCII) very
+// fast, which seems worth it. We do stop short of adding AVX variants of the
+// code below in order to retain our sanity and also to avoid needing to deal
+// with runtime target feature detection. RESIST!
+//
+// In order to understand the SIMD version below, it would be good to read this
+// comment describing how my memchr routine works:
+// https://github.com/BurntSushi/rust-memchr/blob/b0a29f267f4a7fad8ffcc8fe8377a06498202883/src/x86/sse2.rs#L19-L106
+//
+// The primary difference with memchr is that for ASCII, we can do a bit less
+// work. In particular, we don't need to detect the presence of a specific
+// byte, but rather, whether any byte has its most significant bit set. That
+// means we can effectively skip the _mm_cmpeq_epi8 step and jump straight to
+// _mm_movemask_epi8.
+
+#[cfg(any(test, miri, not(target_arch = "x86_64")))]
+const USIZE_BYTES: usize = mem::size_of::<usize>();
+#[cfg(any(test, miri, not(target_arch = "x86_64")))]
+const FALLBACK_LOOP_SIZE: usize = 2 * USIZE_BYTES;
+
+// This is a mask where the most significant bit of each byte in the usize
+// is set. We test this bit to determine whether a character is ASCII or not.
+// Namely, a single byte is regarded as an ASCII codepoint if and only if its
+// most significant bit is not set.
+#[cfg(any(test, miri, not(target_arch = "x86_64")))]
+const ASCII_MASK_U64: u64 = 0x8080808080808080;
+#[cfg(any(test, miri, not(target_arch = "x86_64")))]
+const ASCII_MASK: usize = ASCII_MASK_U64 as usize;
+
+/// Returns the index of the first non ASCII byte in the given slice.
+///
+/// If slice only contains ASCII bytes, then the length of the slice is
+/// returned.
+pub fn first_non_ascii_byte(slice: &[u8]) -> usize {
+ #[cfg(any(miri, not(target_arch = "x86_64")))]
+ {
+ first_non_ascii_byte_fallback(slice)
+ }
+
+ #[cfg(all(not(miri), target_arch = "x86_64"))]
+ {
+ first_non_ascii_byte_sse2(slice)
+ }
+}
+
+#[cfg(any(test, miri, not(target_arch = "x86_64")))]
+fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize {
+ let align = USIZE_BYTES - 1;
+ let start_ptr = slice.as_ptr();
+ let end_ptr = slice[slice.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ unsafe {
+ if slice.len() < USIZE_BYTES {
+ return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr);
+ }
+
+ let chunk = read_unaligned_usize(ptr);
+ let mask = chunk & ASCII_MASK;
+ if mask != 0 {
+ return first_non_ascii_byte_mask(mask);
+ }
+
+ ptr = ptr_add(ptr, USIZE_BYTES - (start_ptr as usize & align));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(ptr_sub(end_ptr, USIZE_BYTES) >= start_ptr);
+ if slice.len() >= FALLBACK_LOOP_SIZE {
+ while ptr <= ptr_sub(end_ptr, FALLBACK_LOOP_SIZE) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let a = *(ptr as *const usize);
+ let b = *(ptr_add(ptr, USIZE_BYTES) as *const usize);
+ if (a | b) & ASCII_MASK != 0 {
+ // What a kludge. We wrap the position finding code into
+ // a non-inlineable function, which makes the codegen in
+ // the tight loop above a bit better by avoiding a
+ // couple extra movs. We pay for it by two additional
+ // stores, but only in the case of finding a non-ASCII
+ // byte.
+ #[inline(never)]
+ unsafe fn findpos(
+ start_ptr: *const u8,
+ ptr: *const u8,
+ ) -> usize {
+ let a = *(ptr as *const usize);
+ let b = *(ptr_add(ptr, USIZE_BYTES) as *const usize);
+
+ let mut at = sub(ptr, start_ptr);
+ let maska = a & ASCII_MASK;
+ if maska != 0 {
+ return at + first_non_ascii_byte_mask(maska);
+ }
+
+ at += USIZE_BYTES;
+ let maskb = b & ASCII_MASK;
+ debug_assert!(maskb != 0);
+ return at + first_non_ascii_byte_mask(maskb);
+ }
+ return findpos(start_ptr, ptr);
+ }
+ ptr = ptr_add(ptr, FALLBACK_LOOP_SIZE);
+ }
+ }
+ first_non_ascii_byte_slow(start_ptr, end_ptr, ptr)
+ }
+}
+
+#[cfg(all(not(miri), target_arch = "x86_64"))]
+fn first_non_ascii_byte_sse2(slice: &[u8]) -> usize {
+ use core::arch::x86_64::*;
+
+ const VECTOR_SIZE: usize = mem::size_of::<__m128i>();
+ const VECTOR_ALIGN: usize = VECTOR_SIZE - 1;
+ const VECTOR_LOOP_SIZE: usize = 4 * VECTOR_SIZE;
+
+ let start_ptr = slice.as_ptr();
+ let end_ptr = slice[slice.len()..].as_ptr();
+ let mut ptr = start_ptr;
+
+ unsafe {
+ if slice.len() < VECTOR_SIZE {
+ return first_non_ascii_byte_slow(start_ptr, end_ptr, ptr);
+ }
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let mask = _mm_movemask_epi8(chunk);
+ if mask != 0 {
+ return mask.trailing_zeros() as usize;
+ }
+
+ ptr = ptr.add(VECTOR_SIZE - (start_ptr as usize & VECTOR_ALIGN));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(end_ptr.sub(VECTOR_SIZE) >= start_ptr);
+ if slice.len() >= VECTOR_LOOP_SIZE {
+ while ptr <= ptr_sub(end_ptr, VECTOR_LOOP_SIZE) {
+ debug_assert_eq!(0, (ptr as usize) % VECTOR_SIZE);
+
+ let a = _mm_load_si128(ptr as *const __m128i);
+ let b = _mm_load_si128(ptr.add(VECTOR_SIZE) as *const __m128i);
+ let c =
+ _mm_load_si128(ptr.add(2 * VECTOR_SIZE) as *const __m128i);
+ let d =
+ _mm_load_si128(ptr.add(3 * VECTOR_SIZE) as *const __m128i);
+
+ let or1 = _mm_or_si128(a, b);
+ let or2 = _mm_or_si128(c, d);
+ let or3 = _mm_or_si128(or1, or2);
+ if _mm_movemask_epi8(or3) != 0 {
+ let mut at = sub(ptr, start_ptr);
+ let mask = _mm_movemask_epi8(a);
+ if mask != 0 {
+ return at + mask.trailing_zeros() as usize;
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(b);
+ if mask != 0 {
+ return at + mask.trailing_zeros() as usize;
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(c);
+ if mask != 0 {
+ return at + mask.trailing_zeros() as usize;
+ }
+
+ at += VECTOR_SIZE;
+ let mask = _mm_movemask_epi8(d);
+ debug_assert!(mask != 0);
+ return at + mask.trailing_zeros() as usize;
+ }
+ ptr = ptr_add(ptr, VECTOR_LOOP_SIZE);
+ }
+ }
+ while ptr <= end_ptr.sub(VECTOR_SIZE) {
+ debug_assert!(sub(end_ptr, ptr) >= VECTOR_SIZE);
+
+ let chunk = _mm_loadu_si128(ptr as *const __m128i);
+ let mask = _mm_movemask_epi8(chunk);
+ if mask != 0 {
+ return sub(ptr, start_ptr) + mask.trailing_zeros() as usize;
+ }
+ ptr = ptr.add(VECTOR_SIZE);
+ }
+ first_non_ascii_byte_slow(start_ptr, end_ptr, ptr)
+ }
+}
+
+#[inline(always)]
+unsafe fn first_non_ascii_byte_slow(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ mut ptr: *const u8,
+) -> usize {
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr);
+
+ while ptr < end_ptr {
+ if *ptr > 0x7F {
+ return sub(ptr, start_ptr);
+ }
+ ptr = ptr.offset(1);
+ }
+ sub(end_ptr, start_ptr)
+}
+
+/// Compute the position of the first non-ASCII byte in the given mask.
+///
+/// The mask should be computed by `chunk & ASCII_MASK`, where `chunk` is one
+/// `usize` worth of contiguous bytes from the slice being checked (8 bytes on
+/// 64-bit targets) where *at least* one of those bytes is not an ASCII byte.
+///
+/// The position returned is always in the inclusive range [0, USIZE_BYTES - 1].
+#[cfg(any(test, miri, not(target_arch = "x86_64")))]
+fn first_non_ascii_byte_mask(mask: usize) -> usize {
+ #[cfg(target_endian = "little")]
+ {
+ mask.trailing_zeros() as usize / 8
+ }
+ #[cfg(target_endian = "big")]
+ {
+ mask.leading_zeros() as usize / 8
+ }
+}
+
+/// Increment the given pointer by the given amount.
+unsafe fn ptr_add(ptr: *const u8, amt: usize) -> *const u8 {
+ debug_assert!(amt < ::core::isize::MAX as usize);
+ ptr.offset(amt as isize)
+}
+
+/// Decrement the given pointer by the given amount.
+unsafe fn ptr_sub(ptr: *const u8, amt: usize) -> *const u8 {
+ debug_assert!(amt < ::core::isize::MAX as usize);
+ ptr.offset((amt as isize).wrapping_neg())
+}
+
+#[cfg(any(test, miri, not(target_arch = "x86_64")))]
+unsafe fn read_unaligned_usize(ptr: *const u8) -> usize {
+ use core::ptr;
+
+ let mut n: usize = 0;
+ ptr::copy_nonoverlapping(ptr, &mut n as *mut _ as *mut u8, USIZE_BYTES);
+ n
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // Our testing approach here is to try and exhaustively test every case.
+ // This includes the position at which a non-ASCII byte occurs in addition
+ // to the alignment of the slice that we're searching.
+
+ #[test]
+ fn positive_fallback_forward() {
+ for i in 0..517 {
+ let s = "a".repeat(i);
+ assert_eq!(
+ i,
+ first_non_ascii_byte_fallback(s.as_bytes()),
+ "i: {:?}, len: {:?}, s: {:?}",
+ i,
+ s.len(),
+ s
+ );
+ }
+ }
+
+ #[test]
+ #[cfg(target_arch = "x86_64")]
+ #[cfg(not(miri))]
+ fn positive_sse2_forward() {
+ for i in 0..517 {
+ let b = "a".repeat(i).into_bytes();
+ assert_eq!(b.len(), first_non_ascii_byte_sse2(&b));
+ }
+ }
+
+ #[test]
+ #[cfg(not(miri))]
+ fn negative_fallback_forward() {
+ for i in 0..517 {
+ for align in 0..65 {
+ let mut s = "a".repeat(i);
+ s.push_str("☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃");
+ let s = s.get(align..).unwrap_or("");
+ assert_eq!(
+ i.saturating_sub(align),
+ first_non_ascii_byte_fallback(s.as_bytes()),
+ "i: {:?}, align: {:?}, len: {:?}, s: {:?}",
+ i,
+ align,
+ s.len(),
+ s
+ );
+ }
+ }
+ }
+
+ #[test]
+ #[cfg(target_arch = "x86_64")]
+ #[cfg(not(miri))]
+ fn negative_sse2_forward() {
+ for i in 0..517 {
+ for align in 0..65 {
+ let mut s = "a".repeat(i);
+ s.push_str("☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃☃");
+ let s = s.get(align..).unwrap_or("");
+ assert_eq!(
+ i.saturating_sub(align),
+ first_non_ascii_byte_sse2(s.as_bytes()),
+ "i: {:?}, align: {:?}, len: {:?}, s: {:?}",
+ i,
+ align,
+ s.len(),
+ s
+ );
+ }
+ }
+ }
+}
diff --git a/vendor/bstr/src/bstr.rs b/vendor/bstr/src/bstr.rs
new file mode 100644
index 000000000..5036f0655
--- /dev/null
+++ b/vendor/bstr/src/bstr.rs
@@ -0,0 +1,100 @@
+use core::mem;
+
+#[cfg(feature = "alloc")]
+use alloc::boxed::Box;
+
+/// A wrapper for `&[u8]` that provides convenient string oriented trait impls.
+///
+/// If you need ownership or a growable byte string buffer, then use
+/// [`BString`](struct.BString.html).
+///
+/// Using a `&BStr` is just like using a `&[u8]`, since `BStr`
+/// implements `Deref` to `[u8]`. So all methods available on `[u8]`
+/// are also available on `BStr`.
+///
+/// # Representation
+///
+/// A `&BStr` has the same representation as a `&str`. That is, a `&BStr` is
+/// a fat pointer which consists of a pointer to some bytes and a length.
+///
+/// # Trait implementations
+///
+/// The `BStr` type has a number of trait implementations, and in particular,
+/// defines equality and ordinal comparisons between `&BStr`, `&str` and
+/// `&[u8]` for convenience.
+///
+/// The `Debug` implementation for `BStr` shows its bytes as a normal string.
+/// For invalid UTF-8, hex escape sequences are used.
+///
+/// The `Display` implementation behaves as if `BStr` were first lossily
+/// converted to a `str`. Invalid UTF-8 bytes are substituted with the Unicode
+/// replacement codepoint, which looks like this: �.
+#[derive(Hash)]
+#[repr(transparent)]
+pub struct BStr {
+ pub(crate) bytes: [u8],
+}
+
+impl BStr {
+ /// Directly creates a `BStr` slice from anything that can be converted
+ /// to a byte slice.
+ ///
+ /// This is very similar to the [`B`](crate::B) function, except this
+ /// returns a `&BStr` instead of a `&[u8]`.
+ ///
+ /// This is a cost-free conversion.
+ ///
+ /// # Example
+ ///
+ /// You can create `BStr`'s from byte arrays, byte slices or even string
+ /// slices:
+ ///
+ /// ```
+ /// use bstr::BStr;
+ ///
+ /// let a = BStr::new(b"abc");
+ /// let b = BStr::new(&b"abc"[..]);
+ /// let c = BStr::new("abc");
+ ///
+ /// assert_eq!(a, b);
+ /// assert_eq!(a, c);
+ /// ```
+ #[inline]
+ pub fn new<'a, B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> &'a BStr {
+ BStr::from_bytes(bytes.as_ref())
+ }
+
+ #[inline]
+ pub(crate) fn new_mut<B: ?Sized + AsMut<[u8]>>(
+ bytes: &mut B,
+ ) -> &mut BStr {
+ BStr::from_bytes_mut(bytes.as_mut())
+ }
+
+ #[inline]
+ pub(crate) fn from_bytes(slice: &[u8]) -> &BStr {
+ unsafe { mem::transmute(slice) }
+ }
+
+ #[inline]
+ pub(crate) fn from_bytes_mut(slice: &mut [u8]) -> &mut BStr {
+ unsafe { mem::transmute(slice) }
+ }
+
+ #[inline]
+ #[cfg(feature = "alloc")]
+ pub(crate) fn from_boxed_bytes(slice: Box<[u8]>) -> Box<BStr> {
+ unsafe { Box::from_raw(Box::into_raw(slice) as _) }
+ }
+
+ #[inline]
+ #[cfg(feature = "alloc")]
+ pub(crate) fn into_boxed_bytes(slice: Box<BStr>) -> Box<[u8]> {
+ unsafe { Box::from_raw(Box::into_raw(slice) as _) }
+ }
+
+ #[inline]
+ pub(crate) fn as_bytes(&self) -> &[u8] {
+ &self.bytes
+ }
+}
diff --git a/vendor/bstr/src/bstring.rs b/vendor/bstr/src/bstring.rs
new file mode 100644
index 000000000..d144b1d85
--- /dev/null
+++ b/vendor/bstr/src/bstring.rs
@@ -0,0 +1,103 @@
+use alloc::vec::Vec;
+
+use crate::bstr::BStr;
+
+/// A wrapper for `Vec<u8>` that provides convenient string oriented trait
+/// impls.
+///
+/// A `BString` has ownership over its contents and corresponds to
+/// a growable or shrinkable buffer. Its borrowed counterpart is a
+/// [`BStr`](struct.BStr.html), called a byte string slice.
+///
+/// Using a `BString` is just like using a `Vec<u8>`, since `BString`
+/// implements `Deref` to `Vec<u8>`. So all methods available on `Vec<u8>`
+/// are also available on `BString`.
+///
+/// # Examples
+///
+/// You can create a new `BString` from a `Vec<u8>` via a `From` impl:
+///
+/// ```
+/// use bstr::BString;
+///
+/// let s = BString::from("Hello, world!");
+/// ```
+///
+/// # Deref
+///
+/// The `BString` type implements `Deref` and `DerefMut`, where the target
+/// types are `&Vec<u8>` and `&mut Vec<u8>`, respectively. `Deref` permits all of the
+/// methods defined on `Vec<u8>` to be implicitly callable on any `BString`.
+///
+/// For more information about how deref works, see the documentation for the
+/// [`std::ops::Deref`](https://doc.rust-lang.org/std/ops/trait.Deref.html)
+/// trait.
+///
+/// # Representation
+///
+/// A `BString` has the same representation as a `Vec<u8>` and a `String`.
+/// That is, it is made up of three word sized components: a pointer to a
+/// region of memory containing the bytes, a length and a capacity.
+#[derive(Clone, Hash)]
+pub struct BString {
+ bytes: Vec<u8>,
+}
+
+impl BString {
+ /// Constructs a new `BString` from the given [`Vec`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::BString;
+ ///
+ /// let mut b = BString::new(Vec::with_capacity(10));
+ /// ```
+ ///
+ /// This function is `const`:
+ ///
+ /// ```
+ /// use bstr::BString;
+ ///
+ /// const B: BString = BString::new(vec![]);
+ /// ```
+ #[inline]
+ pub const fn new(bytes: Vec<u8>) -> BString {
+ BString { bytes }
+ }
+
+ #[inline]
+ pub(crate) fn as_bytes(&self) -> &[u8] {
+ &self.bytes
+ }
+
+ #[inline]
+ pub(crate) fn as_bytes_mut(&mut self) -> &mut [u8] {
+ &mut self.bytes
+ }
+
+ #[inline]
+ pub(crate) fn as_bstr(&self) -> &BStr {
+ BStr::new(&self.bytes)
+ }
+
+ #[inline]
+ pub(crate) fn as_mut_bstr(&mut self) -> &mut BStr {
+ BStr::new_mut(&mut self.bytes)
+ }
+
+ #[inline]
+ pub(crate) fn as_vec(&self) -> &Vec<u8> {
+ &self.bytes
+ }
+
+ #[inline]
+ pub(crate) fn as_vec_mut(&mut self) -> &mut Vec<u8> {
+ &mut self.bytes
+ }
+
+ #[inline]
+ pub(crate) fn into_vec(self) -> Vec<u8> {
+ self.bytes
+ }
+}
diff --git a/vendor/bstr/src/byteset/mod.rs b/vendor/bstr/src/byteset/mod.rs
new file mode 100644
index 000000000..c6c697c19
--- /dev/null
+++ b/vendor/bstr/src/byteset/mod.rs
@@ -0,0 +1,115 @@
+use memchr::{memchr, memchr2, memchr3, memrchr, memrchr2, memrchr3};
+
+mod scalar;
+
+#[inline]
+fn build_table(byteset: &[u8]) -> [u8; 256] {
+ let mut table = [0u8; 256];
+ for &b in byteset {
+ table[b as usize] = 1;
+ }
+ table
+}
+
+#[inline]
+pub(crate) fn find(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
+ match byteset.len() {
+ 0 => return None,
+ 1 => memchr(byteset[0], haystack),
+ 2 => memchr2(byteset[0], byteset[1], haystack),
+ 3 => memchr3(byteset[0], byteset[1], byteset[2], haystack),
+ _ => {
+ let table = build_table(byteset);
+ scalar::forward_search_bytes(haystack, |b| table[b as usize] != 0)
+ }
+ }
+}
+
+#[inline]
+pub(crate) fn rfind(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
+ match byteset.len() {
+ 0 => return None,
+ 1 => memrchr(byteset[0], haystack),
+ 2 => memrchr2(byteset[0], byteset[1], haystack),
+ 3 => memrchr3(byteset[0], byteset[1], byteset[2], haystack),
+ _ => {
+ let table = build_table(byteset);
+ scalar::reverse_search_bytes(haystack, |b| table[b as usize] != 0)
+ }
+ }
+}
+
+#[inline]
+pub(crate) fn find_not(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
+ if haystack.is_empty() {
+ return None;
+ }
+ match byteset.len() {
+ 0 => return Some(0),
+ 1 => scalar::inv_memchr(byteset[0], haystack),
+ 2 => scalar::forward_search_bytes(haystack, |b| {
+ b != byteset[0] && b != byteset[1]
+ }),
+ 3 => scalar::forward_search_bytes(haystack, |b| {
+ b != byteset[0] && b != byteset[1] && b != byteset[2]
+ }),
+ _ => {
+ let table = build_table(byteset);
+ scalar::forward_search_bytes(haystack, |b| table[b as usize] == 0)
+ }
+ }
+}
+#[inline]
+pub(crate) fn rfind_not(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
+ if haystack.is_empty() {
+ return None;
+ }
+ match byteset.len() {
+ 0 => return Some(haystack.len() - 1),
+ 1 => scalar::inv_memrchr(byteset[0], haystack),
+ 2 => scalar::reverse_search_bytes(haystack, |b| {
+ b != byteset[0] && b != byteset[1]
+ }),
+ 3 => scalar::reverse_search_bytes(haystack, |b| {
+ b != byteset[0] && b != byteset[1] && b != byteset[2]
+ }),
+ _ => {
+ let table = build_table(byteset);
+ scalar::reverse_search_bytes(haystack, |b| table[b as usize] == 0)
+ }
+ }
+}
+
+#[cfg(all(test, feature = "std", not(miri)))]
+mod tests {
+ quickcheck::quickcheck! {
+ fn qc_byteset_forward_matches_naive(
+ haystack: Vec<u8>,
+ needles: Vec<u8>
+ ) -> bool {
+ super::find(&haystack, &needles)
+ == haystack.iter().position(|b| needles.contains(b))
+ }
+ fn qc_byteset_backwards_matches_naive(
+ haystack: Vec<u8>,
+ needles: Vec<u8>
+ ) -> bool {
+ super::rfind(&haystack, &needles)
+ == haystack.iter().rposition(|b| needles.contains(b))
+ }
+ fn qc_byteset_forward_not_matches_naive(
+ haystack: Vec<u8>,
+ needles: Vec<u8>
+ ) -> bool {
+ super::find_not(&haystack, &needles)
+ == haystack.iter().position(|b| !needles.contains(b))
+ }
+ fn qc_byteset_backwards_not_matches_naive(
+ haystack: Vec<u8>,
+ needles: Vec<u8>
+ ) -> bool {
+ super::rfind_not(&haystack, &needles)
+ == haystack.iter().rposition(|b| !needles.contains(b))
+ }
+ }
+}
diff --git a/vendor/bstr/src/byteset/scalar.rs b/vendor/bstr/src/byteset/scalar.rs
new file mode 100644
index 000000000..28bff673d
--- /dev/null
+++ b/vendor/bstr/src/byteset/scalar.rs
@@ -0,0 +1,309 @@
+// This is adapted from `fallback.rs` from rust-memchr. It's modified to return
+// the 'inverse' query of memchr, e.g. finding the first byte not in the
+// provided set. This is simple for the 1-byte case.
+
+use core::{cmp, usize};
+
+#[cfg(target_pointer_width = "32")]
+const USIZE_BYTES: usize = 4;
+
+#[cfg(target_pointer_width = "64")]
+const USIZE_BYTES: usize = 8;
+
+// The number of bytes processed in one iteration of inv_memchr/inv_memrchr.
+const LOOP_SIZE: usize = 2 * USIZE_BYTES;
+
+/// Repeat the given byte into a word size number. That is, every 8 bits
+/// is equivalent to the given byte. For example, if `b` is `\x4E` or
+/// `01001110` in binary, then the returned value on a 32-bit system would be:
+/// `01001110_01001110_01001110_01001110`.
+#[inline(always)]
+fn repeat_byte(b: u8) -> usize {
+ (b as usize) * (usize::MAX / 255)
+}
+
+pub fn inv_memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let confirm = |byte| byte != n1;
+ let loop_size = cmp::min(LOOP_SIZE, haystack.len());
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+
+ unsafe {
+ let end_ptr = haystack.as_ptr().add(haystack.len());
+ let mut ptr = start_ptr;
+
+ if haystack.len() < USIZE_BYTES {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = read_unaligned_usize(ptr);
+ if (chunk ^ vn1) != 0 {
+ return forward_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
+ debug_assert!(ptr > start_ptr);
+ debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
+ while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let a = *(ptr as *const usize);
+ let b = *(ptr.add(USIZE_BYTES) as *const usize);
+ let eqa = (a ^ vn1) != 0;
+ let eqb = (b ^ vn1) != 0;
+ if eqa || eqb {
+ break;
+ }
+ ptr = ptr.add(LOOP_SIZE);
+ }
+ forward_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+/// Return the last index not matching the byte `n1` in `haystack`.
+pub fn inv_memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
+ let vn1 = repeat_byte(n1);
+ let confirm = |byte| byte != n1;
+ let loop_size = cmp::min(LOOP_SIZE, haystack.len());
+ let align = USIZE_BYTES - 1;
+ let start_ptr = haystack.as_ptr();
+
+ unsafe {
+ let end_ptr = haystack.as_ptr().add(haystack.len());
+ let mut ptr = end_ptr;
+
+ if haystack.len() < USIZE_BYTES {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ let chunk = read_unaligned_usize(ptr.sub(USIZE_BYTES));
+ if (chunk ^ vn1) != 0 {
+ return reverse_search(start_ptr, end_ptr, ptr, confirm);
+ }
+
+ ptr = ptr.sub(end_ptr as usize & align);
+ debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
+ while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
+ debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);
+
+ let a = *(ptr.sub(2 * USIZE_BYTES) as *const usize);
+ let b = *(ptr.sub(1 * USIZE_BYTES) as *const usize);
+ let eqa = (a ^ vn1) != 0;
+ let eqb = (b ^ vn1) != 0;
+ if eqa || eqb {
+ break;
+ }
+ ptr = ptr.sub(loop_size);
+ }
+ reverse_search(start_ptr, end_ptr, ptr, confirm)
+ }
+}
+
+#[inline(always)]
+unsafe fn forward_search<F: Fn(u8) -> bool>(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ mut ptr: *const u8,
+ confirm: F,
+) -> Option<usize> {
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr);
+
+ while ptr < end_ptr {
+ if confirm(*ptr) {
+ return Some(sub(ptr, start_ptr));
+ }
+ ptr = ptr.offset(1);
+ }
+ None
+}
+
+#[inline(always)]
+unsafe fn reverse_search<F: Fn(u8) -> bool>(
+ start_ptr: *const u8,
+ end_ptr: *const u8,
+ mut ptr: *const u8,
+ confirm: F,
+) -> Option<usize> {
+ debug_assert!(start_ptr <= ptr);
+ debug_assert!(ptr <= end_ptr);
+
+ while ptr > start_ptr {
+ ptr = ptr.offset(-1);
+ if confirm(*ptr) {
+ return Some(sub(ptr, start_ptr));
+ }
+ }
+ None
+}
+
+unsafe fn read_unaligned_usize(ptr: *const u8) -> usize {
+ (ptr as *const usize).read_unaligned()
+}
+
+/// Subtract `b` from `a` and return the difference. `a` should be greater than
+/// or equal to `b`.
+fn sub(a: *const u8, b: *const u8) -> usize {
+ debug_assert!(a >= b);
+ (a as usize) - (b as usize)
+}
+
+/// Safe wrapper around `forward_search`
+#[inline]
+pub(crate) fn forward_search_bytes<F: Fn(u8) -> bool>(
+ s: &[u8],
+ confirm: F,
+) -> Option<usize> {
+ unsafe {
+ let start = s.as_ptr();
+ let end = start.add(s.len());
+ forward_search(start, end, start, confirm)
+ }
+}
+
+/// Safe wrapper around `reverse_search`
+#[inline]
+pub(crate) fn reverse_search_bytes<F: Fn(u8) -> bool>(
+ s: &[u8],
+ confirm: F,
+) -> Option<usize> {
+ unsafe {
+ let start = s.as_ptr();
+ let end = start.add(s.len());
+ reverse_search(start, end, end, confirm)
+ }
+}
+
+#[cfg(all(test, feature = "std"))]
+mod tests {
+ use super::{inv_memchr, inv_memrchr};
+
+ // search string, search byte, inv_memchr result, inv_memrchr result.
+ // these are expanded into a much larger set of tests in build_tests
+ const TESTS: &[(&[u8], u8, usize, usize)] = &[
+ (b"z", b'a', 0, 0),
+ (b"zz", b'a', 0, 1),
+ (b"aza", b'a', 1, 1),
+ (b"zaz", b'a', 0, 2),
+ (b"zza", b'a', 0, 1),
+ (b"zaa", b'a', 0, 0),
+ (b"zzz", b'a', 0, 2),
+ ];
+
+ type TestCase = (Vec<u8>, u8, Option<(usize, usize)>);
+
+ fn build_tests() -> Vec<TestCase> {
+ #[cfg(not(miri))]
+ const MAX_PER: usize = 515;
+ #[cfg(miri)]
+ const MAX_PER: usize = 10;
+
+ let mut result = vec![];
+ for &(search, byte, fwd_pos, rev_pos) in TESTS {
+ result.push((search.to_vec(), byte, Some((fwd_pos, rev_pos))));
+ for i in 1..MAX_PER {
+ // add a bunch of copies of the search byte to the end.
+ let mut suffixed: Vec<u8> = search.into();
+ suffixed.extend(std::iter::repeat(byte).take(i));
+ result.push((suffixed, byte, Some((fwd_pos, rev_pos))));
+
+ // add a bunch of copies of the search byte to the start.
+ let mut prefixed: Vec<u8> =
+ std::iter::repeat(byte).take(i).collect();
+ prefixed.extend(search);
+ result.push((
+ prefixed,
+ byte,
+ Some((fwd_pos + i, rev_pos + i)),
+ ));
+
+ // add a bunch of copies of the search byte to both ends.
+ let mut surrounded: Vec<u8> =
+ std::iter::repeat(byte).take(i).collect();
+ surrounded.extend(search);
+ surrounded.extend(std::iter::repeat(byte).take(i));
+ result.push((
+ surrounded,
+ byte,
+ Some((fwd_pos + i, rev_pos + i)),
+ ));
+ }
+ }
+
+ // build non-matching tests for several sizes
+ for i in 0..MAX_PER {
+ result.push((
+ std::iter::repeat(b'\0').take(i).collect(),
+ b'\0',
+ None,
+ ));
+ }
+
+ result
+ }
+
+ #[test]
+ fn test_inv_memchr() {
+ use crate::{ByteSlice, B};
+
+ #[cfg(not(miri))]
+ const MAX_OFFSET: usize = 130;
+ #[cfg(miri)]
+ const MAX_OFFSET: usize = 13;
+
+ for (search, byte, matching) in build_tests() {
+ assert_eq!(
+ inv_memchr(byte, &search),
+ matching.map(|m| m.0),
+ "inv_memchr when searching for {:?} in {:?}",
+ byte as char,
+ // better printing
+ B(&search).as_bstr(),
+ );
+ assert_eq!(
+ inv_memrchr(byte, &search),
+ matching.map(|m| m.1),
+ "inv_memrchr when searching for {:?} in {:?}",
+ byte as char,
+ // better printing
+ B(&search).as_bstr(),
+ );
+ // Test a rather large number of offsets for potential alignment
+ // issues.
+ for offset in 1..MAX_OFFSET {
+ if offset >= search.len() {
+ break;
+ }
+ // If this would cause us to shift the results off the end,
+ // skip it so that we don't have to recompute them.
+ if let Some((f, r)) = matching {
+ if offset > f || offset > r {
+ break;
+ }
+ }
+ let realigned = &search[offset..];
+
+ let forward_pos = matching.map(|m| m.0 - offset);
+ let reverse_pos = matching.map(|m| m.1 - offset);
+
+ assert_eq!(
+ inv_memchr(byte, &realigned),
+ forward_pos,
+ "inv_memchr when searching (realigned by {}) for {:?} in {:?}",
+ offset,
+ byte as char,
+ realigned.as_bstr(),
+ );
+ assert_eq!(
+ inv_memrchr(byte, &realigned),
+ reverse_pos,
+ "inv_memrchr when searching (realigned by {}) for {:?} in {:?}",
+ offset,
+ byte as char,
+ realigned.as_bstr(),
+ );
+ }
+ }
+ }
+}
diff --git a/vendor/bstr/src/ext_slice.rs b/vendor/bstr/src/ext_slice.rs
new file mode 100644
index 000000000..91af45083
--- /dev/null
+++ b/vendor/bstr/src/ext_slice.rs
@@ -0,0 +1,3828 @@
+use core::{iter, slice, str};
+
+#[cfg(all(feature = "alloc", feature = "unicode"))]
+use alloc::vec;
+#[cfg(feature = "alloc")]
+use alloc::{borrow::Cow, string::String, vec::Vec};
+
+#[cfg(feature = "std")]
+use std::{ffi::OsStr, path::Path};
+
+use memchr::{memchr, memmem, memrchr};
+
+#[cfg(feature = "alloc")]
+use crate::ext_vec::ByteVec;
+#[cfg(feature = "unicode")]
+use crate::unicode::{
+ whitespace_len_fwd, whitespace_len_rev, GraphemeIndices, Graphemes,
+ SentenceIndices, Sentences, WordIndices, Words, WordsWithBreakIndices,
+ WordsWithBreaks,
+};
+use crate::{
+ ascii,
+ bstr::BStr,
+ byteset,
+ utf8::{self, CharIndices, Chars, Utf8Chunks, Utf8Error},
+};
+
+/// A short-hand constructor for building a `&[u8]`.
+///
+/// This idiosyncratic constructor is useful for concisely building byte string
+/// slices. Its primary utility is in conveniently writing byte string literals
+/// in a uniform way. For example, consider this code that does not compile:
+///
+/// ```ignore
+/// let strs = vec![b"a", b"xy"];
+/// ```
+///
+/// The above code doesn't compile because the type of the byte string literal
+/// `b"a"` is `&'static [u8; 1]`, and the type of `b"xy"` is
+/// `&'static [u8; 2]`. Since their types aren't the same, they can't be stored
+/// in the same `Vec`. (This is dissimilar from normal Unicode string slices,
+/// where both `"a"` and `"xy"` have the same type of `&'static str`.)
+///
+/// One way of getting the above code to compile is to convert byte strings to
+/// slices. You might try this:
+///
+/// ```ignore
+/// let strs = vec![&b"a", &b"xy"];
+/// ```
+///
+/// But this just creates values with type `& &'static [u8; 1]` and
+/// `& &'static [u8; 2]`. Instead, you need to force the issue like so:
+///
+/// ```
+/// let strs = vec![&b"a"[..], &b"xy"[..]];
+/// // or
+/// let strs = vec![b"a".as_ref(), b"xy".as_ref()];
+/// ```
+///
+/// But neither of these is particularly convenient to type, especially when
+/// it's something as common as a string literal. Thus, this constructor
+/// permits writing the following instead:
+///
+/// ```
+/// use bstr::B;
+///
+/// let strs = vec![B("a"), B(b"xy")];
+/// ```
+///
+/// Notice that this also lets you mix and match both string literals and byte
+/// string literals. This can be quite convenient!
+#[allow(non_snake_case)]
+#[inline]
+pub fn B<'a, B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> &'a [u8] {
+ bytes.as_ref()
+}
+
+impl ByteSlice for [u8] {
+ #[inline]
+ fn as_bytes(&self) -> &[u8] {
+ self
+ }
+
+ #[inline]
+ fn as_bytes_mut(&mut self) -> &mut [u8] {
+ self
+ }
+}
+
+impl<const N: usize> ByteSlice for [u8; N] {
+ #[inline]
+ fn as_bytes(&self) -> &[u8] {
+ self
+ }
+
+ #[inline]
+ fn as_bytes_mut(&mut self) -> &mut [u8] {
+ self
+ }
+}
+
+/// Ensure that callers cannot implement `ByteSlice` by making an
+/// unimplementable trait its super trait.
+mod private {
+ pub trait Sealed {}
+}
+impl private::Sealed for [u8] {}
+impl<const N: usize> private::Sealed for [u8; N] {}
+
+/// A trait that extends `&[u8]` with string oriented methods.
+///
+/// This trait is sealed and cannot be implemented outside of `bstr`.
+pub trait ByteSlice: private::Sealed {
+ /// A method for accessing the raw bytes of this type. This is always a
+ /// no-op and callers shouldn't care about it. This only exists for making
+ /// the extension trait work.
+ #[doc(hidden)]
+ fn as_bytes(&self) -> &[u8];
+
+ /// A method for accessing the raw bytes of this type, mutably. This is
+ /// always a no-op and callers shouldn't care about it. This only exists
+ /// for making the extension trait work.
+ #[doc(hidden)]
+ fn as_bytes_mut(&mut self) -> &mut [u8];
+
+ /// Return this byte slice as a `&BStr`.
+ ///
+ /// Using `&BStr` is useful because of its `fmt::Debug` representation
+ /// and various other trait implementations (such as `PartialEq` and
+ /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
+ /// shows its bytes as a normal string. For invalid UTF-8, hex escape
+ /// sequences are used.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// println!("{:?}", b"foo\xFFbar".as_bstr());
+ /// ```
+ #[inline]
+ fn as_bstr(&self) -> &BStr {
+ BStr::new(self.as_bytes())
+ }
+
+ /// Return this byte slice as a `&mut BStr`.
+ ///
+ /// Using `&mut BStr` is useful because of its `fmt::Debug` representation
+ /// and various other trait implementations (such as `PartialEq` and
+ /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
+ /// shows its bytes as a normal string. For invalid UTF-8, hex escape
+ /// sequences are used.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut bytes = *b"foo\xFFbar";
+ /// println!("{:?}", &mut bytes.as_bstr_mut());
+ /// ```
+ #[inline]
+ fn as_bstr_mut(&mut self) -> &mut BStr {
+ BStr::new_mut(self.as_bytes_mut())
+ }
+
+ /// Create an immutable byte string from an OS string slice.
+ ///
+ /// When the underlying bytes of OS strings are accessible, then this
+ /// always succeeds and is zero cost. Otherwise, this returns `None` if the
+ /// given OS string is not valid UTF-8. (For example, when the underlying
+ /// bytes are inaccessible on Windows, file paths are allowed to be a
+ /// sequence of arbitrary 16-bit integers. Not all such sequences can be
+ /// transcoded to valid UTF-8.)
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ ///
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let os_str = OsStr::new("foo");
+ /// let bs = <[u8]>::from_os_str(os_str).expect("should be valid UTF-8");
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn from_os_str(os_str: &OsStr) -> Option<&[u8]> {
+ #[cfg(unix)]
+ #[inline]
+ fn imp(os_str: &OsStr) -> Option<&[u8]> {
+ use std::os::unix::ffi::OsStrExt;
+
+ Some(os_str.as_bytes())
+ }
+
+ #[cfg(not(unix))]
+ #[inline]
+ fn imp(os_str: &OsStr) -> Option<&[u8]> {
+ os_str.to_str().map(|s| s.as_bytes())
+ }
+
+ imp(os_str)
+ }
+
+ /// Create an immutable byte string from a file path.
+ ///
+ /// When the underlying bytes of paths are accessible, then this always
+ /// succeeds and is zero cost. Otherwise, this returns `None` if the given
+ /// path is not valid UTF-8. (For example, when the underlying bytes are
+ /// inaccessible on Windows, file paths are allowed to be a sequence of
+ /// arbitrary 16-bit integers. Not all such sequences can be transcoded to
+ /// valid UTF-8.)
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::path::Path;
+ ///
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let path = Path::new("foo");
+ /// let bs = <[u8]>::from_path(path).expect("should be valid UTF-8");
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn from_path(path: &Path) -> Option<&[u8]> {
+ Self::from_os_str(path.as_os_str())
+ }
+
+ /// Safely convert this byte string into a `&str` if it's valid UTF-8.
+ ///
+ /// If this byte string is not valid UTF-8, then an error is returned. The
+ /// error returned indicates the first invalid byte found and the length
+ /// of the error.
+ ///
+ /// In cases where a lossy conversion to `&str` is acceptable, then use one
+ /// of the [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) or
+ /// [`to_str_lossy_into`](trait.ByteSlice.html#method.to_str_lossy_into)
+ /// methods.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// # #[cfg(feature = "alloc")] {
+ /// use bstr::{B, ByteSlice, ByteVec};
+ ///
+ /// # fn example() -> Result<(), bstr::Utf8Error> {
+ /// let s = B("☃βツ").to_str()?;
+ /// assert_eq!("☃βツ", s);
+ ///
+ /// let mut bstring = <Vec<u8>>::from("☃βツ");
+ /// bstring.push(b'\xFF');
+ /// let err = bstring.to_str().unwrap_err();
+ /// assert_eq!(8, err.valid_up_to());
+ /// # Ok(()) }; example().unwrap()
+ /// # }
+ /// ```
+ #[inline]
+ fn to_str(&self) -> Result<&str, Utf8Error> {
+ utf8::validate(self.as_bytes()).map(|_| {
+ // SAFETY: This is safe because of the guarantees provided by
+ // utf8::validate.
+ unsafe { str::from_utf8_unchecked(self.as_bytes()) }
+ })
+ }
+
+ /// Unsafely convert this byte string into a `&str`, without checking for
+ /// valid UTF-8.
+ ///
+ /// # Safety
+ ///
+ /// Callers *must* ensure that this byte string is valid UTF-8 before
+ /// calling this method. Converting a byte string into a `&str` that is
+ /// not valid UTF-8 is considered undefined behavior.
+ ///
+ /// This routine is useful in performance sensitive contexts where the
+ /// UTF-8 validity of the byte string is already known and it is
+ /// undesirable to pay the cost of an additional UTF-8 validation check
+ /// that [`to_str`](trait.ByteSlice.html#method.to_str) performs.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// // SAFETY: This is safe because string literals are guaranteed to be
+ /// // valid UTF-8 by the Rust compiler.
+ /// let s = unsafe { B("☃βツ").to_str_unchecked() };
+ /// assert_eq!("☃βツ", s);
+ /// ```
+ #[inline]
+ unsafe fn to_str_unchecked(&self) -> &str {
+ str::from_utf8_unchecked(self.as_bytes())
+ }
+
+ /// Convert this byte string to a valid UTF-8 string by replacing invalid
+ /// UTF-8 bytes with the Unicode replacement codepoint (`U+FFFD`).
+ ///
+ /// If the byte string is already valid UTF-8, then no copying or
+ /// allocation is performed and a borrowed string slice is returned. If
+ /// the byte string is not valid UTF-8, then an owned string buffer is
+ /// returned with invalid bytes replaced by the replacement codepoint.
+ ///
+ /// This method uses the "substitution of maximal subparts" (Unicode
+ /// Standard, Chapter 3, Section 9) strategy for inserting the replacement
+ /// codepoint. Specifically, a replacement codepoint is inserted whenever a
+ /// byte is found that cannot possibly lead to a valid code unit sequence.
+ /// If there were previous bytes that represented a prefix of a well-formed
+ /// code unit sequence, then all of those bytes are substituted with a
+ /// single replacement codepoint. The "substitution of maximal subparts"
+ /// strategy is the same strategy used by
+ /// [W3C's Encoding standard](https://www.w3.org/TR/encoding/).
+ /// For a more precise description of the maximal subpart strategy, see
+ /// the Unicode Standard, Chapter 3, Section 9. See also
+ /// [Public Review Issue #121](https://www.unicode.org/review/pr-121.html).
+ ///
+ /// N.B. Rust's standard library also appears to use the same strategy,
+ /// but it does not appear to be an API guarantee.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::borrow::Cow;
+ ///
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut bstring = <Vec<u8>>::from("☃βツ");
+ /// assert_eq!(Cow::Borrowed("☃βツ"), bstring.to_str_lossy());
+ ///
+ /// // An invalid byte forces an owned copy (`Cow` compares by content).
+ /// bstring.push(b'\xFF');
+ /// assert_eq!(Cow::Borrowed("☃βツ\u{FFFD}"), bstring.to_str_lossy());
+ /// ```
+ ///
+ /// This demonstrates the "maximal subpart" substitution logic.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// // \x61 is the ASCII codepoint for 'a'.
+ /// // \xF1\x80\x80 is a valid 3-byte code unit prefix.
+ /// // \xE1\x80 is a valid 2-byte code unit prefix.
+ /// // \xC2 is a valid 1-byte code unit prefix.
+ /// // \x62 is the ASCII codepoint for 'b'.
+ /// //
+ /// // In sum, each of the prefixes is replaced by a single replacement
+ /// // codepoint since none of the prefixes are properly completed. This
+ /// // is in contrast to other strategies that might insert a replacement
+ /// // codepoint for every single byte.
+ /// let bs = B(b"\x61\xF1\x80\x80\xE1\x80\xC2\x62");
+ /// assert_eq!("a\u{FFFD}\u{FFFD}\u{FFFD}b", bs.to_str_lossy());
+ /// ```
+ #[cfg(feature = "alloc")]
+ #[inline]
+ fn to_str_lossy(&self) -> Cow<'_, str> {
+ match utf8::validate(self.as_bytes()) {
+ Ok(()) => {
+ // SAFETY: This is safe because of the guarantees provided by
+ // utf8::validate.
+ unsafe {
+ Cow::Borrowed(str::from_utf8_unchecked(self.as_bytes()))
+ }
+ }
+ Err(err) => {
+ let mut lossy = String::with_capacity(self.as_bytes().len());
+ let (valid, after) =
+ self.as_bytes().split_at(err.valid_up_to());
+ // SAFETY: This is safe because utf8::validate guarantees
+ // that all of `valid` is valid UTF-8.
+ lossy.push_str(unsafe { str::from_utf8_unchecked(valid) });
+ lossy.push_str("\u{FFFD}");
+ if let Some(len) = err.error_len() {
+ after[len..].to_str_lossy_into(&mut lossy);
+ }
+ Cow::Owned(lossy)
+ }
+ }
+ }
+
+ /// Copy the contents of this byte string into the given owned string
+ /// buffer, while replacing invalid UTF-8 code unit sequences with the
+ /// Unicode replacement codepoint (`U+FFFD`).
+ ///
+ /// This method uses the same "substitution of maximal subparts" strategy
+ /// for inserting the replacement codepoint as the
+ /// [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) method.
+ ///
+ /// This routine is useful for amortizing allocation. However, unlike
+ /// `to_str_lossy`, this routine will _always_ copy the contents of this
+ /// byte string into the destination buffer, even if this byte string is
+ /// valid UTF-8.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::borrow::Cow;
+ ///
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut bstring = <Vec<u8>>::from("☃βツ");
+ /// // Add a byte that makes the sequence invalid.
+ /// bstring.push(b'\xFF');
+ ///
+ /// let mut dest = String::new();
+ /// bstring.to_str_lossy_into(&mut dest);
+ /// assert_eq!("☃βツ\u{FFFD}", dest);
+ /// ```
+ #[cfg(feature = "alloc")]
+ #[inline]
+ fn to_str_lossy_into(&self, dest: &mut String) {
+ let mut bytes = self.as_bytes();
+ dest.reserve(bytes.len());
+ loop {
+ match utf8::validate(bytes) {
+ Ok(()) => {
+ // SAFETY: This is safe because utf8::validate guarantees
+ // that all of `bytes` is valid UTF-8.
+ dest.push_str(unsafe { str::from_utf8_unchecked(bytes) });
+ break;
+ }
+ Err(err) => {
+ let (valid, after) = bytes.split_at(err.valid_up_to());
+ // SAFETY: This is safe because utf8::validate guarantees
+ // that all of `valid` is valid UTF-8.
+ dest.push_str(unsafe { str::from_utf8_unchecked(valid) });
+ dest.push_str("\u{FFFD}");
+ match err.error_len() {
+ None => break,
+ Some(len) => bytes = &after[len..],
+ }
+ }
+ }
+ }
+ }
+
+ /// Create an OS string slice from this byte string.
+ ///
+ /// When OS strings can be constructed from arbitrary byte sequences, this
+ /// always succeeds and is zero cost. Otherwise, this returns a UTF-8
+ /// decoding error if this byte string is not valid UTF-8. (For example,
+ /// assuming the representation of `OsStr` is opaque on Windows, file paths
+ /// are allowed to be a sequence of arbitrary 16-bit integers. There is
+ /// no obvious mapping from an arbitrary sequence of 8-bit integers to an
+ /// arbitrary sequence of 16-bit integers. If the representation of `OsStr`
+ /// is ever opened up, then this will convert any sequence of bytes to an
+ /// `OsStr` without cost.)
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let os_str = b"foo".to_os_str().expect("should be valid UTF-8");
+ /// assert_eq!(os_str, "foo");
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_os_str(&self) -> Result<&OsStr, Utf8Error> {
+ #[cfg(unix)]
+ #[inline]
+ fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
+ use std::os::unix::ffi::OsStrExt;
+
+ Ok(OsStr::from_bytes(bytes))
+ }
+
+ #[cfg(not(unix))]
+ #[inline]
+ fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
+ bytes.to_str().map(OsStr::new)
+ }
+
+ imp(self.as_bytes())
+ }
+
+ /// Lossily create an OS string slice from this byte string.
+ ///
+ /// When OS strings can be constructed from arbitrary byte sequences, this
+ /// is zero cost and always returns a slice. Otherwise, this will perform a
+ /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
+ /// the Unicode replacement codepoint.
+ ///
+ /// Note that this can prevent the correct roundtripping of file paths when
+ /// the representation of `OsStr` is opaque.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let os_str = b"foo\xFFbar".to_os_str_lossy();
+ /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_os_str_lossy(&self) -> Cow<'_, OsStr> {
+ #[cfg(unix)]
+ #[inline]
+ fn imp(bytes: &[u8]) -> Cow<'_, OsStr> {
+ use std::os::unix::ffi::OsStrExt;
+
+ Cow::Borrowed(OsStr::from_bytes(bytes))
+ }
+
+ #[cfg(not(unix))]
+ #[inline]
+ fn imp(bytes: &[u8]) -> Cow<OsStr> {
+ use std::ffi::OsString;
+
+ match bytes.to_str_lossy() {
+ Cow::Borrowed(x) => Cow::Borrowed(OsStr::new(x)),
+ Cow::Owned(x) => Cow::Owned(OsString::from(x)),
+ }
+ }
+
+ imp(self.as_bytes())
+ }
+
+ /// Create a path slice from this byte string.
+ ///
+ /// When paths can be constructed from arbitrary byte sequences, this
+ /// always succeeds and is zero cost. Otherwise, this returns a UTF-8
+ /// decoding error if this byte string is not valid UTF-8. (For example,
+ /// assuming the representation of `Path` is opaque on Windows, file paths
+ /// are allowed to be a sequence of arbitrary 16-bit integers. There is
+ /// no obvious mapping from an arbitrary sequence of 8-bit integers to an
+ /// arbitrary sequence of 16-bit integers. If the representation of `Path`
+ /// is ever opened up, then this will convert any sequence of bytes to a
+ /// `Path` without cost.)
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let path = b"foo".to_path().expect("should be valid UTF-8");
+ /// assert_eq!(path.as_os_str(), "foo");
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_path(&self) -> Result<&Path, Utf8Error> {
+ self.to_os_str().map(Path::new)
+ }
+
+ /// Lossily create a path slice from this byte string.
+ ///
+ /// When paths can be constructed from arbitrary byte sequences, this is
+ /// zero cost and always returns a slice. Otherwise, this will perform a
+ /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
+ /// the Unicode replacement codepoint.
+ ///
+ /// Note that this can prevent the correct roundtripping of file paths when
+ /// the representation of `Path` is opaque.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"foo\xFFbar";
+ /// let path = bs.to_path_lossy();
+ /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn to_path_lossy(&self) -> Cow<'_, Path> {
+ use std::path::PathBuf;
+
+ match self.to_os_str_lossy() {
+ Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
+ Cow::Owned(x) => Cow::Owned(PathBuf::from(x)),
+ }
+ }
+
+ /// Create a new byte string by repeating this byte string `n` times.
+ ///
+ /// # Panics
+ ///
+ /// This function panics if the capacity of the new byte string would
+ /// overflow.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert_eq!(b"foo".repeatn(4), B("foofoofoofoo"));
+ /// assert_eq!(b"foo".repeatn(0), B(""));
+ /// ```
+ #[cfg(feature = "alloc")]
+ #[inline]
+ fn repeatn(&self, n: usize) -> Vec<u8> {
+ self.as_bytes().repeat(n)
+ }
+
+ /// Returns true if and only if this byte string contains the given needle.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert!(b"foo bar".contains_str("foo"));
+ /// assert!(b"foo bar".contains_str("bar"));
+ /// assert!(!b"foo".contains_str("foobar"));
+ /// ```
+ #[inline]
+ fn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool {
+ self.find(needle).is_some()
+ }
+
+ /// Returns true if and only if this byte string has the given prefix.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert!(b"foo bar".starts_with_str("foo"));
+ /// assert!(!b"foo bar".starts_with_str("bar"));
+ /// assert!(!b"foo".starts_with_str("foobar"));
+ /// ```
+ #[inline]
+ fn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool {
+ self.as_bytes().starts_with(prefix.as_ref())
+ }
+
+ /// Returns true if and only if this byte string has the given suffix.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert!(b"foo bar".ends_with_str("bar"));
+ /// assert!(!b"foo bar".ends_with_str("foo"));
+ /// assert!(!b"bar".ends_with_str("foobar"));
+ /// ```
+ #[inline]
+ fn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool {
+ self.as_bytes().ends_with(suffix.as_ref())
+ }
+
+ /// Returns the index of the first occurrence of the given needle.
+ ///
+ /// The needle may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// Note that if you are searching for the same needle in many
+ /// different small haystacks, it may be faster to initialize a
+ /// [`Finder`](struct.Finder.html) once, and reuse it for each search.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the needle and the haystack. That is, this runs
+ /// in `O(needle.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo bar baz";
+ /// assert_eq!(Some(0), s.find("foo"));
+ /// assert_eq!(Some(4), s.find("bar"));
+ /// assert_eq!(None, s.find("quux"));
+ /// ```
+ #[inline]
+ fn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
+ Finder::new(needle.as_ref()).find(self.as_bytes())
+ }
+
+ /// Returns the index of the last occurrence of the given needle.
+ ///
+ /// The needle may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// Note that if you are searching for the same needle in many
+ /// different small haystacks, it may be faster to initialize a
+ /// [`FinderReverse`](struct.FinderReverse.html) once, and reuse it for
+ /// each search.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the needle and the haystack. That is, this runs
+ /// in `O(needle.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo bar baz";
+ /// assert_eq!(Some(0), s.rfind("foo"));
+ /// assert_eq!(Some(4), s.rfind("bar"));
+ /// assert_eq!(Some(8), s.rfind("ba"));
+ /// assert_eq!(None, s.rfind("quux"));
+ /// ```
+ #[inline]
+ fn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
+ FinderReverse::new(needle.as_ref()).rfind(self.as_bytes())
+ }
+
+ /// Returns an iterator of the non-overlapping occurrences of the given
+ /// needle. The iterator yields byte offset positions indicating the start
+ /// of each match.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the needle and the haystack. That is, this runs
+ /// in `O(needle.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo bar foo foo quux foo";
+ /// let matches: Vec<usize> = s.find_iter("foo").collect();
+ /// assert_eq!(matches, vec![0, 8, 12, 21]);
+ /// ```
+ ///
+ /// An empty string matches at every position, including the position
+ /// immediately following the last byte:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let matches: Vec<usize> = b"foo".find_iter("").collect();
+ /// assert_eq!(matches, vec![0, 1, 2, 3]);
+ ///
+ /// let matches: Vec<usize> = b"".find_iter("").collect();
+ /// assert_eq!(matches, vec![0]);
+ /// ```
+ #[inline]
+ fn find_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
+ &'h self,
+ needle: &'n B,
+ ) -> Find<'h, 'n> {
+ Find::new(self.as_bytes(), needle.as_ref())
+ }
+
+ /// Returns an iterator of the non-overlapping occurrences of the given
+ /// needle in reverse. The iterator yields byte offset positions indicating
+ /// the start of each match.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the needle and the haystack. That is, this runs
+ /// in `O(needle.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo bar foo foo quux foo";
+ /// let matches: Vec<usize> = s.rfind_iter("foo").collect();
+ /// assert_eq!(matches, vec![21, 12, 8, 0]);
+ /// ```
+ ///
+ /// An empty string matches at every position, including the position
+ /// immediately following the last byte:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let matches: Vec<usize> = b"foo".rfind_iter("").collect();
+ /// assert_eq!(matches, vec![3, 2, 1, 0]);
+ ///
+ /// let matches: Vec<usize> = b"".rfind_iter("").collect();
+ /// assert_eq!(matches, vec![0]);
+ /// ```
+ #[inline]
+ fn rfind_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
+ &'h self,
+ needle: &'n B,
+ ) -> FindReverse<'h, 'n> {
+ FindReverse::new(self.as_bytes(), needle.as_ref())
+ }
+
+ /// Returns the index of the first occurrence of the given byte. If the
+ /// byte does not occur in this byte string, then `None` is returned.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(Some(10), b"foo bar baz".find_byte(b'z'));
+ /// assert_eq!(None, b"foo bar baz".find_byte(b'y'));
+ /// ```
+ #[inline]
+ fn find_byte(&self, byte: u8) -> Option<usize> {
+ memchr(byte, self.as_bytes())
+ }
+
+ /// Returns the index of the last occurrence of the given byte. If the
+ /// byte does not occur in this byte string, then `None` is returned.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(Some(10), b"foo bar baz".rfind_byte(b'z'));
+ /// assert_eq!(None, b"foo bar baz".rfind_byte(b'y'));
+ /// ```
+ #[inline]
+ fn rfind_byte(&self, byte: u8) -> Option<usize> {
+ memrchr(byte, self.as_bytes())
+ }
+
+ /// Returns the index of the first occurrence of the given codepoint.
+ /// If the codepoint does not occur in this byte string, then `None` is
+ /// returned.
+ ///
+ /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
+ /// then only explicit occurrences of that encoding will be found. Invalid
+ /// UTF-8 sequences will not be matched.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert_eq!(Some(10), b"foo bar baz".find_char('z'));
+ /// assert_eq!(Some(4), B("αβγγδ").find_char('γ'));
+ /// assert_eq!(None, b"foo bar baz".find_char('y'));
+ /// ```
+ #[inline]
+ fn find_char(&self, ch: char) -> Option<usize> {
+ self.find(ch.encode_utf8(&mut [0; 4]))
+ }
+
+ /// Returns the index of the last occurrence of the given codepoint.
+ /// If the codepoint does not occur in this byte string, then `None` is
+ /// returned.
+ ///
+ /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
+ /// then only explicit occurrences of that encoding will be found. Invalid
+ /// UTF-8 sequences will not be matched.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert_eq!(Some(10), b"foo bar baz".rfind_char('z'));
+ /// assert_eq!(Some(6), B("αβγγδ").rfind_char('γ'));
+ /// assert_eq!(None, b"foo bar baz".rfind_char('y'));
+ /// ```
+ #[inline]
+ fn rfind_char(&self, ch: char) -> Option<usize> {
+ self.rfind(ch.encode_utf8(&mut [0; 4]))
+ }
+
+ /// Returns the index of the first occurrence of any of the bytes in the
+ /// provided set.
+ ///
+ /// The `byteset` may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
+ /// note that passing a `&str` which contains multibyte characters may not
+ /// behave as you expect: each byte in the `&str` is treated as an
+ /// individual member of the byte set.
+ ///
+ /// Note that order is irrelevant for the `byteset` parameter, and
+ /// duplicate bytes present in its body are ignored.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the set of bytes and the haystack. That is, this
+ /// runs in `O(byteset.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(b"foo bar baz".find_byteset(b"zr"), Some(6));
+ /// assert_eq!(b"foo baz bar".find_byteset(b"bzr"), Some(4));
+ /// assert_eq!(None, b"foo baz bar".find_byteset(b"\t\n"));
+ /// // The empty byteset never matches.
+ /// assert_eq!(None, b"abc".find_byteset(b""));
+ /// assert_eq!(None, b"".find_byteset(b""));
+ /// ```
+ #[inline]
+ fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
+ byteset::find(self.as_bytes(), byteset.as_ref())
+ }
+
+ /// Returns the index of the first occurrence of a byte that is not a
+ /// member of the provided set.
+ ///
+ /// The `byteset` may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
+ /// note that passing a `&str` which contains multibyte characters may not
+ /// behave as you expect: each byte in the `&str` is treated as an
+ /// individual member of the byte set.
+ ///
+ /// Note that order is irrelevant for the `byteset` parameter, and
+ /// duplicate bytes present in its body are ignored.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the set of bytes and the haystack. That is, this
+ /// runs in `O(byteset.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(b"foo bar baz".find_not_byteset(b"fo "), Some(4));
+ /// assert_eq!(b"\t\tbaz bar".find_not_byteset(b" \t\r\n"), Some(2));
+ /// assert_eq!(b"foo\nbaz\tbar".find_not_byteset(b"\t\n"), Some(0));
+ /// // The negation of the empty byteset matches everything.
+ /// assert_eq!(Some(0), b"abc".find_not_byteset(b""));
+ /// // But an empty string never contains anything.
+ /// assert_eq!(None, b"".find_not_byteset(b""));
+ /// ```
+ #[inline]
+ fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
+ byteset::find_not(self.as_bytes(), byteset.as_ref())
+ }
+
+ /// Returns the index of the last occurrence of any of the bytes in the
+ /// provided set.
+ ///
+ /// The `byteset` may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
+ /// note that passing a `&str` which contains multibyte characters may not
+ /// behave as you expect: each byte in the `&str` is treated as an
+ /// individual member of the byte set.
+ ///
+ /// Note that order is irrelevant for the `byteset` parameter, and duplicate
+ /// bytes present in its body are ignored.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the set of bytes and the haystack. That is, this
+ /// runs in `O(byteset.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(b"foo bar baz".rfind_byteset(b"agb"), Some(9));
+ /// assert_eq!(b"foo baz bar".rfind_byteset(b"rabz "), Some(10));
+ /// assert_eq!(b"foo baz bar".rfind_byteset(b"\n123"), None);
+ /// ```
+ #[inline]
+ fn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
+     // Reverse search over the raw bytes; the set is viewed as a plain slice.
+     let haystack = self.as_bytes();
+     let set = byteset.as_ref();
+     byteset::rfind(haystack, set)
+ }
+
+ /// Returns the index of the last occurrence of a byte that is not a member
+ /// of the provided set.
+ ///
+ /// The `byteset` may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
+ /// note that passing a `&str` which contains multibyte characters may not
+ /// behave as you expect: each byte in the `&str` is treated as an
+ /// individual member of the byte set.
+ ///
+ /// Note that order is irrelevant for the `byteset` parameter, and
+ /// duplicate bytes present in its body are ignored.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the set of bytes and the haystack. That is, this
+ /// runs in `O(byteset.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(b"foo bar baz,\t".rfind_not_byteset(b",\t"), Some(10));
+ /// assert_eq!(b"foo baz bar".rfind_not_byteset(b"rabz "), Some(2));
+ /// assert_eq!(None, b"foo baz bar".rfind_not_byteset(b"barfoz "));
+ /// ```
+ #[inline]
+ fn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
+     // Reverse search for a byte outside the set, delegated to `byteset`.
+     let haystack = self.as_bytes();
+     let set = byteset.as_ref();
+     byteset::rfind_not(haystack, set)
+ }
+
+ /// Returns an iterator over the fields in a byte string, separated
+ /// by contiguous whitespace (according to the Unicode property
+ /// `White_Space`).
+ ///
+ /// # Example
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(" foo\tbar\t\u{2003}\nquux \n");
+ /// let fields: Vec<&[u8]> = s.fields().collect();
+ /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
+ /// ```
+ ///
+ /// A byte string consisting of just whitespace yields no elements:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert_eq!(0, B(" \n\t\u{2003}\n \t").fields().count());
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn fields(&self) -> Fields<'_> {
+     // The iterator borrows the underlying bytes for its whole lifetime.
+     let haystack = self.as_bytes();
+     Fields::new(haystack)
+ }
+
+ /// Returns an iterator over the fields in a byte string, separated by
+ /// contiguous codepoints satisfying the given predicate.
+ ///
+ /// If this byte string is not valid UTF-8, then the given closure will
+ /// be called with a Unicode replacement codepoint when invalid UTF-8
+ /// bytes are seen.
+ ///
+ /// # Example
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"123foo999999bar1quux123456";
+ /// let fields: Vec<&[u8]> = s.fields_with(|c| c.is_numeric()).collect();
+ /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
+ /// ```
+ ///
+ /// A byte string consisting of all codepoints satisfying the predicate
+ /// yields no elements:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(0, b"1911354563".fields_with(|c| c.is_numeric()).count());
+ /// ```
+ #[inline]
+ fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<'_, F> {
+     // Ownership of the predicate moves into the returned iterator.
+     let haystack = self.as_bytes();
+     FieldsWith::new(haystack, f)
+ }
+
+ /// Returns an iterator over substrings of this byte string, separated
+ /// by the given byte string. Each element yielded is guaranteed not to
+ /// include the splitter substring.
+ ///
+ /// The splitter may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"Mary had a little lamb".split_str(" ").collect();
+ /// assert_eq!(x, vec![
+ /// B("Mary"), B("had"), B("a"), B("little"), B("lamb"),
+ /// ]);
+ ///
+ /// let x: Vec<&[u8]> = b"".split_str("X").collect();
+ /// assert_eq!(x, vec![b""]);
+ ///
+ /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".split_str("X").collect();
+ /// assert_eq!(x, vec![B("lion"), B(""), B("tiger"), B("leopard")]);
+ ///
+ /// let x: Vec<&[u8]> = b"lion::tiger::leopard".split_str("::").collect();
+ /// assert_eq!(x, vec![B("lion"), B("tiger"), B("leopard")]);
+ /// ```
+ ///
+ /// If a string contains multiple contiguous separators, you will end up
+ /// with empty strings yielded by the iterator:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"||||a||b|c".split_str("|").collect();
+ /// assert_eq!(x, vec![
+ /// B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
+ /// ]);
+ ///
+ /// let x: Vec<&[u8]> = b"(///)".split_str("/").collect();
+ /// assert_eq!(x, vec![B("("), B(""), B(""), B(")")]);
+ /// ```
+ ///
+ /// Separators at the start or end of a string are neighbored by empty
+ /// strings.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"010".split_str("0").collect();
+ /// assert_eq!(x, vec![B(""), B("1"), B("")]);
+ /// ```
+ ///
+ /// When the empty string is used as a separator, it splits every **byte**
+ /// in the byte string, along with the beginning and end of the byte
+ /// string.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"rust".split_str("").collect();
+ /// assert_eq!(x, vec![
+ /// B(""), B("r"), B("u"), B("s"), B("t"), B(""),
+ /// ]);
+ ///
+ /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
+ /// // may not be valid UTF-8!
+ /// let x: Vec<&[u8]> = B("☃").split_str("").collect();
+ /// assert_eq!(x, vec![
+ /// B(""), B(b"\xE2"), B(b"\x98"), B(b"\x83"), B(""),
+ /// ]);
+ /// ```
+ ///
+ /// Contiguous separators, especially whitespace, can lead to possibly
+ /// surprising behavior. For example, this code is correct:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b" a b c".split_str(" ").collect();
+ /// assert_eq!(x, vec![
+ /// B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
+ /// ]);
+ /// ```
+ ///
+ /// It does *not* give you `["a", "b", "c"]`. For that behavior, use
+ /// [`fields`](#method.fields) instead.
+ #[inline]
+ fn split_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
+     &'h self,
+     splitter: &'s B,
+ ) -> Split<'h, 's> {
+     // The iterator borrows both the haystack ('h) and the splitter ('s).
+     let haystack = self.as_bytes();
+     let separator = splitter.as_ref();
+     Split::new(haystack, separator)
+ }
+
+ /// Returns an iterator over substrings of this byte string, separated by
+ /// the given byte string, in reverse. Each element yielded is guaranteed
+ /// not to include the splitter substring.
+ ///
+ /// The splitter may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> =
+ /// b"Mary had a little lamb".rsplit_str(" ").collect();
+ /// assert_eq!(x, vec![
+ /// B("lamb"), B("little"), B("a"), B("had"), B("Mary"),
+ /// ]);
+ ///
+ /// let x: Vec<&[u8]> = b"".rsplit_str("X").collect();
+ /// assert_eq!(x, vec![b""]);
+ ///
+ /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".rsplit_str("X").collect();
+ /// assert_eq!(x, vec![B("leopard"), B("tiger"), B(""), B("lion")]);
+ ///
+ /// let x: Vec<&[u8]> = b"lion::tiger::leopard".rsplit_str("::").collect();
+ /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lion")]);
+ /// ```
+ ///
+ /// If a string contains multiple contiguous separators, you will end up
+ /// with empty strings yielded by the iterator:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"||||a||b|c".rsplit_str("|").collect();
+ /// assert_eq!(x, vec![
+ /// B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
+ /// ]);
+ ///
+ /// let x: Vec<&[u8]> = b"(///)".rsplit_str("/").collect();
+ /// assert_eq!(x, vec![B(")"), B(""), B(""), B("(")]);
+ /// ```
+ ///
+ /// Separators at the start or end of a string are neighbored by empty
+ /// strings.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"010".rsplit_str("0").collect();
+ /// assert_eq!(x, vec![B(""), B("1"), B("")]);
+ /// ```
+ ///
+ /// When the empty string is used as a separator, it splits every **byte**
+ /// in the byte string, along with the beginning and end of the byte
+ /// string.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b"rust".rsplit_str("").collect();
+ /// assert_eq!(x, vec![
+ /// B(""), B("t"), B("s"), B("u"), B("r"), B(""),
+ /// ]);
+ ///
+ /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
+ /// // may not be valid UTF-8!
+ /// let x: Vec<&[u8]> = B("☃").rsplit_str("").collect();
+ /// assert_eq!(x, vec![B(""), B(b"\x83"), B(b"\x98"), B(b"\xE2"), B("")]);
+ /// ```
+ ///
+ /// Contiguous separators, especially whitespace, can lead to possibly
+ /// surprising behavior. For example, this code is correct:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<&[u8]> = b" a b c".rsplit_str(" ").collect();
+ /// assert_eq!(x, vec![
+ /// B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
+ /// ]);
+ /// ```
+ ///
+ /// It does *not* give you `["a", "b", "c"]`.
+ #[inline]
+ fn rsplit_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
+     &'h self,
+     splitter: &'s B,
+ ) -> SplitReverse<'h, 's> {
+     // The iterator borrows both the haystack ('h) and the splitter ('s).
+     let haystack = self.as_bytes();
+     let separator = splitter.as_ref();
+     SplitReverse::new(haystack, separator)
+ }
+
+ /// Split this byte string at the first occurrence of `splitter`.
+ ///
+ /// If the `splitter` is found in the byte string, returns a tuple
+ /// containing the parts of the string before and after the first occurrence
+ /// of `splitter`, respectively. Otherwise, if there are no occurrences of
+ /// `splitter` in the byte string, returns `None`.
+ ///
+ /// The splitter may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// If you need to split on the *last* instance of a delimiter instead, see
+ /// the [`ByteSlice::rsplit_once_str`](#method.rsplit_once_str) method.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert_eq!(
+ /// B("foo,bar").split_once_str(","),
+ /// Some((B("foo"), B("bar"))),
+ /// );
+ /// assert_eq!(
+ /// B("foo,bar,baz").split_once_str(","),
+ /// Some((B("foo"), B("bar,baz"))),
+ /// );
+ /// assert_eq!(B("foo").split_once_str(","), None);
+ /// assert_eq!(B("foo,").split_once_str(b","), Some((B("foo"), B(""))));
+ /// assert_eq!(B(",foo").split_once_str(b","), Some((B(""), B("foo"))));
+ /// ```
+ #[inline]
+ fn split_once_str<'a, B: ?Sized + AsRef<[u8]>>(
+     &'a self,
+     splitter: &B,
+ ) -> Option<(&'a [u8], &'a [u8])> {
+     let haystack = self.as_bytes();
+     let needle = splitter.as_ref();
+     // Bail out with `None` when the splitter does not occur at all.
+     let at = Finder::new(needle).find(haystack)?;
+     // `rest` begins with the matched splitter; drop it from the tail half.
+     let (before, rest) = haystack.split_at(at);
+     Some((before, &rest[needle.len()..]))
+ }
+
+ /// Split this byte string at the last occurrence of `splitter`.
+ ///
+ /// If the `splitter` is found in the byte string, returns a tuple
+ /// containing the parts of the string before and after the last occurrence
+ /// of `splitter`, respectively. Otherwise, if there are no occurrences of
+ /// `splitter` in the byte string, returns `None`.
+ ///
+ /// The splitter may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// If you need to split on the *first* instance of a delimiter instead, see
+ /// the [`ByteSlice::split_once_str`](#method.split_once_str) method.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert_eq!(
+ /// B("foo,bar").rsplit_once_str(","),
+ /// Some((B("foo"), B("bar"))),
+ /// );
+ /// assert_eq!(
+ /// B("foo,bar,baz").rsplit_once_str(","),
+ /// Some((B("foo,bar"), B("baz"))),
+ /// );
+ /// assert_eq!(B("foo").rsplit_once_str(","), None);
+ /// assert_eq!(B("foo,").rsplit_once_str(b","), Some((B("foo"), B(""))));
+ /// assert_eq!(B(",foo").rsplit_once_str(b","), Some((B(""), B("foo"))));
+ /// ```
+ #[inline]
+ fn rsplit_once_str<'a, B: ?Sized + AsRef<[u8]>>(
+     &'a self,
+     splitter: &B,
+ ) -> Option<(&'a [u8], &'a [u8])> {
+     let haystack = self.as_bytes();
+     let needle = splitter.as_ref();
+     // Bail out with `None` when the splitter does not occur at all.
+     let at = FinderReverse::new(needle).rfind(haystack)?;
+     // `rest` begins with the matched splitter; drop it from the tail half.
+     let (before, rest) = haystack.split_at(at);
+     Some((before, &rest[needle.len()..]))
+ }
+
+ /// Returns an iterator of at most `limit` substrings of this byte string,
+ /// separated by the given byte string. If `limit` substrings are yielded,
+ /// then the last substring will contain the remainder of this byte string.
+ ///
+ /// The splitter may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<_> = b"Mary had a little lamb".splitn_str(3, " ").collect();
+ /// assert_eq!(x, vec![B("Mary"), B("had"), B("a little lamb")]);
+ ///
+ /// let x: Vec<_> = b"".splitn_str(3, "X").collect();
+ /// assert_eq!(x, vec![b""]);
+ ///
+ /// let x: Vec<_> = b"lionXXtigerXleopard".splitn_str(3, "X").collect();
+ /// assert_eq!(x, vec![B("lion"), B(""), B("tigerXleopard")]);
+ ///
+ /// let x: Vec<_> = b"lion::tiger::leopard".splitn_str(2, "::").collect();
+ /// assert_eq!(x, vec![B("lion"), B("tiger::leopard")]);
+ ///
+ /// let x: Vec<_> = b"abcXdef".splitn_str(1, "X").collect();
+ /// assert_eq!(x, vec![B("abcXdef")]);
+ ///
+ /// let x: Vec<_> = b"abcdef".splitn_str(2, "X").collect();
+ /// assert_eq!(x, vec![B("abcdef")]);
+ ///
+ /// let x: Vec<_> = b"abcXdef".splitn_str(0, "X").collect();
+ /// assert!(x.is_empty());
+ /// ```
+ #[inline]
+ fn splitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
+     &'h self,
+     limit: usize,
+     splitter: &'s B,
+ ) -> SplitN<'h, 's> {
+     // Note the argument order: `SplitN::new` takes the limit last.
+     let haystack = self.as_bytes();
+     let separator = splitter.as_ref();
+     SplitN::new(haystack, separator, limit)
+ }
+
+ /// Returns an iterator of at most `limit` substrings of this byte string,
+ /// separated by the given byte string, in reverse. If `limit` substrings
+ /// are yielded, then the last substring will contain the remainder of this
+ /// byte string.
+ ///
+ /// The splitter may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let x: Vec<_> =
+ /// b"Mary had a little lamb".rsplitn_str(3, " ").collect();
+ /// assert_eq!(x, vec![B("lamb"), B("little"), B("Mary had a")]);
+ ///
+ /// let x: Vec<_> = b"".rsplitn_str(3, "X").collect();
+ /// assert_eq!(x, vec![b""]);
+ ///
+ /// let x: Vec<_> = b"lionXXtigerXleopard".rsplitn_str(3, "X").collect();
+ /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lionX")]);
+ ///
+ /// let x: Vec<_> = b"lion::tiger::leopard".rsplitn_str(2, "::").collect();
+ /// assert_eq!(x, vec![B("leopard"), B("lion::tiger")]);
+ ///
+ /// let x: Vec<_> = b"abcXdef".rsplitn_str(1, "X").collect();
+ /// assert_eq!(x, vec![B("abcXdef")]);
+ ///
+ /// let x: Vec<_> = b"abcdef".rsplitn_str(2, "X").collect();
+ /// assert_eq!(x, vec![B("abcdef")]);
+ ///
+ /// let x: Vec<_> = b"abcXdef".rsplitn_str(0, "X").collect();
+ /// assert!(x.is_empty());
+ /// ```
+ #[inline]
+ fn rsplitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
+     &'h self,
+     limit: usize,
+     splitter: &'s B,
+ ) -> SplitNReverse<'h, 's> {
+     // Note the argument order: `SplitNReverse::new` takes the limit last.
+     let haystack = self.as_bytes();
+     let separator = splitter.as_ref();
+     SplitNReverse::new(haystack, separator, limit)
+ }
+
+ /// Replace all matches of the given needle with the given replacement, and
+ /// return the result as a new `Vec<u8>`.
+ ///
+ /// This routine is useful as a convenience. If you need to reuse an
+ /// allocation, use [`replace_into`](#method.replace_into) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"this is old".replace("old", "new");
+ /// assert_eq!(s, "this is new".as_bytes());
+ /// ```
+ ///
+ /// When the pattern doesn't match:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"this is old".replace("nada nada", "limonada");
+ /// assert_eq!(s, "this is old".as_bytes());
+ /// ```
+ ///
+ /// When the needle is an empty string:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo".replace("", "Z");
+ /// assert_eq!(s, "ZfZoZoZ".as_bytes());
+ /// ```
+ #[cfg(feature = "alloc")]
+ #[inline]
+ fn replace<N: AsRef<[u8]>, R: AsRef<[u8]>>(
+     &self,
+     needle: N,
+     replacement: R,
+ ) -> Vec<u8> {
+     // Reserve room for the input length up front, then let `replace_into`
+     // append the rewritten bytes.
+     let capacity = self.as_bytes().len();
+     let mut out = Vec::with_capacity(capacity);
+     self.replace_into(needle, replacement, &mut out);
+     out
+ }
+
+ /// Replace up to `limit` matches of the given needle with the given
+ /// replacement, and return the result as a new `Vec<u8>`.
+ ///
+ /// This routine is useful as a convenience. If you need to reuse an
+ /// allocation, use [`replacen_into`](#method.replacen_into) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foofoo".replacen("o", "z", 2);
+ /// assert_eq!(s, "fzzfoo".as_bytes());
+ /// ```
+ ///
+ /// When the pattern doesn't match:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foofoo".replacen("a", "z", 2);
+ /// assert_eq!(s, "foofoo".as_bytes());
+ /// ```
+ ///
+ /// When the needle is an empty string:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo".replacen("", "Z", 2);
+ /// assert_eq!(s, "ZfZoo".as_bytes());
+ /// ```
+ #[cfg(feature = "alloc")]
+ #[inline]
+ fn replacen<N: AsRef<[u8]>, R: AsRef<[u8]>>(
+     &self,
+     needle: N,
+     replacement: R,
+     limit: usize,
+ ) -> Vec<u8> {
+     // Reserve room for the input length up front, then let `replacen_into`
+     // append the rewritten bytes.
+     let capacity = self.as_bytes().len();
+     let mut out = Vec::with_capacity(capacity);
+     self.replacen_into(needle, replacement, limit, &mut out);
+     out
+ }
+
+ /// Replace all matches of the given needle with the given replacement,
+ /// and write the result into the provided `Vec<u8>`.
+ ///
+ /// This does **not** clear `dest` before writing to it.
+ ///
+ /// This routine is useful for reusing an allocation. For a more convenient
+ /// API, use [`replace`](#method.replace) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"this is old";
+ ///
+ /// let mut dest = vec![];
+ /// s.replace_into("old", "new", &mut dest);
+ /// assert_eq!(dest, "this is new".as_bytes());
+ /// ```
+ ///
+ /// When the pattern doesn't match:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"this is old";
+ ///
+ /// let mut dest = vec![];
+ /// s.replace_into("nada nada", "limonada", &mut dest);
+ /// assert_eq!(dest, "this is old".as_bytes());
+ /// ```
+ ///
+ /// When the needle is an empty string:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo";
+ ///
+ /// let mut dest = vec![];
+ /// s.replace_into("", "Z", &mut dest);
+ /// assert_eq!(dest, "ZfZoZoZ".as_bytes());
+ /// ```
+ #[cfg(feature = "alloc")]
+ #[inline]
+ fn replace_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
+     &self,
+     needle: N,
+     replacement: R,
+     dest: &mut Vec<u8>,
+ ) {
+     let needle = needle.as_ref();
+     let replacement = replacement.as_ref();
+     let haystack = self.as_bytes();
+
+     // Offset of the first haystack byte not yet appended to `dest`.
+     let mut copied_up_to = 0;
+     for at in self.find_iter(needle) {
+         // Append the unmatched gap, then the replacement instead of the match.
+         dest.push_str(&haystack[copied_up_to..at]);
+         dest.push_str(replacement);
+         copied_up_to = at + needle.len();
+     }
+     // Append whatever follows the final match (or everything if none matched).
+     dest.push_str(&haystack[copied_up_to..]);
+ }
+
+ /// Replace up to `limit` matches of the given needle with the given
+ /// replacement, and write the result into the provided `Vec<u8>`.
+ ///
+ /// This does **not** clear `dest` before writing to it.
+ ///
+ /// This routine is useful for reusing an allocation. For a more convenient
+ /// API, use [`replacen`](#method.replacen) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foofoo";
+ ///
+ /// let mut dest = vec![];
+ /// s.replacen_into("o", "z", 2, &mut dest);
+ /// assert_eq!(dest, "fzzfoo".as_bytes());
+ /// ```
+ ///
+ /// When the pattern doesn't match:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foofoo";
+ ///
+ /// let mut dest = vec![];
+ /// s.replacen_into("a", "z", 2, &mut dest);
+ /// assert_eq!(dest, "foofoo".as_bytes());
+ /// ```
+ ///
+ /// When the needle is an empty string:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foo";
+ ///
+ /// let mut dest = vec![];
+ /// s.replacen_into("", "Z", 2, &mut dest);
+ /// assert_eq!(dest, "ZfZoo".as_bytes());
+ /// ```
+ #[cfg(feature = "alloc")]
+ #[inline]
+ fn replacen_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
+     &self,
+     needle: N,
+     replacement: R,
+     limit: usize,
+     dest: &mut Vec<u8>,
+ ) {
+     let needle = needle.as_ref();
+     let replacement = replacement.as_ref();
+     let haystack = self.as_bytes();
+
+     // Offset of the first haystack byte not yet appended to `dest`.
+     let mut copied_up_to = 0;
+     // Only the first `limit` matches are rewritten.
+     for at in self.find_iter(needle).take(limit) {
+         // Append the unmatched gap, then the replacement instead of the match.
+         dest.push_str(&haystack[copied_up_to..at]);
+         dest.push_str(replacement);
+         copied_up_to = at + needle.len();
+     }
+     // Append the remainder verbatim, including any matches past the limit.
+     dest.push_str(&haystack[copied_up_to..]);
+ }
+
+ /// Returns an iterator over the bytes in this byte string.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"foobar";
+ /// let bytes: Vec<u8> = bs.bytes().collect();
+ /// assert_eq!(bytes, bs);
+ /// ```
+ #[inline]
+ fn bytes(&self) -> Bytes<'_> {
+     // `Bytes` wraps the slice iterator over the underlying bytes.
+     let it = self.as_bytes().iter();
+     Bytes { it }
+ }
+
+ /// Returns an iterator over the Unicode scalar values in this byte string.
+ /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
+ /// is yielded instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
+ /// let chars: Vec<char> = bs.chars().collect();
+ /// assert_eq!(vec!['☃', '\u{FFFD}', '𝞃', '\u{FFFD}', 'a'], chars);
+ /// ```
+ ///
+ /// Codepoints can also be iterated over in reverse:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
+ /// let chars: Vec<char> = bs.chars().rev().collect();
+ /// assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars);
+ /// ```
+ #[inline]
+ fn chars(&self) -> Chars<'_> {
+     // The iterator borrows the underlying bytes.
+     let haystack = self.as_bytes();
+     Chars::new(haystack)
+ }
+
+ /// Returns an iterator over the Unicode scalar values in this byte string
+ /// along with their starting and ending byte index positions. If invalid
+ /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
+ /// instead.
+ ///
+ /// Note that this is slightly different from the `CharIndices` iterator
+ /// provided by the standard library. Aside from working on possibly
+ /// invalid UTF-8, this iterator provides both the corresponding starting
+ /// and ending byte indices of each codepoint yielded. The ending position
+ /// is necessary to slice the original byte string when invalid UTF-8 bytes
+ /// are converted into a Unicode replacement codepoint, since a single
+ /// replacement codepoint can substitute anywhere from 1 to 3 invalid bytes
+ /// (inclusive).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
+ /// let chars: Vec<(usize, usize, char)> = bs.char_indices().collect();
+ /// assert_eq!(chars, vec![
+ /// (0, 3, '☃'),
+ /// (3, 4, '\u{FFFD}'),
+ /// (4, 8, '𝞃'),
+ /// (8, 10, '\u{FFFD}'),
+ /// (10, 11, 'a'),
+ /// ]);
+ /// ```
+ ///
+ /// Codepoints can also be iterated over in reverse:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
+ /// let chars: Vec<(usize, usize, char)> = bs
+ /// .char_indices()
+ /// .rev()
+ /// .collect();
+ /// assert_eq!(chars, vec![
+ /// (10, 11, 'a'),
+ /// (8, 10, '\u{FFFD}'),
+ /// (4, 8, '𝞃'),
+ /// (3, 4, '\u{FFFD}'),
+ /// (0, 3, '☃'),
+ /// ]);
+ /// ```
+ #[inline]
+ fn char_indices(&self) -> CharIndices<'_> {
+     // The iterator borrows the underlying bytes.
+     let haystack = self.as_bytes();
+     CharIndices::new(haystack)
+ }
+
+ /// Iterate over chunks of valid UTF-8.
+ ///
+ /// The iterator returned yields chunks of valid UTF-8 separated by invalid
+ /// UTF-8 bytes, if they exist. Invalid UTF-8 bytes are always 1-3 bytes,
+ /// which are determined via the "substitution of maximal subparts"
+ /// strategy described in the docs for the
+ /// [`ByteSlice::to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy)
+ /// method.
+ ///
+ /// # Examples
+ ///
+ /// This example shows how to gather all valid and invalid chunks from a
+ /// byte slice:
+ ///
+ /// ```
+ /// use bstr::{ByteSlice, Utf8Chunk};
+ ///
+ /// let bytes = b"foo\xFD\xFEbar\xFF";
+ ///
+ /// let (mut valid_chunks, mut invalid_chunks) = (vec![], vec![]);
+ /// for chunk in bytes.utf8_chunks() {
+ /// if !chunk.valid().is_empty() {
+ /// valid_chunks.push(chunk.valid());
+ /// }
+ /// if !chunk.invalid().is_empty() {
+ /// invalid_chunks.push(chunk.invalid());
+ /// }
+ /// }
+ ///
+ /// assert_eq!(valid_chunks, vec!["foo", "bar"]);
+ /// assert_eq!(invalid_chunks, vec![b"\xFD", b"\xFE", b"\xFF"]);
+ /// ```
+ #[inline]
+ fn utf8_chunks(&self) -> Utf8Chunks<'_> {
+     // The iterator starts out holding the complete byte slice.
+     let bytes = self.as_bytes();
+     Utf8Chunks { bytes }
+ }
+
+ /// Returns an iterator over the grapheme clusters in this byte string.
+ /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
+ /// is yielded instead.
+ ///
+ /// # Examples
+ ///
+ /// This example shows how multiple codepoints can combine to form a
+ /// single grapheme cluster:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
+ /// let graphemes: Vec<&str> = bs.graphemes().collect();
+ /// assert_eq!(vec!["à̖", "🇺🇸"], graphemes);
+ /// ```
+ ///
+ /// This shows that graphemes can be iterated over in reverse:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
+ /// let graphemes: Vec<&str> = bs.graphemes().rev().collect();
+ /// assert_eq!(vec!["🇺🇸", "à̖"], graphemes);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn graphemes(&self) -> Graphemes<'_> {
+     // The iterator borrows the underlying bytes.
+     let haystack = self.as_bytes();
+     Graphemes::new(haystack)
+ }
+
+ /// Returns an iterator over the grapheme clusters in this byte string
+ /// along with their starting and ending byte index positions. If invalid
+ /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
+ /// instead.
+ ///
+ /// # Examples
+ ///
+ /// This example shows how to get the byte offsets of each individual
+ /// grapheme cluster:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
+ /// let graphemes: Vec<(usize, usize, &str)> =
+ /// bs.grapheme_indices().collect();
+ /// assert_eq!(vec![(0, 5, "à̖"), (5, 13, "🇺🇸")], graphemes);
+ /// ```
+ ///
+ /// This example shows what happens when invalid UTF-8 is encountered. Note
+ /// that the offsets are valid indices into the original string, and do
+ /// not necessarily correspond to the length of the `&str` returned!
+ ///
+ /// ```
+ /// # #[cfg(all(feature = "alloc"))] {
+ /// use bstr::{ByteSlice, ByteVec};
+ ///
+ /// let mut bytes = vec![];
+ /// bytes.push_str("a\u{0300}\u{0316}");
+ /// bytes.push(b'\xFF');
+ /// bytes.push_str("\u{1F1FA}\u{1F1F8}");
+ ///
+ /// let graphemes: Vec<(usize, usize, &str)> =
+ /// bytes.grapheme_indices().collect();
+ /// assert_eq!(
+ /// graphemes,
+ /// vec![(0, 5, "à̖"), (5, 6, "\u{FFFD}"), (6, 14, "🇺🇸")]
+ /// );
+ /// # }
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn grapheme_indices(&self) -> GraphemeIndices<'_> {
+     // The iterator borrows the underlying bytes.
+     let haystack = self.as_bytes();
+     GraphemeIndices::new(haystack)
+ }
+
+ /// Returns an iterator over the words in this byte string. If invalid
+ /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
+ /// instead.
+ ///
+ /// This is similar to
+ /// [`words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks),
+ /// except it only returns elements that contain a "word" character. A word
+ /// character is defined by UTS #18 (Annex C) to be the combination of the
+ /// `Alphabetic` and `Join_Control` properties, along with the
+ /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
+ /// categories.
+ ///
+ /// Since words are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
+ /// let words: Vec<&str> = bs.words().collect();
+ /// assert_eq!(words, vec![
+ /// "The", "quick", "brown", "fox", "can't",
+ /// "jump", "32.3", "feet", "right",
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn words(&self) -> Words<'_> {
+     // The iterator borrows the underlying bytes.
+     let haystack = self.as_bytes();
+     Words::new(haystack)
+ }
+
+ /// Returns an iterator over the words in this byte string along with
+ /// their starting and ending byte index positions.
+ ///
+ /// This is similar to
+ /// [`words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices),
+ /// except it only returns elements that contain a "word" character. A word
+ /// character is defined by UTS #18 (Annex C) to be the combination of the
+ /// `Alphabetic` and `Join_Control` properties, along with the
+ /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
+ /// categories.
+ ///
+ /// Since words are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// This example shows how to get the byte offsets of each individual
+ /// word:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"can't jump 32.3 feet";
+ /// let words: Vec<(usize, usize, &str)> = bs.word_indices().collect();
+ /// assert_eq!(words, vec![
+ /// (0, 5, "can't"),
+ /// (6, 10, "jump"),
+ /// (11, 15, "32.3"),
+ /// (16, 20, "feet"),
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn word_indices(&self) -> WordIndices<'_> {
+     // The iterator borrows the underlying bytes.
+     let haystack = self.as_bytes();
+     WordIndices::new(haystack)
+ }
+
+ /// Returns an iterator over the words in this byte string, along with
+ /// all breaks between the words. Concatenating all elements yielded by
+ /// the iterator results in the original string (modulo Unicode replacement
+ /// codepoint substitutions if invalid UTF-8 is encountered).
+ ///
+ /// Since words are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
+ /// let words: Vec<&str> = bs.words_with_breaks().collect();
+ /// assert_eq!(words, vec![
+ /// "The", " ", "quick", " ", "(", "\"", "brown", "\"", ")",
+ /// " ", "fox", " ", "can't", " ", "jump", " ", "32.3", " ", "feet",
+ /// ",", " ", "right", "?",
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn words_with_breaks(&self) -> WordsWithBreaks<'_> {
+     // The iterator borrows the underlying bytes.
+     let haystack = self.as_bytes();
+     WordsWithBreaks::new(haystack)
+ }
+
+ /// Returns an iterator over the words and their byte offsets in this
+ /// byte string, along with all breaks between the words. Concatenating
+ /// all elements yielded by the iterator results in the original string
+ /// (modulo Unicode replacement codepoint substitutions if invalid UTF-8 is
+ /// encountered).
+ ///
+ /// Since words are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// This example shows how to get the byte offsets of each individual
+ /// word:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"can't jump 32.3 feet";
+ /// let words: Vec<(usize, usize, &str)> =
+ /// bs.words_with_break_indices().collect();
+ /// assert_eq!(words, vec![
+ /// (0, 5, "can't"),
+ /// (5, 6, " "),
+ /// (6, 10, "jump"),
+ /// (10, 11, " "),
+ /// (11, 15, "32.3"),
+ /// (15, 16, " "),
+ /// (16, 20, "feet"),
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn words_with_break_indices(&self) -> WordsWithBreakIndices<'_> {
+     // The iterator is built directly over the raw bytes of this string.
+     let haystack = self.as_bytes();
+     WordsWithBreakIndices::new(haystack)
+ }
+
+ /// Returns an iterator over the sentences in this byte string.
+ ///
+ /// Typically, a sentence will include its trailing punctuation and
+ /// whitespace. Concatenating all elements yielded by the iterator
+ /// results in the original string (modulo Unicode replacement codepoint
+ /// substitutions if invalid UTF-8 is encountered).
+ ///
+ /// Since sentences are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"I want this. Not that. Right now.";
+ /// let sentences: Vec<&str> = bs.sentences().collect();
+ /// assert_eq!(sentences, vec![
+ /// "I want this. ",
+ /// "Not that. ",
+ /// "Right now.",
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn sentences(&self) -> Sentences<'_> {
+     // The iterator is built directly over the raw bytes of this string.
+     let haystack = self.as_bytes();
+     Sentences::new(haystack)
+ }
+
+ /// Returns an iterator over the sentences in this byte string along with
+ /// their starting and ending byte index positions.
+ ///
+ /// Typically, a sentence will include its trailing punctuation and
+ /// whitespace. Concatenating all elements yielded by the iterator
+ /// results in the original string (modulo Unicode replacement codepoint
+ /// substitutions if invalid UTF-8 is encountered).
+ ///
+ /// Since sentences are made up of one or more codepoints, this iterator
+ /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
+ /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let bs = b"I want this. Not that. Right now.";
+ /// let sentences: Vec<(usize, usize, &str)> =
+ /// bs.sentence_indices().collect();
+ /// assert_eq!(sentences, vec![
+ /// (0, 13, "I want this. "),
+ /// (13, 23, "Not that. "),
+ /// (23, 33, "Right now."),
+ /// ]);
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn sentence_indices(&self) -> SentenceIndices<'_> {
+     // The iterator is built directly over the raw bytes of this string.
+     let haystack = self.as_bytes();
+     SentenceIndices::new(haystack)
+ }
+
+ /// An iterator over all lines in a byte string, without their
+ /// terminators.
+ ///
+ /// For this iterator, the only line terminators recognized are `\r\n` and
+ /// `\n`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"\
+ /// foo
+ ///
+ /// bar\r
+ /// baz
+ ///
+ ///
+ /// quux";
+ /// let lines: Vec<&[u8]> = s.lines().collect();
+ /// assert_eq!(lines, vec![
+ /// B("foo"), B(""), B("bar"), B("baz"), B(""), B(""), B("quux"),
+ /// ]);
+ /// ```
+ #[inline]
+ fn lines(&self) -> Lines<'_> {
+     // The iterator is built directly over the raw bytes of this string.
+     let haystack = self.as_bytes();
+     Lines::new(haystack)
+ }
+
+ /// An iterator over all lines in a byte string, including their
+ /// terminators.
+ ///
+ /// For this iterator, the only line terminator recognized is `\n`. (Since
+ /// line terminators are included, this also handles `\r\n` line endings.)
+ ///
+ /// Line terminators are only included if they are present in the original
+ /// byte string. For example, the last line in a byte string may not end
+ /// with a line terminator.
+ ///
+ /// Concatenating all elements yielded by this iterator is guaranteed to
+ /// yield the original byte string.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"\
+ /// foo
+ ///
+ /// bar\r
+ /// baz
+ ///
+ ///
+ /// quux";
+ /// let lines: Vec<&[u8]> = s.lines_with_terminator().collect();
+ /// assert_eq!(lines, vec![
+ /// B("foo\n"),
+ /// B("\n"),
+ /// B("bar\r\n"),
+ /// B("baz\n"),
+ /// B("\n"),
+ /// B("\n"),
+ /// B("quux"),
+ /// ]);
+ /// ```
+ #[inline]
+ fn lines_with_terminator(&self) -> LinesWithTerminator<'_> {
+     // The iterator is built directly over the raw bytes of this string.
+     let haystack = self.as_bytes();
+     LinesWithTerminator::new(haystack)
+ }
+
+ /// Return a byte string slice with leading and trailing whitespace
+ /// removed.
+ ///
+ /// Whitespace is defined according to the terms of the `White_Space`
+ /// Unicode property.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(" foo\tbar\t\u{2003}\n");
+ /// assert_eq!(s.trim(), B("foo\tbar"));
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn trim(&self) -> &[u8] {
+     // Drop the leading whitespace first, then the trailing whitespace of
+     // whatever remains.
+     let without_leading = self.trim_start();
+     without_leading.trim_end()
+ }
+
+ /// Return a byte string slice with leading whitespace removed.
+ ///
+ /// Whitespace is defined according to the terms of the `White_Space`
+ /// Unicode property.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(" foo\tbar\t\u{2003}\n");
+ /// assert_eq!(s.trim_start(), B("foo\tbar\t\u{2003}\n"));
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn trim_start(&self) -> &[u8] {
+     let bytes = self.as_bytes();
+     // The value returned by `whitespace_len_fwd` is the offset at which
+     // the kept slice begins.
+     let skip = whitespace_len_fwd(bytes);
+     &bytes[skip..]
+ }
+
+ /// Return a byte string slice with trailing whitespace removed.
+ ///
+ /// Whitespace is defined according to the terms of the `White_Space`
+ /// Unicode property.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(" foo\tbar\t\u{2003}\n");
+ /// assert_eq!(s.trim_end(), B(" foo\tbar"));
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn trim_end(&self) -> &[u8] {
+     let bytes = self.as_bytes();
+     // The value returned by `whitespace_len_rev` is the offset at which
+     // the kept slice ends.
+     let keep = whitespace_len_rev(bytes);
+     &bytes[..keep]
+ }
+
+ /// Return a byte string slice with leading and trailing characters
+ /// satisfying the given predicate removed.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"123foo5bar789";
+ /// assert_eq!(s.trim_with(|c| c.is_numeric()), B("foo5bar"));
+ /// ```
+ #[inline]
+ fn trim_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
+     // The predicate is lent out by mutable reference so that the same
+     // closure can drive both the leading and the trailing pass.
+     let without_leading = self.trim_start_with(&mut trim);
+     without_leading.trim_end_with(&mut trim)
+ }
+
+ /// Return a byte string slice with leading characters satisfying the given
+ /// predicate removed.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"123foo5bar789";
+ /// assert_eq!(s.trim_start_with(|c| c.is_numeric()), B("foo5bar789"));
+ /// ```
+ #[inline]
+ fn trim_start_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
+     // Walk the characters front to back and stop at the first one that
+     // the predicate rejects; everything from its start offset is kept.
+     match self.char_indices().find(|&(_, _, ch)| !trim(ch)) {
+         Some((start, _, _)) => &self.as_bytes()[start..],
+         // Every character satisfied the predicate (or there were none).
+         None => b"",
+     }
+ }
+
+ /// Return a byte string slice with trailing characters satisfying the
+ /// given predicate removed.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"123foo5bar789";
+ /// assert_eq!(s.trim_end_with(|c| c.is_numeric()), B("123foo5bar"));
+ /// ```
+ #[inline]
+ fn trim_end_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
+     // Walk the characters back to front and stop at the first one that
+     // the predicate rejects; everything up to its end offset is kept.
+     match self.char_indices().rev().find(|&(_, _, ch)| !trim(ch)) {
+         Some((_, end, _)) => &self.as_bytes()[..end],
+         // Every character satisfied the predicate (or there were none).
+         None => b"",
+     }
+ }
+
+ /// Returns a new `Vec<u8>` containing the lowercase equivalent of this
+ /// byte string.
+ ///
+ /// In this case, lowercase is defined according to the `Lowercase` Unicode
+ /// property.
+ ///
+ /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
+ /// then it is copied to the returned byte string unchanged.
+ ///
+ /// Note that some characters in this byte string may expand into multiple
+ /// characters when changing the case, so the number of bytes in the
+ /// returned byte string may not be equivalent to the number of bytes in
+ /// this byte string.
+ ///
+ /// If you'd like to reuse an allocation for performance reasons, then use
+ /// [`to_lowercase_into`](#method.to_lowercase_into) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("HELLO Β");
+ /// assert_eq!("hello β".as_bytes(), s.to_lowercase().as_bytes());
+ /// ```
+ ///
+ /// Scripts without case are not changed:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("农历新年");
+ /// assert_eq!("农历新年".as_bytes(), s.to_lowercase().as_bytes());
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
+ /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), s.to_lowercase().as_bytes());
+ /// ```
+ #[cfg(all(feature = "alloc", feature = "unicode"))]
+ #[inline]
+ fn to_lowercase(&self) -> Vec<u8> {
+     // Start from an empty buffer; `to_lowercase_into` reserves space and
+     // appends the converted bytes.
+     let mut lowered = Vec::new();
+     self.to_lowercase_into(&mut lowered);
+     lowered
+ }
+
+ /// Writes the lowercase equivalent of this byte string into the given
+ /// buffer. The buffer is not cleared before written to.
+ ///
+ /// In this case, lowercase is defined according to the `Lowercase`
+ /// Unicode property.
+ ///
+ /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
+ /// then it is written to the given buffer unchanged.
+ ///
+ /// Note that some characters in this byte string may expand into multiple
+ /// characters when changing the case, so the number of bytes written to
+ /// the given byte string may not be equivalent to the number of bytes in
+ /// this byte string.
+ ///
+ /// If you don't need to amortize allocation and instead prefer
+ /// convenience, then use [`to_lowercase`](#method.to_lowercase) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("HELLO Β");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_lowercase_into(&mut buf);
+ /// assert_eq!("hello β".as_bytes(), buf.as_bytes());
+ /// ```
+ ///
+ /// Scripts without case are not changed:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("农历新年");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_lowercase_into(&mut buf);
+ /// assert_eq!("农历新年".as_bytes(), buf.as_bytes());
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_lowercase_into(&mut buf);
+ /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), buf.as_bytes());
+ /// ```
+ #[cfg(all(feature = "alloc", feature = "unicode"))]
+ #[inline]
+ fn to_lowercase_into(&self, buf: &mut Vec<u8>) {
+     // TODO: This is the best we can do given what std exposes I think.
+     // If we roll our own case handling, then we might be able to do this
+     // a bit faster. We shouldn't roll our own case handling unless we
+     // need to, e.g., for doing caseless matching or case folding.
+
+     // TODO(BUG): This doesn't handle any special casing rules.
+
+     let bytes = self.as_bytes();
+     buf.reserve(bytes.len());
+     for (start, end, ch) in self.char_indices() {
+         match ch {
+             // A replacement codepoint: copy the source bytes for this
+             // span verbatim instead of re-encoding the character.
+             '\u{FFFD}' => buf.push_str(&bytes[start..end]),
+             // ASCII characters take the ASCII-only conversion.
+             c if c.is_ascii() => buf.push_char(c.to_ascii_lowercase()),
+             // Anything else may lowercase to more than one character.
+             c => {
+                 for lower in c.to_lowercase() {
+                     buf.push_char(lower);
+                 }
+             }
+         }
+     }
+ }
+
+ /// Returns a new `Vec<u8>` containing the ASCII lowercase equivalent of
+ /// this byte string.
+ ///
+ /// In this case, lowercase is only defined in ASCII letters. Namely, the
+ /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
+ /// In particular, the length of the byte string returned is always
+ /// equivalent to the length of this byte string.
+ ///
+ /// If you'd like to reuse an allocation for performance reasons, then use
+ /// [`make_ascii_lowercase`](#method.make_ascii_lowercase) to perform
+ /// the conversion in place.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("HELLO Β");
+ /// assert_eq!("hello Β".as_bytes(), s.to_ascii_lowercase().as_bytes());
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
+ /// assert_eq!(s.to_ascii_lowercase(), B(b"foo\xFFbar\xE2\x98baz"));
+ /// ```
+ #[cfg(feature = "alloc")]
+ #[inline]
+ fn to_ascii_lowercase(&self) -> Vec<u8> {
+     // Delegate to the byte slice's own ASCII lowercase conversion.
+     let bytes = self.as_bytes();
+     bytes.to_ascii_lowercase()
+ }
+
+ /// Convert this byte string to its lowercase ASCII equivalent in place.
+ ///
+ /// In this case, lowercase is only defined in ASCII letters. Namely, the
+ /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
+ ///
+ /// If you don't need to do the conversion in
+ /// place and instead prefer convenience, then use
+ /// [`to_ascii_lowercase`](#method.to_ascii_lowercase) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("HELLO Β");
+ /// s.make_ascii_lowercase();
+ /// assert_eq!(s, "hello Β".as_bytes());
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// # #[cfg(feature = "alloc")] {
+ /// use bstr::{B, ByteSlice, ByteVec};
+ ///
+ /// let mut s = <Vec<u8>>::from_slice(b"FOO\xFFBAR\xE2\x98BAZ");
+ /// s.make_ascii_lowercase();
+ /// assert_eq!(s, B(b"foo\xFFbar\xE2\x98baz"));
+ /// # }
+ /// ```
+ #[inline]
+ fn make_ascii_lowercase(&mut self) {
+     // Delegate to the byte slice's own in-place ASCII conversion.
+     let bytes = self.as_bytes_mut();
+     bytes.make_ascii_lowercase();
+ }
+
+ /// Returns a new `Vec<u8>` containing the uppercase equivalent of this
+ /// byte string.
+ ///
+ /// In this case, uppercase is defined according to the `Uppercase`
+ /// Unicode property.
+ ///
+ /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
+ /// then it is copied to the returned byte string unchanged.
+ ///
+ /// Note that some characters in this byte string may expand into multiple
+ /// characters when changing the case, so the number of bytes in the
+ /// returned byte string may not be equivalent to the number of bytes in
+ /// this byte string.
+ ///
+ /// If you'd like to reuse an allocation for performance reasons, then use
+ /// [`to_uppercase_into`](#method.to_uppercase_into) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("hello β");
+ /// assert_eq!(s.to_uppercase(), B("HELLO Β"));
+ /// ```
+ ///
+ /// Scripts without case are not changed:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("农历新年");
+ /// assert_eq!(s.to_uppercase(), B("农历新年"));
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"foo\xFFbar\xE2\x98baz");
+ /// assert_eq!(s.to_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
+ /// ```
+ #[cfg(all(feature = "alloc", feature = "unicode"))]
+ #[inline]
+ fn to_uppercase(&self) -> Vec<u8> {
+     // Start from an empty buffer; `to_uppercase_into` reserves space and
+     // appends the converted bytes.
+     let mut raised = Vec::new();
+     self.to_uppercase_into(&mut raised);
+     raised
+ }
+
+ /// Writes the uppercase equivalent of this byte string into the given
+ /// buffer. The buffer is not cleared before written to.
+ ///
+ /// In this case, uppercase is defined according to the `Uppercase`
+ /// Unicode property.
+ ///
+ /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
+ /// then it is written to the given buffer unchanged.
+ ///
+ /// Note that some characters in this byte string may expand into multiple
+ /// characters when changing the case, so the number of bytes written to
+ /// the given byte string may not be equivalent to the number of bytes in
+ /// this byte string.
+ ///
+ /// If you don't need to amortize allocation and instead prefer
+ /// convenience, then use [`to_uppercase`](#method.to_uppercase) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("hello β");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_uppercase_into(&mut buf);
+ /// assert_eq!(buf, B("HELLO Β"));
+ /// ```
+ ///
+ /// Scripts without case are not changed:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("农历新年");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_uppercase_into(&mut buf);
+ /// assert_eq!(buf, B("农历新年"));
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"foo\xFFbar\xE2\x98baz");
+ ///
+ /// let mut buf = vec![];
+ /// s.to_uppercase_into(&mut buf);
+ /// assert_eq!(buf, B(b"FOO\xFFBAR\xE2\x98BAZ"));
+ /// ```
+ #[cfg(all(feature = "alloc", feature = "unicode"))]
+ #[inline]
+ fn to_uppercase_into(&self, buf: &mut Vec<u8>) {
+     // TODO: This is the best we can do given what std exposes I think.
+     // If we roll our own case handling, then we might be able to do this
+     // a bit faster. We shouldn't roll our own case handling unless we
+     // need to, e.g., for doing caseless matching or case folding.
+     let bytes = self.as_bytes();
+     buf.reserve(bytes.len());
+     for (start, end, ch) in self.char_indices() {
+         match ch {
+             // A replacement codepoint: copy the source bytes for this
+             // span verbatim instead of re-encoding the character.
+             '\u{FFFD}' => buf.push_str(&bytes[start..end]),
+             // ASCII characters take the ASCII-only conversion.
+             c if c.is_ascii() => buf.push_char(c.to_ascii_uppercase()),
+             // Anything else may uppercase to more than one character.
+             c => {
+                 for upper in c.to_uppercase() {
+                     buf.push_char(upper);
+                 }
+             }
+         }
+     }
+ }
+
+ /// Returns a new `Vec<u8>` containing the ASCII uppercase equivalent of
+ /// this byte string.
+ ///
+ /// In this case, uppercase is only defined in ASCII letters. Namely, the
+ /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
+ /// In particular, the length of the byte string returned is always
+ /// equivalent to the length of this byte string.
+ ///
+ /// If you'd like to reuse an allocation for performance reasons, then use
+ /// [`make_ascii_uppercase`](#method.make_ascii_uppercase) to perform
+ /// the conversion in place.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B("hello β");
+ /// assert_eq!(s.to_ascii_uppercase(), B("HELLO β"));
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = B(b"foo\xFFbar\xE2\x98baz");
+ /// assert_eq!(s.to_ascii_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
+ /// ```
+ #[cfg(feature = "alloc")]
+ #[inline]
+ fn to_ascii_uppercase(&self) -> Vec<u8> {
+     // Delegate to the byte slice's own ASCII uppercase conversion.
+     let bytes = self.as_bytes();
+     bytes.to_ascii_uppercase()
+ }
+
+ /// Convert this byte string to its uppercase ASCII equivalent in place.
+ ///
+ /// In this case, uppercase is only defined in ASCII letters. Namely, the
+ /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
+ ///
+ /// If you don't need to do the conversion in
+ /// place and instead prefer convenience, then use
+ /// [`to_ascii_uppercase`](#method.to_ascii_uppercase) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let mut s = <Vec<u8>>::from("hello β");
+ /// s.make_ascii_uppercase();
+ /// assert_eq!(s, B("HELLO β"));
+ /// ```
+ ///
+ /// Invalid UTF-8 remains as is:
+ ///
+ /// ```
+ /// # #[cfg(feature = "alloc")] {
+ /// use bstr::{B, ByteSlice, ByteVec};
+ ///
+ /// let mut s = <Vec<u8>>::from_slice(b"foo\xFFbar\xE2\x98baz");
+ /// s.make_ascii_uppercase();
+ /// assert_eq!(s, B(b"FOO\xFFBAR\xE2\x98BAZ"));
+ /// # }
+ /// ```
+ #[inline]
+ fn make_ascii_uppercase(&mut self) {
+     // Delegate to the byte slice's own in-place ASCII conversion.
+     let bytes = self.as_bytes_mut();
+     bytes.make_ascii_uppercase();
+ }
+
+ /// Reverse the bytes in this string, in place.
+ ///
+ /// This is not necessarily a well formed operation! For example, if this
+ /// byte string contains valid UTF-8 that isn't ASCII, then reversing the
+ /// string will likely result in invalid UTF-8 and otherwise non-sensical
+ /// content.
+ ///
+ /// Note that this is equivalent to the generic `[u8]::reverse` method.
+ /// This method is provided to permit callers to explicitly differentiate
+ /// between reversing bytes, codepoints and graphemes.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("hello");
+ /// s.reverse_bytes();
+ /// assert_eq!(s, "olleh".as_bytes());
+ /// ```
+ #[inline]
+ fn reverse_bytes(&mut self) {
+     // Plain byte-wise reversal of the underlying slice.
+     let bytes = self.as_bytes_mut();
+     bytes.reverse();
+ }
+
+ /// Reverse the codepoints in this string, in place.
+ ///
+ /// If this byte string is valid UTF-8, then its reversal by codepoint
+ /// is also guaranteed to be valid UTF-8.
+ ///
+ /// This operation is equivalent to the following, but without allocating:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("foo☃bar");
+ ///
+ /// let mut chars: Vec<char> = s.chars().collect();
+ /// chars.reverse();
+ ///
+ /// let reversed: String = chars.into_iter().collect();
+ /// assert_eq!(reversed, "rab☃oof");
+ /// ```
+ ///
+ /// Note that this is not necessarily a well formed operation. For example,
+ /// if this byte string contains grapheme clusters with more than one
+ /// codepoint, then those grapheme clusters will not necessarily be
+ /// preserved. If you'd like to preserve grapheme clusters, then use
+ /// [`reverse_graphemes`](#method.reverse_graphemes) instead.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("foo☃bar");
+ /// s.reverse_chars();
+ /// assert_eq!(s, "rab☃oof".as_bytes());
+ /// ```
+ ///
+ /// This example shows that not all reversals lead to a well formed string.
+ /// For example, in this case, combining marks are used to put accents over
+ /// some letters, and those accent marks must appear after the codepoints
+ /// they modify.
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let mut s = <Vec<u8>>::from("résumé");
+ /// s.reverse_chars();
+ /// assert_eq!(s, B(b"\xCC\x81emus\xCC\x81er"));
+ /// ```
+ ///
+ /// A word of warning: the above example relies on the fact that
+ /// `résumé` is in decomposed normal form, which means there are separate
+ /// codepoints for the accents above `e`. If it is instead in composed
+ /// normal form, then the example works:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let mut s = <Vec<u8>>::from("résumé");
+ /// s.reverse_chars();
+ /// assert_eq!(s, B("émusér"));
+ /// ```
+ ///
+ /// The point here is to be cautious and not assume that just because
+ /// `reverse_chars` works in one case, that it therefore works in all
+ /// cases.
+ #[inline]
+ fn reverse_chars(&mut self) {
+     // Pass 1: reverse the bytes of every multi-byte sequence in place.
+     // Pass 2 (below) reverses the whole string, which puts each of those
+     // sequences back into its original byte order while reversing the
+     // order of the sequences themselves.
+     let mut offset = 0;
+     loop {
+         let width = utf8::decode(&self.as_bytes()[offset..]).1;
+         if width == 0 {
+             // Nothing left to decode.
+             break;
+         }
+         let next = offset + width;
+         if width > 1 {
+             self.as_bytes_mut()[offset..next].reverse_bytes();
+         }
+         offset = next;
+     }
+     self.reverse_bytes();
+ }
+
+ /// Reverse the graphemes in this string, in place.
+ ///
+ /// If this byte string is valid UTF-8, then its reversal by grapheme
+ /// is also guaranteed to be valid UTF-8.
+ ///
+ /// This operation is equivalent to the following, but without allocating:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("foo☃bar");
+ ///
+ /// let mut graphemes: Vec<&str> = s.graphemes().collect();
+ /// graphemes.reverse();
+ ///
+ /// let reversed = graphemes.concat();
+ /// assert_eq!(reversed, "rab☃oof");
+ /// ```
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("foo☃bar");
+ /// s.reverse_graphemes();
+ /// assert_eq!(s, "rab☃oof".as_bytes());
+ /// ```
+ ///
+ /// This example shows how this correctly handles grapheme clusters,
+ /// unlike `reverse_chars`.
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut s = <Vec<u8>>::from("résumé");
+ /// s.reverse_graphemes();
+ /// assert_eq!(s, "émusér".as_bytes());
+ /// ```
+ #[cfg(feature = "unicode")]
+ #[inline]
+ fn reverse_graphemes(&mut self) {
+     use crate::unicode::decode_grapheme;
+
+     // Pass 1: reverse the bytes of every multi-byte grapheme in place.
+     // Pass 2 (below) reverses the whole string, which puts each of those
+     // graphemes back into its original byte order while reversing the
+     // order of the graphemes themselves.
+     let mut offset = 0;
+     loop {
+         let width = decode_grapheme(&self.as_bytes()[offset..]).1;
+         if width == 0 {
+             // Nothing left to decode.
+             break;
+         }
+         let next = offset + width;
+         if width > 1 {
+             self.as_bytes_mut()[offset..next].reverse_bytes();
+         }
+         offset = next;
+     }
+     self.reverse_bytes();
+ }
+
+ /// Returns true if and only if every byte in this byte string is ASCII.
+ ///
+ /// ASCII is an encoding that defines 128 codepoints. A byte corresponds to
+ /// an ASCII codepoint if and only if it is in the inclusive range
+ /// `[0, 127]`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert!(B("abc").is_ascii());
+ /// assert!(!B("☃βツ").is_ascii());
+ /// assert!(!B(b"\xFF").is_ascii());
+ /// ```
+ #[inline]
+ fn is_ascii(&self) -> bool {
+     let bytes = self.as_bytes();
+     // The string is all ASCII exactly when the reported index of the
+     // first non-ASCII byte equals the length.
+     ascii::first_non_ascii_byte(bytes) == bytes.len()
+ }
+
+ /// Returns true if and only if the entire byte string is valid UTF-8.
+ ///
+ /// If you need location information about where a byte string's first
+ /// invalid UTF-8 byte is, then use the [`to_str`](#method.to_str) method.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// assert!(B("abc").is_utf8());
+ /// assert!(B("☃βツ").is_utf8());
+ /// // invalid bytes
+ /// assert!(!B(b"abc\xFF").is_utf8());
+ /// // surrogate encoding
+ /// assert!(!B(b"\xED\xA0\x80").is_utf8());
+ /// // incomplete sequence
+ /// assert!(!B(b"\xF0\x9D\x9Ca").is_utf8());
+ /// // overlong sequence
+ /// assert!(!B(b"\xF0\x82\x82\xAC").is_utf8());
+ /// ```
+ #[inline]
+ fn is_utf8(&self) -> bool {
+     // Only the success or failure of validation matters here; the error
+     // value is discarded.
+     let outcome = utf8::validate(self.as_bytes());
+     outcome.is_ok()
+ }
+
+ /// Returns the last byte in this byte string, if it's non-empty. If this
+ /// byte string is empty, this returns `None`.
+ ///
+ /// Note that this is like the generic `[u8]::last`, except this returns
+ /// the byte by value instead of a reference to the byte.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// assert_eq!(Some(b'z'), b"baz".last_byte());
+ /// assert_eq!(None, b"".last_byte());
+ /// ```
+ #[inline]
+ fn last_byte(&self) -> Option<u8> {
+     // `last` yields `None` for an empty slice; `copied` turns the
+     // borrowed byte into an owned one.
+     self.as_bytes().last().copied()
+ }
+
+ /// Returns the index of the first non-ASCII byte in this byte string (if
+ /// any such indices exist). Specifically, it returns the index of the
+ /// first byte with a value greater than or equal to `0x80`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{ByteSlice, B};
+ ///
+ /// assert_eq!(Some(3), b"abc\xff".find_non_ascii_byte());
+ /// assert_eq!(None, b"abcde".find_non_ascii_byte());
+ /// assert_eq!(Some(0), B("😀").find_non_ascii_byte());
+ /// ```
+ #[inline]
+ fn find_non_ascii_byte(&self) -> Option<usize> {
+     let bytes = self.as_bytes();
+     match ascii::first_non_ascii_byte(bytes) {
+         // An index equal to the length means no non-ASCII byte was found.
+         at if at == bytes.len() => None,
+         at => Some(at),
+     }
+ }
+}
+
+/// A single substring searcher fixed to a particular needle.
+///
+/// The purpose of this type is to permit callers to construct a substring
+/// searcher that can be used to search haystacks without the overhead of
+/// constructing the searcher in the first place. This is a somewhat niche
+/// concern when it's necessary to re-use the same needle to search multiple
+/// different haystacks with as little overhead as possible. In general, using
+/// [`ByteSlice::find`](trait.ByteSlice.html#method.find)
+/// or
+/// [`ByteSlice::find_iter`](trait.ByteSlice.html#method.find_iter)
+/// is good enough, but `Finder` is useful when you can meaningfully observe
+/// searcher construction time in a profile.
+///
+/// When the `std` feature is enabled, then this type has an `into_owned`
+/// version which permits building a `Finder` that is not connected to the
+/// lifetime of its needle.
+#[derive(Clone, Debug)]
+pub struct Finder<'a>(memmem::Finder<'a>); // newtype over `memmem::Finder`; the inner searcher is private
+
+impl<'a> Finder<'a> {
+    /// Create a new finder for the given needle.
+    ///
+    /// The returned finder borrows the needle for `'a`.
+    #[inline]
+    pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a> {
+        let searcher = memmem::Finder::new(needle.as_ref());
+        Finder(searcher)
+    }
+
+    /// Convert this finder into its owned variant, such that it no longer
+    /// borrows the needle.
+    ///
+    /// If this is already an owned finder, then this is a no-op. Otherwise,
+    /// this copies the needle.
+    ///
+    /// This is only available when the `std` feature is enabled.
+    #[cfg(feature = "std")]
+    #[inline]
+    pub fn into_owned(self) -> Finder<'static> {
+        let Finder(searcher) = self;
+        Finder(searcher.into_owned())
+    }
+
+    /// Returns the needle that this finder searches for.
+    ///
+    /// Note that the lifetime of the needle returned is tied to the lifetime
+    /// of the finder, and may be shorter than the `'a` lifetime. Namely, a
+    /// finder's needle can be either borrowed or owned, so the lifetime of the
+    /// needle returned must necessarily be the shorter of the two.
+    #[inline]
+    pub fn needle(&self) -> &[u8] {
+        let searcher = &self.0;
+        searcher.needle()
+    }
+
+    /// Returns the index of the first occurrence of this needle in the given
+    /// haystack.
+    ///
+    /// The haystack may be any type that can be cheaply converted into a
+    /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+    ///
+    /// # Complexity
+    ///
+    /// This routine is guaranteed to have worst case linear time complexity
+    /// with respect to both the needle and the haystack. That is, this runs
+    /// in `O(needle.len() + haystack.len())` time.
+    ///
+    /// This routine is also guaranteed to have worst case constant space
+    /// complexity.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// use bstr::Finder;
+    ///
+    /// let haystack = "foo bar baz";
+    /// assert_eq!(Some(0), Finder::new("foo").find(haystack));
+    /// assert_eq!(Some(4), Finder::new("bar").find(haystack));
+    /// assert_eq!(None, Finder::new("quux").find(haystack));
+    /// ```
+    #[inline]
+    pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
+        let bytes: &[u8] = haystack.as_ref();
+        self.0.find(bytes)
+    }
+}
+
+/// A single substring reverse searcher fixed to a particular needle.
+///
+/// The purpose of this type is to permit callers to construct a substring
+/// searcher that can be used to search haystacks without the overhead of
+/// constructing the searcher in the first place. This is a somewhat niche
+/// concern when it's necessary to re-use the same needle to search multiple
+/// different haystacks with as little overhead as possible. In general, using
+/// [`ByteSlice::rfind`](trait.ByteSlice.html#method.rfind)
+/// or
+/// [`ByteSlice::rfind_iter`](trait.ByteSlice.html#method.rfind_iter)
+/// is good enough, but `FinderReverse` is useful when you can meaningfully
+/// observe searcher construction time in a profile.
+///
+/// When the `std` feature is enabled, then this type has an `into_owned`
+/// version which permits building a `FinderReverse` that is not connected to
+/// the lifetime of its needle.
+#[derive(Clone, Debug)]
+pub struct FinderReverse<'a>(memmem::FinderRev<'a>); // newtype over `memmem::FinderRev`; the inner searcher is private
+
+impl<'a> FinderReverse<'a> {
+ /// Create a new reverse finder for the given needle.
+ #[inline]
+ pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a> {
+ FinderReverse(memmem::FinderRev::new(needle.as_ref()))
+ }
+
+ /// Convert this finder into its owned variant, such that it no longer
+ /// borrows the needle.
+ ///
+ /// If this is already an owned finder, then this is a no-op. Otherwise,
+ /// this copies the needle.
+ ///
+ /// This is only available when the `std` feature is enabled.
+ #[cfg(feature = "std")]
+ #[inline]
+ pub fn into_owned(self) -> FinderReverse<'static> {
+ FinderReverse(self.0.into_owned())
+ }
+
+ /// Returns the needle that this finder searches for.
+ ///
+ /// Note that the lifetime of the needle returned is tied to the lifetime
+ /// of this finder, and may be shorter than the `'a` lifetime. Namely,
+ /// a finder's needle can be either borrowed or owned, so the lifetime of
+ /// the needle returned must necessarily be the shorter of the two.
+    #[inline]
+    pub fn needle(&self) -> &[u8] {
+        // Delegate to the wrapped searcher, which holds the needle.
+        let FinderReverse(inner) = self;
+        inner.needle()
+    }
+
+ /// Returns the index of the last occurrence of this needle in the given
+ /// haystack.
+ ///
+ /// The haystack may be any type that can be cheaply converted into a
+ /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
+ ///
+ /// # Complexity
+ ///
+ /// This routine is guaranteed to have worst case linear time complexity
+ /// with respect to both the needle and the haystack. That is, this runs
+ /// in `O(needle.len() + haystack.len())` time.
+ ///
+ /// This routine is also guaranteed to have worst case constant space
+ /// complexity.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::FinderReverse;
+ ///
+ /// let haystack = "foo bar baz";
+ /// assert_eq!(Some(0), FinderReverse::new("foo").rfind(haystack));
+ /// assert_eq!(Some(4), FinderReverse::new("bar").rfind(haystack));
+ /// assert_eq!(None, FinderReverse::new("quux").rfind(haystack));
+ /// ```
+    #[inline]
+    pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
+        // View the haystack as raw bytes, then delegate to the wrapped
+        // reverse searcher.
+        let haystack_bytes: &[u8] = haystack.as_ref();
+        self.0.rfind(haystack_bytes)
+    }
+}
+
+/// An iterator over non-overlapping substring matches.
+///
+/// Matches are reported by the byte offset at which they begin.
+///
+/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
+/// needle.
+#[derive(Debug)]
+pub struct Find<'h, 'n> {
+    // Underlying `memmem` iterator that produces the match offsets.
+    it: memmem::FindIter<'h, 'n>,
+    // The haystack and needle the iterator was built from, kept alongside it
+    // so other code in this module can read them back.
+    haystack: &'h [u8],
+    needle: &'n [u8],
+}
+
+impl<'h, 'n> Find<'h, 'n> {
+    /// Build a forward match iterator for `needle` over `haystack`.
+    fn new(haystack: &'h [u8], needle: &'n [u8]) -> Find<'h, 'n> {
+        let it = memmem::find_iter(haystack, needle);
+        Find { it, haystack, needle }
+    }
+}
+
+impl<'h, 'n> Iterator for Find<'h, 'n> {
+    type Item = usize;
+
+    /// Yield the next match offset reported by the underlying iterator.
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        let Find { it, .. } = self;
+        it.next()
+    }
+}
+
+/// An iterator over non-overlapping substring matches in reverse.
+///
+/// Matches are reported by the byte offset at which they begin.
+///
+/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
+/// needle.
+#[derive(Debug)]
+pub struct FindReverse<'h, 'n> {
+    // Underlying `memmem` reverse iterator that produces the match offsets.
+    it: memmem::FindRevIter<'h, 'n>,
+    // The inputs the iterator was built from; exposed through the private
+    // `haystack()` and `needle()` accessors below.
+    haystack: &'h [u8],
+    needle: &'n [u8],
+}
+
+impl<'h, 'n> FindReverse<'h, 'n> {
+    /// Build a reverse match iterator for `needle` over `haystack`.
+    fn new(haystack: &'h [u8], needle: &'n [u8]) -> FindReverse<'h, 'n> {
+        let it = memmem::rfind_iter(haystack, needle);
+        FindReverse { it, haystack, needle }
+    }
+
+    /// The full haystack this iterator was created with.
+    fn haystack(&self) -> &'h [u8] {
+        let haystack: &'h [u8] = self.haystack;
+        haystack
+    }
+
+    /// The needle this iterator was created with.
+    fn needle(&self) -> &'n [u8] {
+        let needle: &'n [u8] = self.needle;
+        needle
+    }
+}
+
+impl<'h, 'n> Iterator for FindReverse<'h, 'n> {
+    type Item = usize;
+
+    /// Yield the next match offset reported by the underlying reverse
+    /// iterator.
+    #[inline]
+    fn next(&mut self) -> Option<usize> {
+        let FindReverse { it, .. } = self;
+        it.next()
+    }
+}
+
+/// An iterator over the bytes in a byte string.
+///
+/// `'a` is the lifetime of the byte string being traversed.
+#[derive(Clone, Debug)]
+pub struct Bytes<'a> {
+    // Standard slice iterator over the bytes that have not been yielded yet.
+    it: slice::Iter<'a, u8>,
+}
+
+impl<'a> Bytes<'a> {
+    /// Views the remaining underlying data as a subslice of the original data.
+    /// This has the same lifetime as the original slice,
+    /// and so the iterator can continue to be used while this exists.
+    #[inline]
+    pub fn as_bytes(&self) -> &'a [u8] {
+        self.it.as_slice()
+    }
+}
+
+impl<'a> Iterator for Bytes<'a> {
+    type Item = u8;
+
+    /// Yields the next byte from the front, by value.
+    #[inline]
+    fn next(&mut self) -> Option<u8> {
+        // `copied()` is the idiomatic spelling of `.map(|&b| b)`.
+        self.it.next().copied()
+    }
+
+    /// Forwards the size hint of the underlying slice iterator.
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'a> DoubleEndedIterator for Bytes<'a> {
+    /// Yields the next byte from the back, by value.
+    #[inline]
+    fn next_back(&mut self) -> Option<u8> {
+        self.it.next_back().copied()
+    }
+}
+
+impl<'a> ExactSizeIterator for Bytes<'a> {
+    /// Number of bytes left to yield, as reported by the slice iterator.
+    #[inline]
+    fn len(&self) -> usize {
+        self.it.len()
+    }
+}
+
+impl<'a> iter::FusedIterator for Bytes<'a> {}
+
+/// An iterator over the fields in a byte string, separated by whitespace.
+///
+/// Whitespace for this iterator is defined by the Unicode property
+/// `White_Space`.
+///
+/// This iterator splits on contiguous runs of whitespace, such that the fields
+/// in `foo\t\t\n \nbar` are `foo` and `bar`.
+///
+/// `'a` is the lifetime of the byte string being split.
+#[cfg(feature = "unicode")]
+#[derive(Debug)]
+pub struct Fields<'a> {
+    // Predicate-driven field iterator, specialised to a plain function
+    // pointer so this type needs no generic parameter.
+    it: FieldsWith<'a, fn(char) -> bool>,
+}
+
+#[cfg(feature = "unicode")]
+impl<'a> Fields<'a> {
+    /// Build a field iterator over `bytes` that treats every codepoint for
+    /// which `char::is_whitespace` returns true as a separator.
+    fn new(bytes: &'a [u8]) -> Fields<'a> {
+        let is_separator: fn(char) -> bool = |ch| ch.is_whitespace();
+        Fields { it: bytes.fields_with(is_separator) }
+    }
+}
+
+#[cfg(feature = "unicode")]
+impl<'a> Iterator for Fields<'a> {
+    type Item = &'a [u8];
+
+    /// Yield the next field produced by the wrapped `FieldsWith` iterator.
+    #[inline]
+    fn next(&mut self) -> Option<&'a [u8]> {
+        let Fields { it } = self;
+        it.next()
+    }
+}
+
+/// An iterator over fields in the byte string, separated by a predicate over
+/// codepoints.
+///
+/// This iterator splits a byte string based on its predicate function such
+/// that the elements returned are separated by contiguous runs of codepoints
+/// for which the predicate returns true.
+///
+/// `'a` is the lifetime of the byte string being split, while `F` is the type
+/// of the predicate, i.e., `FnMut(char) -> bool`.
+#[derive(Debug)]
+pub struct FieldsWith<'a, F> {
+    // Predicate deciding which codepoints act as separators.
+    f: F,
+    // The complete byte string; yielded fields are subslices of it.
+    bytes: &'a [u8],
+    // Cursor over the codepoints of `bytes`; each item is
+    // `(start, end, char)`, and the offsets are used to slice `bytes`.
+    chars: CharIndices<'a>,
+}
+
+impl<'a, F: FnMut(char) -> bool> FieldsWith<'a, F> {
+    /// Build a field iterator over `bytes` using `f` as the separator
+    /// predicate.
+    fn new(bytes: &'a [u8], f: F) -> FieldsWith<'a, F> {
+        let chars = bytes.char_indices();
+        FieldsWith { f, bytes, chars }
+    }
+}
+
+impl<'a, F: FnMut(char) -> bool> Iterator for FieldsWith<'a, F> {
+    type Item = &'a [u8];
+
+    #[inline]
+    fn next(&mut self) -> Option<&'a [u8]> {
+        // Skip leading separators; the first non-separator codepoint opens
+        // the field. Running out of input here means no fields remain.
+        let (start, mut end) = loop {
+            let (s, e, ch) = self.chars.next()?;
+            if !(self.f)(ch) {
+                break (s, e);
+            }
+        };
+        // Extend the field until a separator (which is consumed) or the end
+        // of the input is reached.
+        loop {
+            match self.chars.next() {
+                Some((_, e, ch)) if !(self.f)(ch) => end = e,
+                _ => break,
+            }
+        }
+        Some(&self.bytes[start..end])
+    }
+}
+
+/// An iterator over substrings in a byte string, split by a separator.
+///
+/// `'h` is the lifetime of the byte string being split (the haystack), while
+/// `'s` is the lifetime of the byte string doing the splitting.
+#[derive(Debug)]
+pub struct Split<'h, 's> {
+    finder: Find<'h, 's>,
+    /// Offset just past the previous splitter match (0 before any match).
+    /// The next element yielded begins here and runs up to the start of the
+    /// next splitter match.
+    last: usize,
+    /// Set once the final element has been produced. This is what lets a
+    /// splitter at the very end of the haystack be followed by exactly one
+    /// empty element.
+    done: bool,
+}
+
+impl<'h, 's> Split<'h, 's> {
+    /// Build a forward splitter of `haystack` on `splitter`.
+    fn new(haystack: &'h [u8], splitter: &'s [u8]) -> Split<'h, 's> {
+        Split { finder: haystack.find_iter(splitter), last: 0, done: false }
+    }
+}
+
+impl<'h, 's> Iterator for Split<'h, 's> {
+    type Item = &'h [u8];
+
+    #[inline]
+    fn next(&mut self) -> Option<&'h [u8]> {
+        let haystack = self.finder.haystack;
+
+        // Another splitter was found: yield what lies between the previous
+        // match and this one, then step past this match.
+        if let Some(start) = self.finder.next() {
+            let piece = &haystack[self.last..start];
+            self.last = start + self.finder.needle.len();
+            return Some(piece);
+        }
+
+        // No more splitters, but unconsumed bytes remain: they form the last
+        // element.
+        if self.last < haystack.len() {
+            let tail = &haystack[self.last..];
+            self.last = haystack.len();
+            self.done = true;
+            return Some(tail);
+        }
+
+        // Nothing remains after the last match: emit one trailing empty
+        // element, then stop.
+        if self.done {
+            return None;
+        }
+        self.done = true;
+        Some(b"")
+    }
+}
+
+/// An iterator over substrings in a byte string, split by a separator, in
+/// reverse.
+///
+/// `'h` is the lifetime of the byte string being split (the haystack), while
+/// `'s` is the lifetime of the byte string doing the splitting.
+#[derive(Debug)]
+pub struct SplitReverse<'h, 's> {
+    finder: FindReverse<'h, 's>,
+    /// Start offset of the previous splitter match (the haystack length
+    /// before any match). The next element yielded ends here and begins
+    /// just past the next splitter match found by the reverse search.
+    last: usize,
+    /// Set once the final element has been produced. This is what lets a
+    /// splitter at the very start of the haystack be followed by exactly one
+    /// empty element.
+    done: bool,
+}
+
+impl<'h, 's> SplitReverse<'h, 's> {
+    /// Build a reverse splitter of `haystack` on `splitter`.
+    fn new(haystack: &'h [u8], splitter: &'s [u8]) -> SplitReverse<'h, 's> {
+        SplitReverse {
+            finder: haystack.rfind_iter(splitter),
+            last: haystack.len(),
+            done: false,
+        }
+    }
+}
+
+impl<'h, 's> Iterator for SplitReverse<'h, 's> {
+    type Item = &'h [u8];
+
+    #[inline]
+    fn next(&mut self) -> Option<&'h [u8]> {
+        let haystack = self.finder.haystack();
+
+        // Another splitter was found: yield what lies between the end of this
+        // match and the start of the previous one, then move `last` back.
+        if let Some(start) = self.finder.next() {
+            let after_match = start + self.finder.needle().len();
+            let piece = &haystack[after_match..self.last];
+            self.last = start;
+            return Some(piece);
+        }
+
+        // No more splitters, but a prefix is still unconsumed: it is the
+        // last element.
+        if self.last != 0 {
+            let head = &haystack[..self.last];
+            self.last = 0;
+            self.done = true;
+            return Some(head);
+        }
+
+        // Nothing remains before the last match: emit one empty element,
+        // then stop.
+        if self.done {
+            return None;
+        }
+        self.done = true;
+        Some(b"")
+    }
+}
+
+/// An iterator over at most `n` substrings in a byte string, split by a
+/// separator.
+///
+/// `'h` is the lifetime of the byte string being split (the haystack), while
+/// `'s` is the lifetime of the byte string doing the splitting.
+#[derive(Debug)]
+pub struct SplitN<'h, 's> {
+    // Unbounded splitter that produces all but the final element.
+    split: Split<'h, 's>,
+    // Maximum number of elements to yield.
+    limit: usize,
+    // Number of `next` calls made so far (incremented on every call).
+    count: usize,
+}
+
+impl<'h, 's> SplitN<'h, 's> {
+    /// Build a splitter of `haystack` on `splitter` that yields at most
+    /// `limit` elements.
+    fn new(
+        haystack: &'h [u8],
+        splitter: &'s [u8],
+        limit: usize,
+    ) -> SplitN<'h, 's> {
+        SplitN { split: haystack.split_str(splitter), limit, count: 0 }
+    }
+}
+
+impl<'h, 's> Iterator for SplitN<'h, 's> {
+    type Item = &'h [u8];
+
+    #[inline]
+    fn next(&mut self) -> Option<&'h [u8]> {
+        self.count += 1;
+        // Stop once the limit is exceeded or the inner splitter has finished.
+        if self.split.done || self.count > self.limit {
+            return None;
+        }
+        // Below the limit: split normally.
+        if self.count < self.limit {
+            return self.split.next();
+        }
+        // Exactly at the limit: the rest of the haystack, unsplit, is the
+        // final element.
+        Some(&self.split.finder.haystack[self.split.last..])
+    }
+}
+
+/// An iterator over at most `n` substrings in a byte string, split by a
+/// separator, in reverse.
+///
+/// `'h` is the lifetime of the byte string being split (the haystack), while
+/// `'s` is the lifetime of the byte string doing the splitting.
+#[derive(Debug)]
+pub struct SplitNReverse<'h, 's> {
+    // Unbounded reverse splitter that produces all but the final element.
+    split: SplitReverse<'h, 's>,
+    // Maximum number of elements to yield.
+    limit: usize,
+    // Number of `next` calls made so far (incremented on every call).
+    count: usize,
+}
+
+impl<'h, 's> SplitNReverse<'h, 's> {
+    /// Build a reverse splitter of `haystack` on `splitter` that yields at
+    /// most `limit` elements.
+    fn new(
+        haystack: &'h [u8],
+        splitter: &'s [u8],
+        limit: usize,
+    ) -> SplitNReverse<'h, 's> {
+        SplitNReverse { split: haystack.rsplit_str(splitter), limit, count: 0 }
+    }
+}
+
+impl<'h, 's> Iterator for SplitNReverse<'h, 's> {
+    type Item = &'h [u8];
+
+    #[inline]
+    fn next(&mut self) -> Option<&'h [u8]> {
+        self.count += 1;
+        // Stop once the limit is exceeded or the inner splitter has finished.
+        if self.split.done || self.count > self.limit {
+            return None;
+        }
+        // Below the limit: split normally.
+        if self.count < self.limit {
+            return self.split.next();
+        }
+        // Exactly at the limit: the unconsumed prefix of the haystack,
+        // unsplit, is the final element.
+        Some(&self.split.finder.haystack()[..self.split.last])
+    }
+}
+
+/// An iterator over all lines in a byte string, without their terminators.
+///
+/// For this iterator, the only line terminators recognized are `\r\n` and
+/// `\n`.
+///
+/// `'a` is the lifetime of the byte string being iterated over.
+#[derive(Clone, Debug)]
+pub struct Lines<'a> {
+ // Terminator-preserving iterator that does the splitting; `Lines` strips
+ // the trailing `\n` / `\r\n` from each item it produces.
+ it: LinesWithTerminator<'a>,
+}
+
+impl<'a> Lines<'a> {
+    fn new(bytes: &'a [u8]) -> Lines<'a> {
+        // All splitting is delegated to the terminator-preserving iterator.
+        let it = LinesWithTerminator::new(bytes);
+        Lines { it }
+    }
+
+ /// Return a copy of the rest of the underlying bytes without affecting the
+ /// iterator itself.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"\
+ /// foo
+ /// bar\r
+ /// baz";
+ /// let mut lines = s.lines();
+ /// assert_eq!(lines.next(), Some(B("foo")));
+ /// assert_eq!(lines.as_bytes(), B("bar\r\nbaz"));
+ /// ```
+    pub fn as_bytes(&self) -> &'a [u8] {
+        // The unconsumed bytes are held by the wrapped iterator.
+        let remaining: &'a [u8] = self.it.bytes;
+        remaining
+    }
+}
+
+impl<'a> Iterator for Lines<'a> {
+    type Item = &'a [u8];
+
+    /// Yield the next line from the front with its terminator removed.
+    #[inline]
+    fn next(&mut self) -> Option<&'a [u8]> {
+        self.it.next().map(trim_last_terminator)
+    }
+}
+
+impl<'a> DoubleEndedIterator for Lines<'a> {
+    /// Yield the next line from the back with its terminator removed.
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.it.next_back().map(trim_last_terminator)
+    }
+}
+
+impl<'a> iter::FusedIterator for Lines<'a> {}
+
+/// An iterator over all lines in a byte string, including their terminators.
+///
+/// For this iterator, the only line terminator recognized is `\n`. (Since
+/// line terminators are included, this also handles `\r\n` line endings.)
+///
+/// Line terminators are only included if they are present in the original
+/// byte string. For example, the last line in a byte string may not end with
+/// a line terminator.
+///
+/// Concatenating all elements yielded by this iterator is guaranteed to yield
+/// the original byte string.
+///
+/// `'a` is the lifetime of the byte string being iterated over.
+#[derive(Clone, Debug)]
+pub struct LinesWithTerminator<'a> {
+ // The bytes not yet yielded; `next` consumes from the front and
+ // `next_back` consumes from the back.
+ bytes: &'a [u8],
+}
+
+impl<'a> LinesWithTerminator<'a> {
+    fn new(bytes: &'a [u8]) -> LinesWithTerminator<'a> {
+        // Nothing is scanned up front; lines are located lazily on demand.
+        Self { bytes }
+    }
+
+ /// Return a copy of the rest of the underlying bytes without affecting the
+ /// iterator itself.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteSlice};
+ ///
+ /// let s = b"\
+ /// foo
+ /// bar\r
+ /// baz";
+ /// let mut lines = s.lines_with_terminator();
+ /// assert_eq!(lines.next(), Some(B("foo\n")));
+ /// assert_eq!(lines.as_bytes(), B("bar\r\nbaz"));
+ /// ```
+    pub fn as_bytes(&self) -> &'a [u8] {
+        // The field already holds exactly the bytes that remain.
+        let remaining: &'a [u8] = self.bytes;
+        remaining
+    }
+}
+
+impl<'a> Iterator for LinesWithTerminator<'a> {
+    type Item = &'a [u8];
+
+    #[inline]
+    fn next(&mut self) -> Option<&'a [u8]> {
+        let bytes = self.bytes;
+        if bytes.is_empty() {
+            return None;
+        }
+        let (line, rest) = match bytes.find_byte(b'\n') {
+            // The line runs up to and including the `\n`.
+            Some(newline) => bytes.split_at(newline + 1),
+            // No terminator left: everything that remains is the last line.
+            None => (bytes, &b""[..]),
+        };
+        self.bytes = rest;
+        Some(line)
+    }
+}
+
+impl<'a> DoubleEndedIterator for LinesWithTerminator<'a> {
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        let bytes = self.bytes;
+        // `None` when empty. Otherwise the final byte is excluded from the
+        // search so that a trailing `\n` stays attached to the last line.
+        let last_index = bytes.len().checked_sub(1)?;
+        let (rest, line) = match bytes[..last_index].rfind_byte(b'\n') {
+            // The last line starts right after the preceding `\n`.
+            Some(newline) => bytes.split_at(newline + 1),
+            // No earlier terminator: everything that remains is one line.
+            None => (&b""[..], bytes),
+        };
+        self.bytes = rest;
+        Some(line)
+    }
+}
+
+impl<'a> iter::FusedIterator for LinesWithTerminator<'a> {}
+
+/// Strip one trailing line terminator (`\r\n` or `\n`) from `s`, if present.
+///
+/// A trailing `\r` that is not followed by `\n` is left in place.
+fn trim_last_terminator(s: &[u8]) -> &[u8] {
+    match s {
+        // The `\r\n` alternative is tried first so both bytes are removed.
+        [line @ .., b'\r', b'\n'] | [line @ .., b'\n'] => line,
+        _ => s,
+    }
+}
+
+#[cfg(all(test, feature = "std"))]
+mod tests {
+ use crate::{
+ ext_slice::{ByteSlice, Lines, LinesWithTerminator, B},
+ tests::LOSSY_TESTS,
+ };
+
+ // Checks every `(expected, input)` pair in `LOSSY_TESTS` against three
+ // lossy decoders: `to_str_lossy`, `to_str_lossy_into`, and std's
+ // `String::from_utf8_lossy`.
+ #[test]
+ fn to_str_lossy() {
+ for (i, &(expected, input)) in LOSSY_TESTS.iter().enumerate() {
+ // Cow-returning variant; the message identifies the failing case.
+ let got = B(input).to_str_lossy();
+ assert_eq!(
+ expected.as_bytes(),
+ got.as_bytes(),
+ "to_str_lossy(ith: {:?}, given: {:?})",
+ i,
+ input,
+ );
+
+ // Variant that appends into a caller-provided `String`.
+ let mut got = String::new();
+ B(input).to_str_lossy_into(&mut got);
+ assert_eq!(
+ expected.as_bytes(),
+ got.as_bytes(),
+ "to_str_lossy_into",
+ );
+
+ // The standard library must produce the same expected bytes.
+ let got = String::from_utf8_lossy(input);
+ assert_eq!(expected.as_bytes(), got.as_bytes(), "std");
+ }
+ }
+
+ // Exercises `Lines` and `LinesWithTerminator` in both directions over a
+ // set of inputs with and without trailing terminators.
+ #[test]
+ fn lines_iteration() {
+ // `t!(iter, expected)` asserts that `iter` yields `expected` going
+ // forward and the same elements reversed when driven through `.rev()`.
+ // `$it` is expanded twice, so each direction gets a fresh iterator.
+ macro_rules! t {
+ ($it:expr, $forward:expr) => {
+ let mut res: Vec<&[u8]> = Vec::from($forward);
+ assert_eq!($it.collect::<Vec<_>>(), res);
+ res.reverse();
+ assert_eq!($it.rev().collect::<Vec<_>>(), res);
+ };
+ }
+
+ // Empty input yields no lines.
+ t!(Lines::new(b""), []);
+ t!(LinesWithTerminator::new(b""), []);
+
+ // A lone terminator is one (empty) line.
+ t!(Lines::new(b"\n"), [B("")]);
+ t!(Lines::new(b"\r\n"), [B("")]);
+ t!(LinesWithTerminator::new(b"\n"), [B("\n")]);
+
+ // Input without any terminator is a single line.
+ t!(Lines::new(b"a"), [B("a")]);
+ t!(LinesWithTerminator::new(b"a"), [B("a")]);
+
+ t!(Lines::new(b"abc"), [B("abc")]);
+ t!(LinesWithTerminator::new(b"abc"), [B("abc")]);
+
+ // A trailing terminator does not produce an extra empty line.
+ t!(Lines::new(b"abc\n"), [B("abc")]);
+ t!(Lines::new(b"abc\r\n"), [B("abc")]);
+ t!(LinesWithTerminator::new(b"abc\n"), [B("abc\n")]);
+
+ // Consecutive terminators produce empty lines between them.
+ t!(Lines::new(b"abc\n\n"), [B("abc"), B("")]);
+ t!(LinesWithTerminator::new(b"abc\n\n"), [B("abc\n"), B("\n")]);
+
+ t!(Lines::new(b"abc\n\ndef"), [B("abc"), B(""), B("def")]);
+ t!(
+ LinesWithTerminator::new(b"abc\n\ndef"),
+ [B("abc\n"), B("\n"), B("def")]
+ );
+
+ t!(Lines::new(b"abc\n\ndef\n"), [B("abc"), B(""), B("def")]);
+ t!(
+ LinesWithTerminator::new(b"abc\n\ndef\n"),
+ [B("abc\n"), B("\n"), B("def\n")]
+ );
+
+ // A leading terminator produces a leading empty line.
+ t!(Lines::new(b"\na\nb\n"), [B(""), B("a"), B("b")]);
+ t!(
+ LinesWithTerminator::new(b"\na\nb\n"),
+ [B("\n"), B("a\n"), B("b\n")]
+ );
+
+ t!(Lines::new(b"\n\n\n"), [B(""), B(""), B("")]);
+ t!(LinesWithTerminator::new(b"\n\n\n"), [B("\n"), B("\n"), B("\n")]);
+ }
+}
diff --git a/vendor/bstr/src/ext_vec.rs b/vendor/bstr/src/ext_vec.rs
new file mode 100644
index 000000000..b8e2be2cf
--- /dev/null
+++ b/vendor/bstr/src/ext_vec.rs
@@ -0,0 +1,1124 @@
+use core::fmt;
+use core::iter;
+use core::ops;
+use core::ptr;
+
+use alloc::{borrow::Cow, string::String, vec, vec::Vec};
+
+#[cfg(feature = "std")]
+use std::{
+ error,
+ ffi::{OsStr, OsString},
+ path::{Path, PathBuf},
+};
+
+use crate::{
+ ext_slice::ByteSlice,
+ utf8::{self, Utf8Error},
+};
+
+/// Concatenate the elements given by the iterator together into a single
+/// `Vec<u8>`.
+///
+/// The elements may be any type that can be cheaply converted into an `&[u8]`.
+/// This includes, but is not limited to, `&str`, `&BStr` and `&[u8]` itself.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr;
+///
+/// let s = bstr::concat(&["foo", "bar", "baz"]);
+/// assert_eq!(s, "foobarbaz".as_bytes());
+/// ```
+#[inline]
+pub fn concat<T, I>(elements: I) -> Vec<u8>
+where
+    T: AsRef<[u8]>,
+    I: IntoIterator<Item = T>,
+{
+    // Append every element's bytes, in iteration order, to one buffer.
+    let mut joined: Vec<u8> = Vec::new();
+    elements.into_iter().for_each(|piece| joined.push_str(piece));
+    joined
+}
+
+/// Join the elements given by the iterator with the given separator into a
+/// single `Vec<u8>`.
+///
+/// Both the separator and the elements may be any type that can be cheaply
+/// converted into an `&[u8]`. This includes, but is not limited to,
+/// `&str`, `&BStr` and `&[u8]` itself.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr;
+///
+/// let s = bstr::join(",", &["foo", "bar", "baz"]);
+/// assert_eq!(s, "foo,bar,baz".as_bytes());
+/// ```
+#[inline]
+pub fn join<B, T, I>(separator: B, elements: I) -> Vec<u8>
+where
+    B: AsRef<[u8]>,
+    T: AsRef<[u8]>,
+    I: IntoIterator<Item = T>,
+{
+    let mut joined: Vec<u8> = Vec::new();
+    let mut remaining = elements.into_iter();
+    // The first element is written without a separator; each later element
+    // is preceded by one. An empty iterator produces an empty vector.
+    if let Some(first) = remaining.next() {
+        joined.push_str(first);
+        for piece in remaining {
+            joined.push_str(&separator);
+            joined.push_str(piece);
+        }
+    }
+    joined
+}
+
+// The only implementation of `ByteVec` visible here. Each required method is
+// the identity, since the implementing type already is a `Vec<u8>`.
+impl ByteVec for Vec<u8> {
+ // Shared access to the vector itself.
+ #[inline]
+ fn as_vec(&self) -> &Vec<u8> {
+ self
+ }
+
+ // Exclusive access to the vector itself.
+ #[inline]
+ fn as_vec_mut(&mut self) -> &mut Vec<u8> {
+ self
+ }
+
+ // Hands back ownership of the vector unchanged.
+ #[inline]
+ fn into_vec(self) -> Vec<u8> {
+ self
+ }
+}
+
+/// Ensure that callers cannot implement `ByteVec` by making an
+/// unimplementable trait its super trait.
+mod private {
+ pub trait Sealed {}
+}
+impl private::Sealed for Vec<u8> {}
+
+/// A trait that extends `Vec<u8>` with string oriented methods.
+///
+/// Note that when using the constructor methods, such as
+/// `ByteVec::from_slice`, one should actually call them using the concrete
+/// type. For example:
+///
+/// ```
+/// use bstr::{B, ByteVec};
+///
+/// let s = Vec::from_slice(b"abc"); // NOT ByteVec::from_slice("...")
+/// assert_eq!(s, B("abc"));
+/// ```
+///
+/// This trait is sealed and cannot be implemented outside of `bstr`.
+pub trait ByteVec: private::Sealed {
+ /// A method for accessing the raw vector bytes of this type. This is
+ /// always a no-op and callers shouldn't care about it. This only exists
+ /// for making the extension trait work.
+ #[doc(hidden)]
+ fn as_vec(&self) -> &Vec<u8>;
+
+ /// A method for accessing the raw vector bytes of this type, mutably. This
+ /// is always a no-op and callers shouldn't care about it. This only exists
+ /// for making the extension trait work.
+ #[doc(hidden)]
+ fn as_vec_mut(&mut self) -> &mut Vec<u8>;
+
+ /// A method for consuming ownership of this vector. This is always a no-op
+ /// and callers shouldn't care about it. This only exists for making the
+ /// extension trait work.
+ #[doc(hidden)]
+ fn into_vec(self) -> Vec<u8>
+ where
+ Self: Sized;
+
+ /// Create a new owned byte string from the given byte slice.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let s = Vec::from_slice(b"abc");
+ /// assert_eq!(s, B("abc"));
+ /// ```
+    #[inline]
+    fn from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8> {
+        // Copy the borrowed bytes into a freshly allocated vector.
+        let slice: &[u8] = bytes.as_ref();
+        slice.to_vec()
+    }
+
+ /// Create a new byte string from an owned OS string.
+ ///
+ /// When the underlying bytes of OS strings are accessible, then this
+ /// always succeeds and is zero cost. Otherwise, this returns the given
+ /// `OsString` if it is not valid UTF-8.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::ffi::OsString;
+ ///
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let os_str = OsString::from("foo");
+ /// let bs = Vec::from_os_string(os_str).expect("valid UTF-8");
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+    #[inline]
+    #[cfg(feature = "std")]
+    fn from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString> {
+        // Unix: take the OS string's bytes directly; this cannot fail.
+        #[cfg(unix)]
+        #[inline]
+        fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
+            use std::os::unix::ffi::OsStringExt;
+
+            let raw: Vec<u8> = os_str.into_vec();
+            Ok(raw)
+        }
+
+        // Elsewhere: only succeed if the OS string converts to a `String`;
+        // on failure the original `OsString` is handed back as the error.
+        #[cfg(not(unix))]
+        #[inline]
+        fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
+            let text = os_str.into_string()?;
+            Ok(text.into_bytes())
+        }
+
+        imp(os_str)
+    }
+
+ /// Lossily create a new byte string from an OS string slice.
+ ///
+ /// When the underlying bytes of OS strings are accessible, then this is
+ /// zero cost and always returns a slice. Otherwise, a UTF-8 check is
+ /// performed and if the given OS string is not valid UTF-8, then it is
+ /// lossily decoded into valid UTF-8 (with invalid bytes replaced by the
+ /// Unicode replacement codepoint).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ ///
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let os_str = OsStr::new("foo");
+ /// let bs = Vec::from_os_str_lossy(os_str);
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+    #[inline]
+    #[cfg(feature = "std")]
+    fn from_os_str_lossy<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
+        // Unix: borrow the OS string's bytes directly.
+        #[cfg(unix)]
+        #[inline]
+        fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
+            use std::os::unix::ffi::OsStrExt;
+
+            let raw: &'a [u8] = os_str.as_bytes();
+            Cow::Borrowed(raw)
+        }
+
+        // Elsewhere: go through a lossy string conversion, keeping the result
+        // borrowed when the conversion itself did not need to allocate.
+        #[cfg(not(unix))]
+        #[inline]
+        fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
+            let lossy = os_str.to_string_lossy();
+            match lossy {
+                Cow::Owned(text) => Cow::Owned(text.into_bytes()),
+                Cow::Borrowed(text) => Cow::Borrowed(text.as_bytes()),
+            }
+        }
+
+        imp(os_str)
+    }
+
+ /// Create a new byte string from an owned file path.
+ ///
+ /// When the underlying bytes of paths are accessible, then this always
+ /// succeeds and is zero cost. Otherwise, this returns the given `PathBuf`
+ /// if it is not valid UTF-8.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::path::PathBuf;
+ ///
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let path = PathBuf::from("foo");
+ /// let bs = Vec::from_path_buf(path).expect("must be valid UTF-8");
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+    #[inline]
+    #[cfg(feature = "std")]
+    fn from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf> {
+        // Convert via the OS string; on failure, turn the rejected OS string
+        // back into a path for the caller.
+        let os_string = path.into_os_string();
+        match Vec::from_os_string(os_string) {
+            Ok(bytes) => Ok(bytes),
+            Err(rejected) => Err(PathBuf::from(rejected)),
+        }
+    }
+
+ /// Lossily create a new byte string from a file path.
+ ///
+ /// When the underlying bytes of paths are accessible, then this is
+ /// zero cost and always returns a slice. Otherwise, a UTF-8 check is
+ /// performed and if the given path is not valid UTF-8, then it is lossily
+ /// decoded into valid UTF-8 (with invalid bytes replaced by the Unicode
+ /// replacement codepoint).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::path::Path;
+ ///
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let path = Path::new("foo");
+ /// let bs = Vec::from_path_lossy(path);
+ /// assert_eq!(bs, B("foo"));
+ /// ```
+    #[inline]
+    #[cfg(feature = "std")]
+    fn from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]> {
+        // A path is handled exactly like its underlying OS string.
+        let os_str: &'a OsStr = path.as_os_str();
+        Vec::from_os_str_lossy(os_str)
+    }
+
+ /// Appends the given byte to the end of this byte string.
+ ///
+ /// Note that this is equivalent to the generic `Vec::push` method. This
+ /// method is provided to permit callers to explicitly differentiate
+ /// between pushing bytes, codepoints and strings.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = <Vec<u8>>::from("abc");
+ /// s.push_byte(b'\xE2');
+ /// s.push_byte(b'\x98');
+ /// s.push_byte(b'\x83');
+ /// assert_eq!(s, "abc☃".as_bytes());
+ /// ```
+    #[inline]
+    fn push_byte(&mut self, byte: u8) {
+        // Plain `Vec::push` on the underlying vector.
+        let buf = self.as_vec_mut();
+        buf.push(byte);
+    }
+
+ /// Appends the given `char` to the end of this byte string.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = <Vec<u8>>::from("abc");
+ /// s.push_char('1');
+ /// s.push_char('2');
+ /// s.push_char('3');
+ /// assert_eq!(s, "abc123".as_bytes());
+ /// ```
+    #[inline]
+    fn push_char(&mut self, ch: char) {
+        if ch.len_utf8() == 1 {
+            // Single-byte encoding: the codepoint value is the byte itself.
+            self.push_byte(ch as u8);
+        } else {
+            // Multi-byte encoding: encode into a 4-byte scratch buffer and
+            // append only the bytes that were written.
+            let mut scratch = [0; 4];
+            let encoded: &str = ch.encode_utf8(&mut scratch);
+            self.as_vec_mut().extend_from_slice(encoded.as_bytes());
+        }
+    }
+
+ /// Appends the given slice to the end of this byte string. This accepts
+ /// any type that can be converted to a `&[u8]`. This includes, but is not
+ /// limited to, `&str`, `&BStr`, and of course, `&[u8]` itself.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = <Vec<u8>>::from("abc");
+ /// s.push_str(b"123");
+ /// assert_eq!(s, "abc123".as_bytes());
+ /// ```
+    #[inline]
+    fn push_str<B: AsRef<[u8]>>(&mut self, bytes: B) {
+        // Append the borrowed bytes to the underlying vector.
+        let slice: &[u8] = bytes.as_ref();
+        self.as_vec_mut().extend_from_slice(slice);
+    }
+
+ /// Converts a `Vec<u8>` into a `String` if and only if this byte string is
+ /// valid UTF-8.
+ ///
+ /// If it is not valid UTF-8, then a
+ /// [`FromUtf8Error`](struct.FromUtf8Error.html)
+ /// is returned. (This error can be used to examine why UTF-8 validation
+ /// failed, or to regain the original byte string.)
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let bytes = Vec::from("hello");
+ /// let string = bytes.into_string().unwrap();
+ ///
+ /// assert_eq!("hello", string);
+ /// ```
+ ///
+ /// If this byte string is not valid UTF-8, then an error will be returned.
+ /// That error can then be used to inspect the location at which invalid
+ /// UTF-8 was found, or to regain the original byte string:
+ ///
+ /// ```
+ /// use bstr::{B, ByteVec};
+ ///
+ /// let bytes = Vec::from_slice(b"foo\xFFbar");
+ /// let err = bytes.into_string().unwrap_err();
+ ///
+ /// assert_eq!(err.utf8_error().valid_up_to(), 3);
+ /// assert_eq!(err.utf8_error().error_len(), Some(1));
+ ///
+ /// // At no point in this example is an allocation performed.
+ /// let bytes = Vec::from(err.into_vec());
+ /// assert_eq!(bytes, B(b"foo\xFFbar"));
+ /// ```
+    #[inline]
+    fn into_string(self) -> Result<String, FromUtf8Error>
+    where
+        Self: Sized,
+    {
+        // On invalid UTF-8, return the validation error together with the
+        // original bytes so the caller can recover them.
+        if let Err(err) = utf8::validate(self.as_vec()) {
+            return Err(FromUtf8Error { original: self.into_vec(), err });
+        }
+        // SAFETY: This is safe because of the guarantees provided by
+        // utf8::validate, which just accepted these bytes.
+        Ok(unsafe { self.into_string_unchecked() })
+    }
+
+ /// Lossily converts a `Vec<u8>` into a `String`. If this byte string
+ /// contains invalid UTF-8, then the invalid bytes are replaced with the
+ /// Unicode replacement codepoint.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let bytes = Vec::from_slice(b"foo\xFFbar");
+ /// let string = bytes.into_string_lossy();
+ /// assert_eq!(string, "foo\u{FFFD}bar");
+ /// ```
+    #[inline]
+    fn into_string_lossy(self) -> String
+    where
+        Self: Sized,
+    {
+        // An owned result is the lossily decoded string; return it directly.
+        if let Cow::Owned(replaced) = self.as_vec().to_str_lossy() {
+            return replaced;
+        }
+        // SAFETY: to_str_lossy() returning a Cow::Borrowed guarantees
+        // the entire string is valid utf8.
+        unsafe { self.into_string_unchecked() }
+    }
+
+ /// Unsafely convert this byte string into a `String`, without checking for
+ /// valid UTF-8.
+ ///
+ /// # Safety
+ ///
+ /// Callers *must* ensure that this byte string is valid UTF-8 before
+ /// calling this method. Converting a byte string into a `String` that is
+ /// not valid UTF-8 is considered undefined behavior.
+ ///
+ /// This routine is useful in performance sensitive contexts where the
+ /// UTF-8 validity of the byte string is already known and it is
+ /// undesirable to pay the cost of an additional UTF-8 validation check
+ /// that [`into_string`](#method.into_string) performs.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// // SAFETY: This is safe because string literals are guaranteed to be
+ /// // valid UTF-8 by the Rust compiler.
+ /// let s = unsafe { Vec::from("☃βツ").into_string_unchecked() };
+ /// assert_eq!("☃βツ", s);
+ /// ```
+    #[inline]
+    unsafe fn into_string_unchecked(self) -> String
+    where
+        Self: Sized,
+    {
+        // The caller has promised these bytes are valid UTF-8 (see the
+        // `# Safety` section); no check is performed here.
+        let bytes: Vec<u8> = self.into_vec();
+        String::from_utf8_unchecked(bytes)
+    }
+
+ /// Converts this byte string into an OS string, in place.
+ ///
+ /// When OS strings can be constructed from arbitrary byte sequences, this
+ /// always succeeds and is zero cost. Otherwise, if this byte string is not
+ /// valid UTF-8, then an error (with the original byte string) is returned.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::ffi::OsStr;
+ ///
+ /// use bstr::ByteVec;
+ ///
+ /// let bs = Vec::from("foo");
+ /// let os_str = bs.into_os_string().expect("should be valid UTF-8");
+ /// assert_eq!(os_str, OsStr::new("foo"));
+ /// ```
+    #[cfg(feature = "std")]
+    #[inline]
+    fn into_os_string(self) -> Result<OsString, FromUtf8Error>
+    where
+        Self: Sized,
+    {
+        // Unix: build the OS string directly from the bytes; cannot fail.
+        #[cfg(unix)]
+        #[inline]
+        fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> {
+            use std::os::unix::ffi::OsStringExt;
+
+            let os_string = OsString::from_vec(v);
+            Ok(os_string)
+        }
+
+        // Elsewhere: the bytes must first convert to a `String`.
+        #[cfg(not(unix))]
+        #[inline]
+        fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> {
+            let text = v.into_string()?;
+            Ok(OsString::from(text))
+        }
+
+        imp(self.into_vec())
+    }
+
+ /// Lossily converts this byte string into an OS string, in place.
+ ///
+ /// When OS strings can be constructed from arbitrary byte sequences, this
+ /// is zero cost and uses the bytes unchanged. Otherwise, this will perform
+ /// a UTF-8 check and lossily convert this byte string into valid UTF-8
+ /// using the Unicode replacement codepoint.
+ ///
+ /// Note that this can prevent the correct roundtripping of file paths when
+ /// the representation of `OsString` is opaque.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let bs = Vec::from_slice(b"foo\xFFbar");
+ /// let os_str = bs.into_os_string_lossy();
+ /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
+ /// ```
+    #[inline]
+    #[cfg(feature = "std")]
+    fn into_os_string_lossy(self) -> OsString
+    where
+        Self: Sized,
+    {
+        // Unix: build the OS string directly from the bytes.
+        #[cfg(unix)]
+        #[inline]
+        fn imp(v: Vec<u8>) -> OsString {
+            use std::os::unix::ffi::OsStringExt;
+
+            let os_string = OsString::from_vec(v);
+            os_string
+        }
+
+        // Elsewhere: lossily decode to a `String` first.
+        #[cfg(not(unix))]
+        #[inline]
+        fn imp(v: Vec<u8>) -> OsString {
+            let text: String = v.into_string_lossy();
+            OsString::from(text)
+        }
+
+        imp(self.into_vec())
+    }
+
+ /// Turns this byte string into an owned file path, in place.
+ ///
+ /// On platforms where paths may hold arbitrary byte sequences, this always
+ /// succeeds and is zero cost. On other platforms, a byte string that is
+ /// not valid UTF-8 produces an error that carries the original byte
+ /// string.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let bs = Vec::from("foo");
+ /// let path = bs.into_path_buf().expect("should be valid UTF-8");
+ /// assert_eq!(path.as_os_str(), "foo");
+ /// ```
+ #[cfg(feature = "std")]
+ #[inline]
+ fn into_path_buf(self) -> Result<PathBuf, FromUtf8Error>
+ where
+     Self: Sized,
+ {
+     // The OS string conversion decides success; the path is just a wrapper.
+     let os_string = self.into_os_string()?;
+     Ok(PathBuf::from(os_string))
+ }
+
+ /// Lossily converts this byte string into an owned file path, in place.
+ ///
+ /// When paths can be constructed from arbitrary byte sequences, this is
+ /// zero cost and the bytes are adopted unchanged. Otherwise, this will
+ /// perform a UTF-8 check and lossily convert this byte string into valid
+ /// UTF-8 using the Unicode replacement codepoint.
+ ///
+ /// Note that this can prevent the correct roundtripping of file paths when
+ /// the representation of `PathBuf` is opaque.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let bs = Vec::from_slice(b"foo\xFFbar");
+ /// let path = bs.into_path_buf_lossy();
+ /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
+ /// ```
+ #[inline]
+ #[cfg(feature = "std")]
+ fn into_path_buf_lossy(self) -> PathBuf
+ where
+     Self: Sized,
+ {
+     // All of the lossy handling happens in the OS string conversion.
+     PathBuf::from(self.into_os_string_lossy())
+ }
+
+ /// Removes the final byte of this `Vec<u8>` and returns it.
+ ///
+ /// `None` is returned when this byte string is empty.
+ ///
+ /// Be aware that if the last codepoint in this byte string is not ASCII,
+ /// dropping only its final byte can leave invalid UTF-8 behind.
+ ///
+ /// This does the same thing as the generic `Vec::pop` method. It exists so
+ /// that callers can explicitly distinguish popping a byte from popping a
+ /// codepoint.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foo");
+ /// assert_eq!(s.pop_byte(), Some(b'o'));
+ /// assert_eq!(s.pop_byte(), Some(b'o'));
+ /// assert_eq!(s.pop_byte(), Some(b'f'));
+ /// assert_eq!(s.pop_byte(), None);
+ /// ```
+ #[inline]
+ fn pop_byte(&mut self) -> Option<u8> {
+     let bytes = self.as_vec_mut();
+     bytes.pop()
+ }
+
+ /// Removes the final codepoint of this `Vec<u8>` and returns it.
+ ///
+ /// `None` is returned when this byte string is empty. When the trailing
+ /// bytes do not form a valid UTF-8 code unit sequence, the Unicode
+ /// replacement codepoint is yielded instead, following the
+ /// [replacement codepoint substitution policy](index.html#handling-of-invalid-utf8-8).
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foo");
+ /// assert_eq!(s.pop_char(), Some('o'));
+ /// assert_eq!(s.pop_char(), Some('o'));
+ /// assert_eq!(s.pop_char(), Some('f'));
+ /// assert_eq!(s.pop_char(), None);
+ /// ```
+ ///
+ /// The next example demonstrates the replacement codepoint substitution
+ /// policy. The first pop yields a replacement codepoint yet removes two
+ /// bytes. The later pops that hit `\xFF` behave differently, because
+ /// `\xFF` is never a valid prefix of any valid UTF-8 code unit sequence.
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from_slice(b"f\xFF\xFF\xFFoo\xE2\x98");
+ /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
+ /// assert_eq!(s.pop_char(), Some('o'));
+ /// assert_eq!(s.pop_char(), Some('o'));
+ /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
+ /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
+ /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
+ /// assert_eq!(s.pop_char(), Some('f'));
+ /// assert_eq!(s.pop_char(), None);
+ /// ```
+ #[inline]
+ fn pop_char(&mut self) -> Option<char> {
+     let (decoded, width) = utf8::decode_last_lossy(self.as_vec());
+     match width {
+         // A zero width is treated as "nothing left to pop".
+         0 => None,
+         _ => {
+             let keep = self.as_vec().len() - width;
+             self.as_vec_mut().truncate(keep);
+             Some(decoded)
+         }
+     }
+ }
+
+ /// Removes the `char` that starts at the given byte position of this
+ /// `Vec<u8>` and returns it.
+ ///
+ /// When the bytes at that position do not begin a valid UTF-8 code unit
+ /// sequence, a
+ /// [replacement codepoint is returned instead](index.html#handling-of-invalid-utf8-8).
+ ///
+ /// # Panics
+ ///
+ /// Panics if `at` is larger than or equal to this byte string's length.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foo☃bar");
+ /// assert_eq!(s.remove_char(3), '☃');
+ /// assert_eq!(s, b"foobar");
+ /// ```
+ ///
+ /// This example shows how the Unicode replacement codepoint policy is
+ /// used:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from_slice(b"foo\xFFbar");
+ /// assert_eq!(s.remove_char(3), '\u{FFFD}');
+ /// assert_eq!(s, b"foobar");
+ /// ```
+ #[inline]
+ fn remove_char(&mut self, at: usize) -> char {
+     // Slicing panics on its own when `at` exceeds the length.
+     let (decoded, width) = utf8::decode_lossy(&self.as_vec()[at..]);
+     // A zero width is reported as `at` not being a valid position.
+     assert!(
+         width > 0,
+         "expected {} to be less than {}",
+         at,
+         self.as_vec().len(),
+     );
+     let end = at + width;
+     self.as_vec_mut().drain(at..end);
+     decoded
+ }
+
+ /// Inserts the UTF-8 encoding of the given codepoint into this `Vec<u8>`
+ /// at a particular byte position.
+ ///
+ /// This is an `O(n)` operation as it may copy a number of elements in this
+ /// byte string proportional to its length.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `at` is larger than the byte string's length.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foobar");
+ /// s.insert_char(3, '☃');
+ /// assert_eq!(s, "foo☃bar".as_bytes());
+ /// ```
+ #[inline]
+ fn insert_char(&mut self, at: usize, ch: char) {
+     // Four bytes is the scratch space handed to `encode_utf8`.
+     let mut scratch = [0; 4];
+     let encoded = ch.encode_utf8(&mut scratch);
+     self.insert_str(at, encoded.as_bytes());
+ }
+
+ /// Inserts the given byte string into this byte string at a particular
+ /// byte position.
+ ///
+ /// This is an `O(n)` operation as it may copy a number of elements in this
+ /// byte string proportional to its length.
+ ///
+ /// The given byte string may be any type that can be cheaply converted
+ /// into a `&[u8]`. This includes, but is not limited to, `&str` and
+ /// `&[u8]`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `at` is larger than the byte string's length.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foobar");
+ /// s.insert_str(3, "☃☃☃");
+ /// assert_eq!(s, "foo☃☃☃bar".as_bytes());
+ /// ```
+ #[inline]
+ fn insert_str<B: AsRef<[u8]>>(&mut self, at: usize, bytes: B) {
+ let bytes = bytes.as_ref();
+ let len = self.as_vec().len();
+ // Checked before any raw pointer arithmetic: the copies below rely on
+ // `at <= len` (in particular, `len - at` must not underflow).
+ assert!(at <= len, "expected {} to be <= {}", at, len);
+
+ // SAFETY: We'd like to efficiently splice in the given bytes into
+ // this byte string. Since we are only working with `u8` elements here,
+ // we only need to consider whether our bounds are correct and whether
+ // our byte string has enough space.
+ //
+ // The reservation happens before any pointer is taken, so the pointers
+ // used below refer to the (possibly reallocated) final buffer.
+ self.as_vec_mut().reserve(bytes.len());
+ unsafe {
+ // Shift bytes after `at` over by the length of `bytes` to make
+ // room for it. This requires referencing two regions of memory
+ // that may overlap, so we use ptr::copy.
+ ptr::copy(
+ self.as_vec().as_ptr().add(at),
+ self.as_vec_mut().as_mut_ptr().add(at + bytes.len()),
+ len - at,
+ );
+ // Now copy the bytes given into the room we made above. In this
+ // case, we know that the given bytes cannot possibly overlap
+ // with this byte string since we have a mutable borrow of the
+ // latter. Thus, we can use a nonoverlapping copy.
+ ptr::copy_nonoverlapping(
+ bytes.as_ptr(),
+ self.as_vec_mut().as_mut_ptr().add(at),
+ bytes.len(),
+ );
+ // Only now, with the tail shifted and the gap filled, is the new
+ // length published.
+ self.as_vec_mut().set_len(len + bytes.len());
+ }
+ }
+
+ /// Replaces the bytes in the specified range of this byte string with the
+ /// given bytes. The replacement is not required to be the same length as
+ /// the range it replaces.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the given range is invalid.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foobar");
+ /// s.replace_range(2..4, "xxxxx");
+ /// assert_eq!(s, "foxxxxxar".as_bytes());
+ /// ```
+ #[inline]
+ fn replace_range<R, B>(&mut self, range: R, replace_with: B)
+ where
+     R: ops::RangeBounds<usize>,
+     B: AsRef<[u8]>,
+ {
+     let replacement = replace_with.as_ref().iter().copied();
+     // The `Splice` value is dropped at the end of this statement, which is
+     // when the replacement is carried out.
+     self.as_vec_mut().splice(range, replacement);
+ }
+
+ /// Creates a draining iterator that removes the specified range in this
+ /// `Vec<u8>` and yields each of the removed bytes.
+ ///
+ /// Note that the elements specified by the given range are removed
+ /// regardless of whether the returned iterator is fully exhausted.
+ ///
+ /// Also note that it is unspecified how many bytes are removed from the
+ /// `Vec<u8>` if the `DrainBytes` iterator is leaked.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the given range is not valid.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use bstr::ByteVec;
+ ///
+ /// let mut s = Vec::from("foobar");
+ /// {
+ ///     let mut drainer = s.drain_bytes(2..4);
+ ///     assert_eq!(drainer.next(), Some(b'o'));
+ ///     assert_eq!(drainer.next(), Some(b'b'));
+ ///     assert_eq!(drainer.next(), None);
+ /// }
+ /// assert_eq!(s, "foar".as_bytes());
+ /// ```
+ #[inline]
+ fn drain_bytes<R>(&mut self, range: R) -> DrainBytes<'_>
+ where
+     R: ops::RangeBounds<usize>,
+ {
+     // `DrainBytes` is a thin wrapper around `Vec::drain`.
+     DrainBytes { it: self.as_vec_mut().drain(range) }
+ }
+}
+
+/// A draining byte oriented iterator for `Vec<u8>`.
+///
+/// This iterator is created by
+/// [`ByteVec::drain_bytes`](trait.ByteVec.html#method.drain_bytes).
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr::ByteVec;
+///
+/// let mut s = Vec::from("foobar");
+/// {
+///     let mut drainer = s.drain_bytes(2..4);
+///     assert_eq!(drainer.next(), Some(b'o'));
+///     assert_eq!(drainer.next(), Some(b'b'));
+///     assert_eq!(drainer.next(), None);
+/// }
+/// assert_eq!(s, "foar".as_bytes());
+/// ```
+#[derive(Debug)]
+pub struct DrainBytes<'a> {
+    // The wrapped `Vec::drain` iterator; every method below forwards to it.
+    it: vec::Drain<'a, u8>,
+}
+
+impl<'a> iter::FusedIterator for DrainBytes<'a> {}
+
+impl<'a> Iterator for DrainBytes<'a> {
+    type Item = u8;
+
+    #[inline]
+    fn next(&mut self) -> Option<u8> {
+        self.it.next()
+    }
+
+    // `ExactSizeIterator` is implemented below, and its contract requires
+    // `size_hint` to report the exact remaining length. The default
+    // `(0, None)` would violate that, so forward to the inner iterator.
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.it.size_hint()
+    }
+}
+
+impl<'a> DoubleEndedIterator for DrainBytes<'a> {
+    #[inline]
+    fn next_back(&mut self) -> Option<u8> {
+        self.it.next_back()
+    }
+}
+
+impl<'a> ExactSizeIterator for DrainBytes<'a> {
+    #[inline]
+    fn len(&self) -> usize {
+        self.it.len()
+    }
+}
+
+/// An error that may occur when converting a `Vec<u8>` to a `String`.
+///
+/// This error includes the original `Vec<u8>` that failed to convert to a
+/// `String`. This permits callers to recover the allocation used even if
+/// it is not valid UTF-8.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr::{B, ByteVec};
+///
+/// let bytes = Vec::from_slice(b"foo\xFFbar");
+/// let err = bytes.into_string().unwrap_err();
+///
+/// assert_eq!(err.utf8_error().valid_up_to(), 3);
+/// assert_eq!(err.utf8_error().error_len(), Some(1));
+///
+/// // At no point in this example is an allocation performed.
+/// let bytes = Vec::from(err.into_vec());
+/// assert_eq!(bytes, B(b"foo\xFFbar"));
+/// ```
+#[derive(Debug, Eq, PartialEq)]
+pub struct FromUtf8Error {
+    // The bytes that failed to convert, handed back by `into_vec`.
+    original: Vec<u8>,
+    // The UTF-8 error, exposed through `utf8_error`.
+    err: Utf8Error,
+}
+
+impl FromUtf8Error {
+    /// Return the original bytes as a slice that failed to convert to a
+    /// `String`.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// use bstr::{B, ByteVec};
+    ///
+    /// let bytes = Vec::from_slice(b"foo\xFFbar");
+    /// let err = bytes.into_string().unwrap_err();
+    ///
+    /// // At no point in this example is an allocation performed.
+    /// assert_eq!(err.as_bytes(), B(b"foo\xFFbar"));
+    /// ```
+    #[inline]
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.original
+    }
+
+    /// Consume this error and return the original byte string that failed to
+    /// convert to a `String`.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// use bstr::{B, ByteVec};
+    ///
+    /// let bytes = Vec::from_slice(b"foo\xFFbar");
+    /// let err = bytes.into_string().unwrap_err();
+    /// let original = err.into_vec();
+    ///
+    /// // At no point in this example is an allocation performed.
+    /// assert_eq!(original, B(b"foo\xFFbar"));
+    /// ```
+    #[inline]
+    pub fn into_vec(self) -> Vec<u8> {
+        self.original
+    }
+
+    /// Return the underlying UTF-8 error that occurred. This error provides
+    /// information on the nature and location of the invalid UTF-8 detected.
+    ///
+    /// # Examples
+    ///
+    /// Basic usage:
+    ///
+    /// ```
+    /// use bstr::{B, ByteVec};
+    ///
+    /// let bytes = Vec::from_slice(b"foo\xFFbar");
+    /// let err = bytes.into_string().unwrap_err();
+    ///
+    /// assert_eq!(err.utf8_error().valid_up_to(), 3);
+    /// assert_eq!(err.utf8_error().error_len(), Some(1));
+    /// ```
+    #[inline]
+    pub fn utf8_error(&self) -> &Utf8Error {
+        &self.err
+    }
+}
+
+#[cfg(feature = "std")]
+impl error::Error for FromUtf8Error {
+    /// Returns a fixed, human-readable summary of this error.
+    #[inline]
+    fn description(&self) -> &str {
+        const SUMMARY: &str = "invalid UTF-8 vector";
+        SUMMARY
+    }
+}
+
+impl fmt::Display for FromUtf8Error {
+    /// Writes the message of the underlying UTF-8 error.
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let cause = &self.err;
+        f.write_fmt(format_args!("{}", cause))
+    }
+}
+
+#[cfg(all(test, feature = "std"))]
+mod tests {
+    use crate::ext_vec::ByteVec;
+
+    /// Builds a byte string from `start`, inserts `extra` at byte offset
+    /// `at`, and returns the result.
+    fn inserted(start: &str, at: usize, extra: &str) -> Vec<u8> {
+        let mut s = Vec::from(start);
+        s.insert_str(at, extra);
+        s
+    }
+
+    #[test]
+    fn insert() {
+        // Into an empty byte string.
+        assert_eq!(inserted("", 0, "foo"), "foo".as_bytes());
+
+        // At the front and at the end of a one-byte string.
+        assert_eq!(inserted("a", 0, "foo"), "fooa".as_bytes());
+        assert_eq!(inserted("a", 1, "foo"), "afoo".as_bytes());
+
+        // In the middle, at the front and at the end of a longer string.
+        assert_eq!(inserted("foobar", 3, "quux"), "fooquuxbar".as_bytes());
+        assert_eq!(inserted("foobar", 3, "x"), "fooxbar".as_bytes());
+        assert_eq!(inserted("foobar", 0, "x"), "xfoobar".as_bytes());
+        assert_eq!(inserted("foobar", 6, "x"), "foobarx".as_bytes());
+
+        // An insertion longer than the string it is inserted into.
+        assert_eq!(
+            inserted("foobar", 3, "quuxbazquux"),
+            "fooquuxbazquuxbar".as_bytes()
+        );
+    }
+
+    #[test]
+    #[should_panic]
+    fn insert_fail1() {
+        let _ = inserted("", 1, "foo");
+    }
+
+    #[test]
+    #[should_panic]
+    fn insert_fail2() {
+        let _ = inserted("a", 2, "foo");
+    }
+
+    #[test]
+    #[should_panic]
+    fn insert_fail3() {
+        let _ = inserted("foobar", 7, "foo");
+    }
+}
diff --git a/vendor/bstr/src/impls.rs b/vendor/bstr/src/impls.rs
new file mode 100644
index 000000000..c063cb6b6
--- /dev/null
+++ b/vendor/bstr/src/impls.rs
@@ -0,0 +1,1125 @@
+// Generates `PartialEq` in both directions between `$lhs` and `$rhs`.
+// Both operands are viewed as `&[u8]` and those byte slices are compared.
+macro_rules! impl_partial_eq {
+    ($lhs:ty, $rhs:ty) => {
+        impl<'a, 'b> PartialEq<$rhs> for $lhs {
+            #[inline]
+            fn eq(&self, other: &$rhs) -> bool {
+                let rhs_bytes: &[u8] = other.as_ref();
+                let lhs_bytes: &[u8] = self.as_bytes();
+                PartialEq::eq(lhs_bytes, rhs_bytes)
+            }
+        }
+
+        impl<'a, 'b> PartialEq<$lhs> for $rhs {
+            #[inline]
+            fn eq(&self, other: &$lhs) -> bool {
+                let self_bytes: &[u8] = self.as_ref();
+                let other_bytes: &[u8] = other.as_bytes();
+                PartialEq::eq(self_bytes, other_bytes)
+            }
+        }
+    };
+}
+
+// Like `impl_partial_eq!`, but for a `Cow` on the `$rhs` side: the `Cow` (or
+// the reference on the other side) is dereferenced explicitly before being
+// viewed as bytes.
+#[cfg(feature = "alloc")]
+macro_rules! impl_partial_eq_cow {
+    ($lhs:ty, $rhs:ty) => {
+        impl<'a, 'b> PartialEq<$rhs> for $lhs {
+            #[inline]
+            fn eq(&self, other: &$rhs) -> bool {
+                let rhs_bytes: &[u8] = (&**other).as_ref();
+                PartialEq::eq(self.as_bytes(), rhs_bytes)
+            }
+        }
+
+        impl<'a, 'b> PartialEq<$lhs> for $rhs {
+            #[inline]
+            fn eq(&self, other: &$lhs) -> bool {
+                // Here it is `other` (the `$lhs` value) that gets
+                // dereferenced; `self` is the `Cow`.
+                let other_bytes: &[u8] = (&**other).as_ref();
+                PartialEq::eq(other_bytes, self.as_bytes())
+            }
+        }
+    };
+}
+
+// Generates `PartialOrd` in both directions between `$lhs` and `$rhs`.
+// Both operands are viewed as `&[u8]` and those byte slices are compared.
+macro_rules! impl_partial_ord {
+    ($lhs:ty, $rhs:ty) => {
+        impl<'a, 'b> PartialOrd<$rhs> for $lhs {
+            #[inline]
+            fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> {
+                let rhs_bytes: &[u8] = other.as_ref();
+                let lhs_bytes: &[u8] = self.as_bytes();
+                PartialOrd::partial_cmp(lhs_bytes, rhs_bytes)
+            }
+        }
+
+        impl<'a, 'b> PartialOrd<$lhs> for $rhs {
+            #[inline]
+            fn partial_cmp(&self, other: &$lhs) -> Option<Ordering> {
+                let self_bytes: &[u8] = self.as_ref();
+                let other_bytes: &[u8] = other.as_bytes();
+                PartialOrd::partial_cmp(self_bytes, other_bytes)
+            }
+        }
+    };
+}
+
+// Trait implementations for the owned `BString` type.
+#[cfg(feature = "alloc")]
+mod bstring {
+    use core::{
+        cmp::Ordering, convert::TryFrom, fmt, iter::FromIterator, ops,
+    };
+
+    use alloc::{
+        borrow::{Borrow, Cow, ToOwned},
+        string::String,
+        vec,
+        vec::Vec,
+    };
+
+    use crate::{
+        bstr::BStr, bstring::BString, ext_slice::ByteSlice, ext_vec::ByteVec,
+    };
+
+    // Formatting is delegated to the borrowed `BStr` view.
+    impl fmt::Display for BString {
+        #[inline]
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            let view = self.as_bstr();
+            fmt::Display::fmt(view, f)
+        }
+    }
+
+    impl fmt::Debug for BString {
+        #[inline]
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            let view = self.as_bstr();
+            fmt::Debug::fmt(view, f)
+        }
+    }
+
+    // A `BString` dereferences to its underlying `Vec<u8>`.
+    impl ops::Deref for BString {
+        type Target = Vec<u8>;
+
+        #[inline]
+        fn deref(&self) -> &Self::Target {
+            self.as_vec()
+        }
+    }
+
+    impl ops::DerefMut for BString {
+        #[inline]
+        fn deref_mut(&mut self) -> &mut Self::Target {
+            self.as_vec_mut()
+        }
+    }
+
+    // Cheap reference conversions to byte slices and `BStr`.
+    impl AsRef<[u8]> for BString {
+        #[inline]
+        fn as_ref(&self) -> &[u8] {
+            self.as_bytes()
+        }
+    }
+
+    impl AsRef<BStr> for BString {
+        #[inline]
+        fn as_ref(&self) -> &BStr {
+            self.as_bstr()
+        }
+    }
+
+    impl AsMut<[u8]> for BString {
+        #[inline]
+        fn as_mut(&mut self) -> &mut [u8] {
+            self.as_bytes_mut()
+        }
+    }
+
+    impl AsMut<BStr> for BString {
+        #[inline]
+        fn as_mut(&mut self) -> &mut BStr {
+            self.as_mut_bstr()
+        }
+    }
+
+    // `Borrow` and `ToOwned` pair `BString` with `BStr`, which is what
+    // makes `Cow<'_, BStr>` usable.
+    impl Borrow<BStr> for BString {
+        #[inline]
+        fn borrow(&self) -> &BStr {
+            self.as_bstr()
+        }
+    }
+
+    impl ToOwned for BStr {
+        type Owned = BString;
+
+        #[inline]
+        fn to_owned(&self) -> Self::Owned {
+            BString::from(self)
+        }
+    }
+
+    // The default `BString` is empty.
+    impl Default for BString {
+        fn default() -> Self {
+            BString::from(vec![])
+        }
+    }
+
+    // Conversions into `BString`. Borrowed inputs are copied; owned inputs
+    // are passed along by value.
+    impl<'a> From<&'a [u8]> for BString {
+        #[inline]
+        fn from(bytes: &'a [u8]) -> Self {
+            let owned = bytes.to_vec();
+            BString::from(owned)
+        }
+    }
+
+    impl From<Vec<u8>> for BString {
+        #[inline]
+        fn from(bytes: Vec<u8>) -> Self {
+            BString::new(bytes)
+        }
+    }
+
+    impl From<BString> for Vec<u8> {
+        #[inline]
+        fn from(bstring: BString) -> Self {
+            bstring.into_vec()
+        }
+    }
+
+    impl<'a> From<&'a str> for BString {
+        #[inline]
+        fn from(text: &'a str) -> Self {
+            let owned = text.as_bytes().to_vec();
+            BString::from(owned)
+        }
+    }
+
+    impl From<String> for BString {
+        #[inline]
+        fn from(text: String) -> Self {
+            BString::from(text.into_bytes())
+        }
+    }
+
+    impl<'a> From<&'a BStr> for BString {
+        #[inline]
+        fn from(bstr: &'a BStr) -> Self {
+            let owned = bstr.bytes.to_vec();
+            BString::from(owned)
+        }
+    }
+
+    impl<'a> From<BString> for Cow<'a, BStr> {
+        #[inline]
+        fn from(bstring: BString) -> Self {
+            Cow::Owned(bstring)
+        }
+    }
+
+    // Fallible conversions to UTF-8 string types.
+    impl TryFrom<BString> for String {
+        type Error = crate::FromUtf8Error;
+
+        #[inline]
+        fn try_from(bstring: BString) -> Result<String, Self::Error> {
+            let bytes = bstring.into_vec();
+            bytes.into_string()
+        }
+    }
+
+    impl<'a> TryFrom<&'a BString> for &'a str {
+        type Error = crate::Utf8Error;
+
+        #[inline]
+        fn try_from(bstring: &'a BString) -> Result<&'a str, Self::Error> {
+            bstring.as_bytes().to_str()
+        }
+    }
+
+    // Collecting into a `BString`. Items are appended in iteration order.
+    impl FromIterator<char> for BString {
+        #[inline]
+        fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
+            let text = iter.into_iter().collect::<String>();
+            BString::from(text)
+        }
+    }
+
+    impl FromIterator<u8> for BString {
+        #[inline]
+        fn from_iter<T: IntoIterator<Item = u8>>(iter: T) -> Self {
+            let bytes = iter.into_iter().collect::<Vec<u8>>();
+            BString::from(bytes)
+        }
+    }
+
+    impl<'a> FromIterator<&'a str> for BString {
+        #[inline]
+        fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
+            let mut joined = vec![];
+            for piece in iter {
+                joined.push_str(piece);
+            }
+            BString::from(joined)
+        }
+    }
+
+    impl<'a> FromIterator<&'a [u8]> for BString {
+        #[inline]
+        fn from_iter<T: IntoIterator<Item = &'a [u8]>>(iter: T) -> Self {
+            let mut joined = vec![];
+            for piece in iter {
+                joined.push_str(piece);
+            }
+            BString::from(joined)
+        }
+    }
+
+    impl<'a> FromIterator<&'a BStr> for BString {
+        #[inline]
+        fn from_iter<T: IntoIterator<Item = &'a BStr>>(iter: T) -> Self {
+            let mut joined = vec![];
+            for piece in iter {
+                joined.push_str(piece);
+            }
+            BString::from(joined)
+        }
+    }
+
+    impl FromIterator<BString> for BString {
+        #[inline]
+        fn from_iter<T: IntoIterator<Item = BString>>(iter: T) -> Self {
+            let mut joined = vec![];
+            for piece in iter {
+                joined.push_str(piece);
+            }
+            BString::from(joined)
+        }
+    }
+
+    // Equality and ordering compare the underlying bytes.
+    impl Eq for BString {}
+
+    impl PartialEq for BString {
+        #[inline]
+        fn eq(&self, other: &BString) -> bool {
+            let (lhs, rhs) = (&self[..], &other[..]);
+            lhs == rhs
+        }
+    }
+
+    impl_partial_eq!(BString, Vec<u8>);
+    impl_partial_eq!(BString, [u8]);
+    impl_partial_eq!(BString, &'a [u8]);
+    impl_partial_eq!(BString, String);
+    impl_partial_eq!(BString, str);
+    impl_partial_eq!(BString, &'a str);
+    impl_partial_eq!(BString, BStr);
+    impl_partial_eq!(BString, &'a BStr);
+
+    impl PartialOrd for BString {
+        #[inline]
+        fn partial_cmp(&self, other: &BString) -> Option<Ordering> {
+            let (lhs, rhs) = (self.as_bytes(), other.as_bytes());
+            PartialOrd::partial_cmp(lhs, rhs)
+        }
+    }
+
+    impl Ord for BString {
+        #[inline]
+        fn cmp(&self, other: &BString) -> Ordering {
+            let ordering = self.partial_cmp(other);
+            ordering.unwrap()
+        }
+    }
+
+    impl_partial_ord!(BString, Vec<u8>);
+    impl_partial_ord!(BString, [u8]);
+    impl_partial_ord!(BString, &'a [u8]);
+    impl_partial_ord!(BString, String);
+    impl_partial_ord!(BString, str);
+    impl_partial_ord!(BString, &'a str);
+    impl_partial_ord!(BString, BStr);
+    impl_partial_ord!(BString, &'a BStr);
+}
+
+// Trait implementations for the borrowed `BStr` type.
+mod bstr {
+    use core::{cmp::Ordering, convert::TryFrom, fmt, ops};
+
+    #[cfg(feature = "alloc")]
+    use alloc::{borrow::Cow, boxed::Box, string::String, vec::Vec};
+
+    use crate::{bstr::BStr, ext_slice::ByteSlice};
+
+    // Lossy display: valid UTF-8 is written as-is and each invalid chunk is
+    // written as one U+FFFD. Padding is only applied when the format spec
+    // carries an explicit alignment.
+    impl fmt::Display for BStr {
+        #[inline]
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            /// Write the given bstr (lossily) to the given formatter.
+            fn write_bstr(
+                f: &mut fmt::Formatter<'_>,
+                bstr: &BStr,
+            ) -> Result<(), fmt::Error> {
+                for chunk in bstr.utf8_chunks() {
+                    f.write_str(chunk.valid())?;
+                    if !chunk.invalid().is_empty() {
+                        f.write_str("\u{FFFD}")?;
+                    }
+                }
+                Ok(())
+            }
+
+            /// Write 'num' fill characters to the given formatter.
+            fn write_pads(
+                f: &mut fmt::Formatter<'_>,
+                num: usize,
+            ) -> fmt::Result {
+                let fill = f.fill();
+                for _ in 0..num {
+                    f.write_fmt(format_args!("{}", fill))?;
+                }
+                Ok(())
+            }
+
+            if let Some(align) = f.align() {
+                let width = f.width().unwrap_or(0);
+                let nchars = self.chars().count();
+                let remaining_pads = width.saturating_sub(nchars);
+                match align {
+                    fmt::Alignment::Left => {
+                        write_bstr(f, self)?;
+                        write_pads(f, remaining_pads)?;
+                    }
+                    fmt::Alignment::Right => {
+                        write_pads(f, remaining_pads)?;
+                        write_bstr(f, self)?;
+                    }
+                    fmt::Alignment::Center => {
+                        // With an odd amount of padding, the extra fill
+                        // character goes on the right.
+                        let half = remaining_pads / 2;
+                        let second_half = if remaining_pads % 2 == 0 {
+                            half
+                        } else {
+                            half + 1
+                        };
+                        write_pads(f, half)?;
+                        write_bstr(f, self)?;
+                        write_pads(f, second_half)?;
+                    }
+                }
+                Ok(())
+            } else {
+                write_bstr(f, self)?;
+                Ok(())
+            }
+        }
+    }
+
+    // Debug output is a double-quoted string. NUL is written as `\0`,
+    // invalid bytes as uppercase `\xNN`, the remaining ASCII control
+    // characters (other than `\n`, `\r`, `\t`) as lowercase `\xNN`, and
+    // everything else via `char::escape_debug`.
+    impl fmt::Debug for BStr {
+        #[inline]
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            write!(f, "\"")?;
+            for (s, e, ch) in self.char_indices() {
+                match ch {
+                    '\0' => write!(f, "\\0")?,
+                    '\u{FFFD}' => {
+                        // Tell a literal U+FFFD in the input apart from one
+                        // that stands in for invalid bytes.
+                        let bytes = self[s..e].as_bytes();
+                        if bytes == b"\xEF\xBF\xBD" {
+                            write!(f, "{}", ch.escape_debug())?;
+                        } else {
+                            for &b in bytes {
+                                write!(f, r"\x{:02X}", b)?;
+                            }
+                        }
+                    }
+                    // ASCII control characters except \0, \n, \r, \t. The
+                    // upper range ends at \x1f so that \x1a-\x1f are
+                    // escaped the same way as the other control characters.
+                    '\x01'..='\x08'
+                    | '\x0b'
+                    | '\x0c'
+                    | '\x0e'..='\x1f'
+                    | '\x7f' => {
+                        write!(f, "\\x{:02x}", ch as u32)?;
+                    }
+                    '\n' | '\r' | '\t' | _ => {
+                        write!(f, "{}", ch.escape_debug())?;
+                    }
+                }
+            }
+            write!(f, "\"")?;
+            Ok(())
+        }
+    }
+
+    // A `BStr` dereferences to its underlying byte slice.
+    impl ops::Deref for BStr {
+        type Target = [u8];
+
+        #[inline]
+        fn deref(&self) -> &[u8] {
+            &self.bytes
+        }
+    }
+
+    impl ops::DerefMut for BStr {
+        #[inline]
+        fn deref_mut(&mut self) -> &mut [u8] {
+            &mut self.bytes
+        }
+    }
+
+    // Indexing by position yields a byte; indexing by range yields a
+    // `BStr`.
+    impl ops::Index<usize> for BStr {
+        type Output = u8;
+
+        #[inline]
+        fn index(&self, idx: usize) -> &u8 {
+            &self.as_bytes()[idx]
+        }
+    }
+
+    impl ops::Index<ops::RangeFull> for BStr {
+        type Output = BStr;
+
+        #[inline]
+        fn index(&self, _: ops::RangeFull) -> &BStr {
+            self
+        }
+    }
+
+    impl ops::Index<ops::Range<usize>> for BStr {
+        type Output = BStr;
+
+        #[inline]
+        fn index(&self, r: ops::Range<usize>) -> &BStr {
+            BStr::new(&self.as_bytes()[r.start..r.end])
+        }
+    }
+
+    impl ops::Index<ops::RangeInclusive<usize>> for BStr {
+        type Output = BStr;
+
+        #[inline]
+        fn index(&self, r: ops::RangeInclusive<usize>) -> &BStr {
+            BStr::new(&self.as_bytes()[*r.start()..=*r.end()])
+        }
+    }
+
+    impl ops::Index<ops::RangeFrom<usize>> for BStr {
+        type Output = BStr;
+
+        #[inline]
+        fn index(&self, r: ops::RangeFrom<usize>) -> &BStr {
+            BStr::new(&self.as_bytes()[r.start..])
+        }
+    }
+
+    impl ops::Index<ops::RangeTo<usize>> for BStr {
+        type Output = BStr;
+
+        #[inline]
+        fn index(&self, r: ops::RangeTo<usize>) -> &BStr {
+            BStr::new(&self.as_bytes()[..r.end])
+        }
+    }
+
+    impl ops::Index<ops::RangeToInclusive<usize>> for BStr {
+        type Output = BStr;
+
+        #[inline]
+        fn index(&self, r: ops::RangeToInclusive<usize>) -> &BStr {
+            BStr::new(&self.as_bytes()[..=r.end])
+        }
+    }
+
+    impl ops::IndexMut<usize> for BStr {
+        #[inline]
+        fn index_mut(&mut self, idx: usize) -> &mut u8 {
+            &mut self.bytes[idx]
+        }
+    }
+
+    impl ops::IndexMut<ops::RangeFull> for BStr {
+        #[inline]
+        fn index_mut(&mut self, _: ops::RangeFull) -> &mut BStr {
+            self
+        }
+    }
+
+    impl ops::IndexMut<ops::Range<usize>> for BStr {
+        #[inline]
+        fn index_mut(&mut self, r: ops::Range<usize>) -> &mut BStr {
+            BStr::from_bytes_mut(&mut self.bytes[r.start..r.end])
+        }
+    }
+
+    impl ops::IndexMut<ops::RangeInclusive<usize>> for BStr {
+        #[inline]
+        fn index_mut(&mut self, r: ops::RangeInclusive<usize>) -> &mut BStr {
+            BStr::from_bytes_mut(&mut self.bytes[*r.start()..=*r.end()])
+        }
+    }
+
+    impl ops::IndexMut<ops::RangeFrom<usize>> for BStr {
+        #[inline]
+        fn index_mut(&mut self, r: ops::RangeFrom<usize>) -> &mut BStr {
+            BStr::from_bytes_mut(&mut self.bytes[r.start..])
+        }
+    }
+
+    impl ops::IndexMut<ops::RangeTo<usize>> for BStr {
+        #[inline]
+        fn index_mut(&mut self, r: ops::RangeTo<usize>) -> &mut BStr {
+            BStr::from_bytes_mut(&mut self.bytes[..r.end])
+        }
+    }
+
+    impl ops::IndexMut<ops::RangeToInclusive<usize>> for BStr {
+        #[inline]
+        fn index_mut(&mut self, r: ops::RangeToInclusive<usize>) -> &mut BStr {
+            BStr::from_bytes_mut(&mut self.bytes[..=r.end])
+        }
+    }
+
+    // Cheap reference conversions between `BStr`, `[u8]` and `str`.
+    impl AsRef<[u8]> for BStr {
+        #[inline]
+        fn as_ref(&self) -> &[u8] {
+            self.as_bytes()
+        }
+    }
+
+    impl AsRef<BStr> for BStr {
+        #[inline]
+        fn as_ref(&self) -> &BStr {
+            self
+        }
+    }
+
+    impl AsRef<BStr> for [u8] {
+        #[inline]
+        fn as_ref(&self) -> &BStr {
+            BStr::new(self)
+        }
+    }
+
+    impl AsRef<BStr> for str {
+        #[inline]
+        fn as_ref(&self) -> &BStr {
+            BStr::new(self)
+        }
+    }
+
+    impl AsMut<[u8]> for BStr {
+        #[inline]
+        fn as_mut(&mut self) -> &mut [u8] {
+            &mut self.bytes
+        }
+    }
+
+    impl AsMut<BStr> for [u8] {
+        #[inline]
+        fn as_mut(&mut self) -> &mut BStr {
+            BStr::new_mut(self)
+        }
+    }
+
+    // The default borrowed `BStr` is empty.
+    impl<'a> Default for &'a BStr {
+        fn default() -> &'a BStr {
+            BStr::from_bytes(b"")
+        }
+    }
+
+    impl<'a> Default for &'a mut BStr {
+        fn default() -> &'a mut BStr {
+            BStr::from_bytes_mut(&mut [])
+        }
+    }
+
+    // Borrowed and boxed conversions to and from `BStr`.
+    impl<'a> From<&'a [u8]> for &'a BStr {
+        #[inline]
+        fn from(s: &'a [u8]) -> &'a BStr {
+            BStr::from_bytes(s)
+        }
+    }
+
+    impl<'a> From<&'a BStr> for &'a [u8] {
+        #[inline]
+        fn from(s: &'a BStr) -> &'a [u8] {
+            BStr::as_bytes(s)
+        }
+    }
+
+    impl<'a> From<&'a str> for &'a BStr {
+        #[inline]
+        fn from(s: &'a str) -> &'a BStr {
+            BStr::from_bytes(s.as_bytes())
+        }
+    }
+
+    #[cfg(feature = "alloc")]
+    impl<'a> From<&'a BStr> for Cow<'a, BStr> {
+        #[inline]
+        fn from(s: &'a BStr) -> Cow<'a, BStr> {
+            Cow::Borrowed(s)
+        }
+    }
+
+    #[cfg(feature = "alloc")]
+    impl From<Box<[u8]>> for Box<BStr> {
+        #[inline]
+        fn from(s: Box<[u8]>) -> Box<BStr> {
+            BStr::from_boxed_bytes(s)
+        }
+    }
+
+    #[cfg(feature = "alloc")]
+    impl From<Box<BStr>> for Box<[u8]> {
+        #[inline]
+        fn from(s: Box<BStr>) -> Box<[u8]> {
+            BStr::into_boxed_bytes(s)
+        }
+    }
+
+    // Fallible conversions to UTF-8 string types.
+    impl<'a> TryFrom<&'a BStr> for &'a str {
+        type Error = crate::Utf8Error;
+
+        #[inline]
+        fn try_from(s: &'a BStr) -> Result<&'a str, crate::Utf8Error> {
+            s.as_bytes().to_str()
+        }
+    }
+
+    #[cfg(feature = "alloc")]
+    impl<'a> TryFrom<&'a BStr> for String {
+        type Error = crate::Utf8Error;
+
+        #[inline]
+        fn try_from(s: &'a BStr) -> Result<String, crate::Utf8Error> {
+            Ok(s.as_bytes().to_str()?.into())
+        }
+    }
+
+    // Cloning a boxed `BStr` copies its bytes into a new box.
+    #[cfg(feature = "alloc")]
+    impl Clone for Box<BStr> {
+        #[inline]
+        fn clone(&self) -> Self {
+            BStr::from_boxed_bytes(self.as_bytes().into())
+        }
+    }
+
+    // Equality and ordering compare the underlying bytes.
+    impl Eq for BStr {}
+
+    impl PartialEq<BStr> for BStr {
+        #[inline]
+        fn eq(&self, other: &BStr) -> bool {
+            self.as_bytes() == other.as_bytes()
+        }
+    }
+
+    impl_partial_eq!(BStr, [u8]);
+    impl_partial_eq!(BStr, &'a [u8]);
+    impl_partial_eq!(BStr, str);
+    impl_partial_eq!(BStr, &'a str);
+
+    #[cfg(feature = "alloc")]
+    impl_partial_eq!(BStr, Vec<u8>);
+    #[cfg(feature = "alloc")]
+    impl_partial_eq!(&'a BStr, Vec<u8>);
+    #[cfg(feature = "alloc")]
+    impl_partial_eq!(BStr, String);
+    #[cfg(feature = "alloc")]
+    impl_partial_eq!(&'a BStr, String);
+    #[cfg(feature = "alloc")]
+    impl_partial_eq_cow!(&'a BStr, Cow<'a, BStr>);
+    #[cfg(feature = "alloc")]
+    impl_partial_eq_cow!(&'a BStr, Cow<'a, str>);
+    #[cfg(feature = "alloc")]
+    impl_partial_eq_cow!(&'a BStr, Cow<'a, [u8]>);
+
+    impl PartialOrd for BStr {
+        #[inline]
+        fn partial_cmp(&self, other: &BStr) -> Option<Ordering> {
+            PartialOrd::partial_cmp(self.as_bytes(), other.as_bytes())
+        }
+    }
+
+    impl Ord for BStr {
+        #[inline]
+        fn cmp(&self, other: &BStr) -> Ordering {
+            self.partial_cmp(other).unwrap()
+        }
+    }
+
+    impl_partial_ord!(BStr, [u8]);
+    impl_partial_ord!(BStr, &'a [u8]);
+    impl_partial_ord!(BStr, str);
+    impl_partial_ord!(BStr, &'a str);
+
+    #[cfg(feature = "alloc")]
+    impl_partial_ord!(BStr, Vec<u8>);
+    #[cfg(feature = "alloc")]
+    impl_partial_ord!(&'a BStr, Vec<u8>);
+    #[cfg(feature = "alloc")]
+    impl_partial_ord!(BStr, String);
+    #[cfg(feature = "alloc")]
+    impl_partial_ord!(&'a BStr, String);
+}
+
+// Serde support for the borrowed `BStr` type.
+#[cfg(feature = "serde")]
+mod bstr_serde {
+    use core::fmt;
+
+    use serde::{
+        de::Error, de::Visitor, Deserialize, Deserializer, Serialize,
+        Serializer,
+    };
+
+    use crate::bstr::BStr;
+
+    // A `BStr` is serialized as raw bytes.
+    impl Serialize for BStr {
+        #[inline]
+        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+        where
+            S: Serializer,
+        {
+            let raw = self.as_bytes();
+            serializer.serialize_bytes(raw)
+        }
+    }
+
+    // Only borrowed input is handled by the visitor (bytes or a string
+    // borrowed for `'de`).
+    impl<'a, 'de: 'a> Deserialize<'de> for &'a BStr {
+        #[inline]
+        fn deserialize<D>(deserializer: D) -> Result<&'a BStr, D::Error>
+        where
+            D: Deserializer<'de>,
+        {
+            struct BStrVisitor;
+
+            impl<'de> Visitor<'de> for BStrVisitor {
+                type Value = &'de BStr;
+
+                fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+                    f.write_str("a borrowed byte string")
+                }
+
+                #[inline]
+                fn visit_borrowed_bytes<E: Error>(
+                    self,
+                    value: &'de [u8],
+                ) -> Result<Self::Value, E> {
+                    let borrowed = BStr::new(value);
+                    Ok(borrowed)
+                }
+
+                #[inline]
+                fn visit_borrowed_str<E: Error>(
+                    self,
+                    value: &'de str,
+                ) -> Result<Self::Value, E> {
+                    let borrowed = BStr::new(value);
+                    Ok(borrowed)
+                }
+            }
+
+            deserializer.deserialize_bytes(BStrVisitor)
+        }
+    }
+}
+
+#[cfg(all(feature = "serde", feature = "alloc"))]
+mod bstring_serde {
+ use core::{cmp, fmt};
+
+ use alloc::{boxed::Box, string::String, vec::Vec};
+
+ use serde::{
+ de::Error, de::SeqAccess, de::Visitor, Deserialize, Deserializer,
+ Serialize, Serializer,
+ };
+
+ use crate::{bstr::BStr, bstring::BString};
+
+ // A `BString` is serialized as raw bytes.
+ impl Serialize for BString {
+     #[inline]
+     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+     where
+         S: Serializer,
+     {
+         let raw = self.as_bytes();
+         serializer.serialize_bytes(raw)
+     }
+ }
+
+ // Accepts a sequence of elements, borrowed or owned bytes, or a borrowed
+ // or owned string.
+ impl<'de> Deserialize<'de> for BString {
+     #[inline]
+     fn deserialize<D>(deserializer: D) -> Result<BString, D::Error>
+     where
+         D: Deserializer<'de>,
+     {
+         struct BStringVisitor;
+
+         impl<'de> Visitor<'de> for BStringVisitor {
+             type Value = BString;
+
+             fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+                 f.write_str("a byte string")
+             }
+
+             #[inline]
+             fn visit_seq<V: SeqAccess<'de>>(
+                 self,
+                 mut visitor: V,
+             ) -> Result<Self::Value, V::Error> {
+                 // The size hint only seeds the initial capacity, and that
+                 // is capped at 256 elements.
+                 let hinted = visitor.size_hint().unwrap_or(0);
+                 let capacity = cmp::min(hinted, 256);
+                 let mut collected = Vec::with_capacity(capacity);
+                 while let Some(element) = visitor.next_element()? {
+                     collected.push(element);
+                 }
+                 Ok(BString::from(collected))
+             }
+
+             #[inline]
+             fn visit_bytes<E: Error>(
+                 self,
+                 value: &[u8],
+             ) -> Result<Self::Value, E> {
+                 Ok(BString::from(value))
+             }
+
+             #[inline]
+             fn visit_byte_buf<E: Error>(
+                 self,
+                 value: Vec<u8>,
+             ) -> Result<Self::Value, E> {
+                 Ok(BString::from(value))
+             }
+
+             #[inline]
+             fn visit_str<E: Error>(
+                 self,
+                 value: &str,
+             ) -> Result<Self::Value, E> {
+                 Ok(BString::from(value))
+             }
+
+             #[inline]
+             fn visit_string<E: Error>(
+                 self,
+                 value: String,
+             ) -> Result<Self::Value, E> {
+                 Ok(BString::from(value))
+             }
+         }
+
+         deserializer.deserialize_byte_buf(BStringVisitor)
+     }
+ }
+
+ /// Builds a `Box<BStr>` from a sequence, bytes, or a string by first
+ /// obtaining a boxed byte slice and then wrapping it.
+ impl<'de> Deserialize<'de> for Box<BStr> {
+     #[inline]
+     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+     where
+         D: Deserializer<'de>,
+     {
+         struct BoxedVisitor;
+
+         impl<'de> Visitor<'de> for BoxedVisitor {
+             type Value = Box<BStr>;
+
+             fn expecting(&self, out: &mut fmt::Formatter) -> fmt::Result {
+                 out.write_str("a boxed byte string")
+             }
+
+             #[inline]
+             fn visit_str<E: Error>(
+                 self,
+                 text: &str,
+             ) -> Result<Self::Value, E> {
+                 let boxed = Box::<[u8]>::from(text.as_bytes());
+                 Ok(BStr::from_boxed_bytes(boxed))
+             }
+
+             #[inline]
+             fn visit_string<E: Error>(
+                 self,
+                 text: String,
+             ) -> Result<Self::Value, E> {
+                 let boxed = text.into_bytes().into_boxed_slice();
+                 Ok(BStr::from_boxed_bytes(boxed))
+             }
+
+             #[inline]
+             fn visit_bytes<E: Error>(
+                 self,
+                 raw: &[u8],
+             ) -> Result<Self::Value, E> {
+                 let boxed = Box::<[u8]>::from(raw);
+                 Ok(BStr::from_boxed_bytes(boxed))
+             }
+
+             #[inline]
+             fn visit_byte_buf<E: Error>(
+                 self,
+                 raw: Vec<u8>,
+             ) -> Result<Self::Value, E> {
+                 Ok(BStr::from_boxed_bytes(raw.into_boxed_slice()))
+             }
+
+             #[inline]
+             fn visit_seq<V: SeqAccess<'de>>(
+                 self,
+                 mut seq: V,
+             ) -> Result<Self::Value, V::Error> {
+                 // Pre-allocate from the size hint, but never more than 256
+                 // elements up front; the vector still grows as needed.
+                 let capacity =
+                     seq.size_hint().map_or(0, |hint| cmp::min(hint, 256));
+                 let mut collected: Vec<u8> = Vec::with_capacity(capacity);
+                 while let Some(byte) = seq.next_element()? {
+                     collected.push(byte);
+                 }
+                 Ok(BStr::from_boxed_bytes(collected.into_boxed_slice()))
+             }
+         }
+
+         deserializer.deserialize_byte_buf(BoxedVisitor)
+     }
+ }
+}
+
+#[cfg(all(test, feature = "std"))]
+mod display {
+ #[cfg(not(miri))]
+ use crate::bstring::BString;
+ use crate::ByteSlice;
+
+ #[test]
+ fn clean() {
+     // Plain ASCII is displayed unchanged.
+     let ascii = b"abc".as_bstr();
+     assert_eq!(format!("{}", ascii), "abc");
+
+     // This byte sequence is displayed with replacement characters.
+     let invalid = b"\xf0\x28\x8c\xbc".as_bstr();
+     assert_eq!(format!("{}", invalid), "�(��");
+ }
+
+ #[test]
+ fn width_bigger_than_bstr() {
+     // Every expected string is padded out to the requested width (3 chars
+     // of content for `abc`, 4 for the invalid sequence). The space-filled
+     // expectations mirror the `-`-filled ones below character for
+     // character; centering puts the odd leftover fill on the right.
+     let abc = b"abc".as_bstr();
+     assert_eq!(format!("{:<7}!", abc), "abc    !");
+     assert_eq!(format!("{:>7}!", abc), "    abc!");
+     assert_eq!(format!("{:^7}!", abc), "  abc  !");
+     assert_eq!(format!("{:^6}!", abc), " abc  !");
+     assert_eq!(format!("{:-<7}!", abc), "abc----!");
+     assert_eq!(format!("{:->7}!", abc), "----abc!");
+     assert_eq!(format!("{:-^7}!", abc), "--abc--!");
+     assert_eq!(format!("{:-^6}!", abc), "-abc--!");
+
+     // Displayed as four characters, so three fill characters for width 7
+     // and two for width 6.
+     let invalid = b"\xf0\x28\x8c\xbc".as_bstr();
+     assert_eq!(format!("{:<7}!", invalid), "�(��   !");
+     assert_eq!(format!("{:>7}!", invalid), "   �(��!");
+     assert_eq!(format!("{:^7}!", invalid), " �(��  !");
+     assert_eq!(format!("{:^6}!", invalid), " �(�� !");
+
+     assert_eq!(format!("{:-<7}!", invalid), "�(��---!");
+     assert_eq!(format!("{:->7}!", invalid), "---�(��!");
+     assert_eq!(format!("{:-^7}!", invalid), "-�(��--!");
+     assert_eq!(format!("{:-^6}!", invalid), "-�(��-!");
+ }
+
+ #[test]
+ fn width_lesser_than_bstr() {
+     // A width smaller than the displayed content adds no fill and does
+     // not truncate, whatever the alignment or fill character.
+     let abc = b"abc".as_bstr();
+     assert_eq!(format!("{:<2}!", abc), "abc!");
+     assert_eq!(format!("{:>2}!", abc), "abc!");
+     assert_eq!(format!("{:^2}!", abc), "abc!");
+     assert_eq!(format!("{:-<2}!", abc), "abc!");
+     assert_eq!(format!("{:->2}!", abc), "abc!");
+     assert_eq!(format!("{:-^2}!", abc), "abc!");
+
+     let invalid = b"\xf0\x28\x8c\xbc".as_bstr();
+     assert_eq!(format!("{:<3}!", invalid), "�(��!");
+     assert_eq!(format!("{:>3}!", invalid), "�(��!");
+     assert_eq!(format!("{:^3}!", invalid), "�(��!");
+     assert_eq!(format!("{:^2}!", invalid), "�(��!");
+
+     assert_eq!(format!("{:-<3}!", invalid), "�(��!");
+     assert_eq!(format!("{:->3}!", invalid), "�(��!");
+     assert_eq!(format!("{:-^3}!", invalid), "�(��!");
+     assert_eq!(format!("{:-^2}!", invalid), "�(��!");
+ }
+
+ #[cfg(not(miri))]
+ quickcheck::quickcheck! {
+     // Formatting with a width equal to the string's own char count must
+     // produce at least that many chars.
+     fn total_length(bstr: BString) -> bool {
+         let char_count = bstr.chars().count();
+         let padded = format!("{:<1$}", bstr.as_bstr(), char_count);
+         padded.chars().count() >= char_count
+     }
+ }
+}
+
+#[cfg(all(test, feature = "alloc"))]
+mod bstring_arbitrary {
+    use crate::bstring::BString;
+
+    use quickcheck::{Arbitrary, Gen};
+
+    /// Generates and shrinks `BString` values by delegating to the
+    /// `Arbitrary` implementation of `Vec<u8>`.
+    impl Arbitrary for BString {
+        fn arbitrary(g: &mut Gen) -> BString {
+            let raw: Vec<u8> = Arbitrary::arbitrary(g);
+            BString::from(raw)
+        }
+
+        fn shrink(&self) -> Box<dyn Iterator<Item = BString>> {
+            let smaller = self.as_vec().shrink();
+            Box::new(smaller.map(BString::from))
+        }
+    }
+}
+
+#[test]
+#[cfg(feature = "std")]
+fn test_debug() {
+    use crate::{ByteSlice, B};
+
+    // NUL and \x02 come out escaped; the printable ASCII is kept verbatim.
+    let sample = b"\0\0\0 ftypisom\0\0\x02\0isomiso2avc1mp".as_bstr();
+    assert_eq!(
+        r#""\0\0\0 ftypisom\0\0\x02\0isomiso2avc1mp""#,
+        format!("{:?}", sample),
+    );
+
+    // Tests that if the underlying bytes contain the UTF-8 encoding of the
+    // replacement codepoint, then we emit the codepoint just like other
+    // non-printable Unicode characters.
+    let with_replacement = b"\xFF\xEF\xBF\xBD\xFF".as_bstr();
+    let rendered = format!("{:?}", with_replacement);
+    assert_eq!(
+        b"\"\\xFF\xEF\xBF\xBD\\xFF\"".as_bstr(),
+        // Before fixing #72, the output here would be:
+        //   \\xFF\\xEF\\xBF\\xBD\\xFF
+        B(&rendered).as_bstr(),
+    );
+}
+
+// See: https://github.com/BurntSushi/bstr/issues/82
+//
+// Builds one `Cow` from a `&BStr` and one from a `&str`, and checks that each
+// compares unequal to a different borrowed value of the same kind. No type
+// annotations are used on purpose.
+// NOTE(review): presumably the linked issue was about `Cow::from`/comparison
+// inference breaking once this crate's impls are in scope -- confirm against
+// the issue before changing the expressions below.
+#[test]
+#[cfg(feature = "std")]
+fn test_cows_regression() {
+ use std::borrow::Cow;
+
+ use crate::ByteSlice;
+
+ // Byte string flavour: `Cow` of `BStr` versus a plain `&BStr`.
+ let c1 = Cow::from(b"hello bstr".as_bstr());
+ let c2 = b"goodbye bstr".as_bstr();
+ assert_ne!(c1, c2);
+
+ // Standard string flavour: `Cow` of `str` versus a plain `&str`.
+ let c3 = Cow::from("hello str");
+ let c4 = "goodbye str";
+ assert_ne!(c3, c4);
+}
diff --git a/vendor/bstr/src/io.rs b/vendor/bstr/src/io.rs
new file mode 100644
index 000000000..a648145b6
--- /dev/null
+++ b/vendor/bstr/src/io.rs
@@ -0,0 +1,515 @@
+/*!
+Utilities for working with I/O using byte strings.
+
+This module currently only exports a single trait, `BufReadExt`, which provides
+facilities for conveniently and efficiently working with lines as byte strings.
+
+More APIs may be added in the future.
+*/
+
+use alloc::{vec, vec::Vec};
+
+use std::io;
+
+use crate::{ext_slice::ByteSlice, ext_vec::ByteVec};
+
+/// An extension trait for
+/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html)
+/// which provides convenience APIs for dealing with byte strings.
+pub trait BufReadExt: io::BufRead {
+ /// Returns an iterator over the lines of this reader, where each line
+ /// is represented as a byte string.
+ ///
+ /// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where
+ /// an error is yielded if there was a problem reading from the underlying
+ /// reader.
+ ///
+ /// On success, the next line in the iterator is returned. The line does
+ /// *not* contain a trailing `\n` or `\r\n`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let mut cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
+ ///
+ /// let mut lines = vec![];
+ /// for result in cursor.byte_lines() {
+ /// let line = result?;
+ /// lines.push(line);
+ /// }
+ /// assert_eq!(lines.len(), 3);
+ /// assert_eq!(lines[0], "lorem".as_bytes());
+ /// assert_eq!(lines[1], "ipsum".as_bytes());
+ /// assert_eq!(lines[2], "dolor".as_bytes());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn byte_lines(self) -> ByteLines<Self>
+ where
+     Self: Sized,
+ {
+     // The iterator takes ownership of the reader.
+     let buf = self;
+     ByteLines { buf }
+ }
+
+ /// Returns an iterator over byte-terminated records of this reader, where
+ /// each record is represented as a byte string.
+ ///
+ /// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where
+ /// an error is yielded if there was a problem reading from the underlying
+ /// reader.
+ ///
+ /// On success, the next record in the iterator is returned. The record
+ /// does *not* contain its trailing terminator.
+ ///
+ /// Note that calling `byte_records(b'\n')` differs from `byte_lines()` in
+ /// that it has no special handling for `\r`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let mut cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
+ ///
+ /// let mut records = vec![];
+ /// for result in cursor.byte_records(b'\x00') {
+ /// let record = result?;
+ /// records.push(record);
+ /// }
+ /// assert_eq!(records.len(), 3);
+ /// assert_eq!(records[0], "lorem".as_bytes());
+ /// assert_eq!(records[1], "ipsum".as_bytes());
+ /// assert_eq!(records[2], "dolor".as_bytes());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn byte_records(self, terminator: u8) -> ByteRecords<Self>
+ where
+     Self: Sized,
+ {
+     // The iterator takes ownership of the reader and remembers which
+     // byte ends a record.
+     ByteRecords { buf: self, terminator }
+ }
+
+ /// Executes the given closure on each line in the underlying reader.
+ ///
+ /// If the closure returns an error (or if the underlying reader returns an
+ /// error), then iteration is stopped and the error is returned. If false
+ /// is returned, then iteration is stopped and no error is returned.
+ ///
+ /// The closure given is called on exactly the same values as yielded by
+ /// the [`byte_lines`](trait.BufReadExt.html#method.byte_lines)
+ /// iterator. Namely, lines do _not_ contain trailing `\n` or `\r\n` bytes.
+ ///
+ /// This routine is useful for iterating over lines as quickly as
+ /// possible. Namely, a single allocation is reused for each line.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let mut cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
+ ///
+ /// let mut lines = vec![];
+ /// cursor.for_byte_line(|line| {
+ /// lines.push(line.to_vec());
+ /// Ok(true)
+ /// })?;
+ /// assert_eq!(lines.len(), 3);
+ /// assert_eq!(lines[0], "lorem".as_bytes());
+ /// assert_eq!(lines[1], "ipsum".as_bytes());
+ /// assert_eq!(lines[2], "dolor".as_bytes());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn for_byte_line<F>(&mut self, mut for_each_line: F) -> io::Result<()>
+ where
+     Self: Sized,
+     F: FnMut(&[u8]) -> io::Result<bool>,
+ {
+     // Iterate over terminator-inclusive lines and strip the trailing
+     // `\n` or `\r\n` before handing each one to the caller.
+     self.for_byte_line_with_terminator(|raw_line| {
+         let trimmed = trim_line_slice(raw_line);
+         for_each_line(trimmed)
+     })
+ }
+
+ /// Executes the given closure on each byte-terminated record in the
+ /// underlying reader.
+ ///
+ /// If the closure returns an error (or if the underlying reader returns an
+ /// error), then iteration is stopped and the error is returned. If false
+ /// is returned, then iteration is stopped and no error is returned.
+ ///
+ /// The closure given is called on exactly the same values as yielded by
+ /// the [`byte_records`](trait.BufReadExt.html#method.byte_records)
+ /// iterator. Namely, records do _not_ contain a trailing terminator byte.
+ ///
+ /// This routine is useful for iterating over records as quickly as
+ /// possible. Namely, a single allocation is reused for each record.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let mut cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
+ ///
+ /// let mut records = vec![];
+ /// cursor.for_byte_record(b'\x00', |record| {
+ /// records.push(record.to_vec());
+ /// Ok(true)
+ /// })?;
+ /// assert_eq!(records.len(), 3);
+ /// assert_eq!(records[0], "lorem".as_bytes());
+ /// assert_eq!(records[1], "ipsum".as_bytes());
+ /// assert_eq!(records[2], "dolor".as_bytes());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn for_byte_record<F>(
+     &mut self,
+     terminator: u8,
+     mut for_each_record: F,
+ ) -> io::Result<()>
+ where
+     Self: Sized,
+     F: FnMut(&[u8]) -> io::Result<bool>,
+ {
+     // Iterate over terminator-inclusive records and strip the trailing
+     // terminator byte before handing each one to the caller.
+     self.for_byte_record_with_terminator(terminator, |raw_record| {
+         let trimmed = trim_record_slice(raw_record, terminator);
+         for_each_record(trimmed)
+     })
+ }
+
+ /// Executes the given closure on each line in the underlying reader.
+ ///
+ /// If the closure returns an error (or if the underlying reader returns an
+ /// error), then iteration is stopped and the error is returned. If false
+ /// is returned, then iteration is stopped and no error is returned.
+ ///
+ /// Unlike
+ /// [`for_byte_line`](trait.BufReadExt.html#method.for_byte_line),
+ /// the lines given to the closure *do* include the line terminator, if one
+ /// exists.
+ ///
+ /// This routine is useful for iterating over lines as quickly as
+ /// possible. Namely, a single allocation is reused for each line.
+ ///
+ /// This is identical to `for_byte_record_with_terminator` with a
+ /// terminator of `\n`.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::io::BufReadExt;
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let mut cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
+ ///
+ /// let mut lines = vec![];
+ /// cursor.for_byte_line_with_terminator(|line| {
+ /// lines.push(line.to_vec());
+ /// Ok(true)
+ /// })?;
+ /// assert_eq!(lines.len(), 3);
+ /// assert_eq!(lines[0], "lorem\n".as_bytes());
+ /// assert_eq!(lines[1], "ipsum\r\n".as_bytes());
+ /// assert_eq!(lines[2], "dolor".as_bytes());
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn for_byte_line_with_terminator<F>(
+     &mut self,
+     for_each_line: F,
+ ) -> io::Result<()>
+ where
+     Self: Sized,
+     F: FnMut(&[u8]) -> io::Result<bool>,
+ {
+     // A line is simply a record terminated by a line feed.
+     const LINE_FEED: u8 = b'\n';
+     self.for_byte_record_with_terminator(LINE_FEED, for_each_line)
+ }
+
+ /// Executes the given closure on each byte-terminated record in the
+ /// underlying reader.
+ ///
+ /// If the closure returns an error (or if the underlying reader returns an
+ /// error), then iteration is stopped and the error is returned. If false
+ /// is returned, then iteration is stopped and no error is returned.
+ ///
+ /// Unlike
+ /// [`for_byte_record`](trait.BufReadExt.html#method.for_byte_record),
+ /// the records given to the closure *do* include the record terminator, if
+ /// one exists.
+ ///
+ /// This routine is useful for iterating over records as quickly as
+ /// possible. Namely, a single allocation is reused for each record.
+ ///
+ /// # Examples
+ ///
+ /// Basic usage:
+ ///
+ /// ```
+ /// use std::io;
+ ///
+ /// use bstr::{io::BufReadExt, B};
+ ///
+ /// # fn example() -> Result<(), io::Error> {
+ /// let mut cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
+ ///
+ /// let mut records = vec![];
+ /// cursor.for_byte_record_with_terminator(b'\x00', |record| {
+ /// records.push(record.to_vec());
+ /// Ok(true)
+ /// })?;
+ /// assert_eq!(records.len(), 3);
+ /// assert_eq!(records[0], B(b"lorem\x00"));
+ /// assert_eq!(records[1], B("ipsum\x00"));
+ /// assert_eq!(records[2], B("dolor"));
+ /// # Ok(()) }; example().unwrap()
+ /// ```
+ fn for_byte_record_with_terminator<F>(
+ &mut self,
+ terminator: u8,
+ mut for_each_record: F,
+ ) -> io::Result<()>
+ where
+ Self: Sized,
+ F: FnMut(&[u8]) -> io::Result<bool>,
+ {
+ // `bytes` accumulates a record that is not fully contained in a single
+ // buffer fill; it is cleared and reused between such records.
+ let mut bytes = vec![];
+ // `res` carries a callback error out of the inner `while` loop.
+ let mut res = Ok(());
+ // `consumed` counts buffer bytes already handed out (or copied) that
+ // have not yet been reported to the reader via `consume`.
+ let mut consumed = 0;
+ 'outer: loop {
+ // Lend out complete record slices from our buffer
+ {
+ let mut buf = self.fill_buf()?;
+ while let Some(index) = buf.find_byte(terminator) {
+ // `index + 1` keeps the terminator as the last byte of `record`.
+ let (record, rest) = buf.split_at(index + 1);
+ buf = rest;
+ consumed += record.len();
+ match for_each_record(&record) {
+ // Callback asked to stop. `consumed` is still pending and
+ // is reported by the `consume` call after the loop.
+ Ok(false) => break 'outer,
+ Err(err) => {
+ res = Err(err);
+ break 'outer;
+ }
+ // Ok(true): keep scanning the current buffer.
+ _ => (),
+ }
+ }
+
+ // Copy the final record fragment to our local buffer. This
+ // saves read_until() from re-scanning a buffer we know
+ // contains no remaining terminators.
+ bytes.extend_from_slice(&buf);
+ consumed += buf.len();
+ }
+
+ // Everything from this fill has been lent out or copied into `bytes`.
+ self.consume(consumed);
+ consumed = 0;
+
+ // N.B. read_until uses a different version of memchr that may
+ // be slower than the memchr crate that bstr uses. However, this
+ // should only run for a fairly small number of records, assuming a
+ // decent buffer size.
+ self.read_until(terminator, &mut bytes)?;
+ // An empty `bytes` here means there was no fragment and nothing more
+ // was read, so iteration ends. Otherwise the completed record is passed
+ // on; a callback error is returned directly by `?`.
+ if bytes.is_empty() || !for_each_record(&bytes)? {
+ break;
+ }
+ bytes.clear();
+ }
+ // Non-zero only when the loop was left through `break 'outer`.
+ self.consume(consumed);
+ res
+ }
+}
+
+/// Every `io::BufRead` implementation picks up the `BufReadExt` methods.
+impl<B> BufReadExt for B where B: io::BufRead {}
+
+/// An iterator over lines from an instance of
+/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html).
+///
+/// This iterator is generally created by calling the
+/// [`byte_lines`](trait.BufReadExt.html#method.byte_lines)
+/// method on the
+/// [`BufReadExt`](trait.BufReadExt.html)
+/// trait.
+#[derive(Debug)]
+pub struct ByteLines<B> {
+ // The underlying reader that lines are read from.
+ buf: B,
+}
+
+/// An iterator over records from an instance of
+/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html).
+///
+/// A byte record is any sequence of bytes terminated by a particular byte
+/// chosen by the caller. For example, NUL separated byte strings are said to
+/// be NUL-terminated byte records.
+///
+/// This iterator is generally created by calling the
+/// [`byte_records`](trait.BufReadExt.html#method.byte_records)
+/// method on the
+/// [`BufReadExt`](trait.BufReadExt.html)
+/// trait.
+#[derive(Debug)]
+pub struct ByteRecords<B> {
+ // The underlying reader that records are read from.
+ buf: B,
+ // The byte that ends each record; it is trimmed from yielded records.
+ terminator: u8,
+}
+
+impl<B: io::BufRead> Iterator for ByteLines<B> {
+    type Item = io::Result<Vec<u8>>;
+
+    /// Reads up to and including the next `\n` and yields the line with its
+    /// trailing `\n` or `\r\n` removed. Yields `None` once a read returns
+    /// zero bytes, and `Some(Err(..))` if the read fails.
+    fn next(&mut self) -> Option<Self::Item> {
+        let mut line = vec![];
+        let outcome = self.buf.read_until(b'\n', &mut line);
+        match outcome {
+            Ok(0) => None,
+            Ok(_) => {
+                trim_line(&mut line);
+                Some(Ok(line))
+            }
+            Err(err) => Some(Err(err)),
+        }
+    }
+}
+
+impl<B: io::BufRead> Iterator for ByteRecords<B> {
+    type Item = io::Result<Vec<u8>>;
+
+    /// Reads up to and including the next terminator byte and yields the
+    /// record with that terminator removed. Yields `None` once a read
+    /// returns zero bytes, and `Some(Err(..))` if the read fails.
+    fn next(&mut self) -> Option<Self::Item> {
+        let terminator = self.terminator;
+        let mut record = vec![];
+        let outcome = self.buf.read_until(terminator, &mut record);
+        match outcome {
+            Ok(0) => None,
+            Ok(_) => {
+                trim_record(&mut record, terminator);
+                Some(Ok(record))
+            }
+            Err(err) => Some(Err(err)),
+        }
+    }
+}
+
+/// Removes one trailing `\n` from `line`, plus a `\r` directly before it.
+/// A line that does not end in `\n` is left untouched.
+fn trim_line(line: &mut Vec<u8>) {
+    // Nothing to strip unless the line ends with a line feed.
+    if line.last_byte() != Some(b'\n') {
+        return;
+    }
+    line.pop_byte();
+    // A carriage return right before the line feed belongs to the ending.
+    if line.last_byte() == Some(b'\r') {
+        line.pop_byte();
+    }
+}
+
+/// Returns `line` without one trailing `\n`, and without a `\r` directly
+/// before that `\n`. A line that does not end in `\n` is returned as is.
+fn trim_line_slice(line: &[u8]) -> &[u8] {
+    match line {
+        [body @ .., b'\r', b'\n'] => body,
+        [body @ .., b'\n'] => body,
+        _ => line,
+    }
+}
+
+/// Removes a single trailing `terminator` byte from `record`, if present.
+fn trim_record(record: &mut Vec<u8>, terminator: u8) {
+    let ends_with_terminator = record.last_byte() == Some(terminator);
+    if ends_with_terminator {
+        record.pop_byte();
+    }
+}
+
+/// Returns `record` without a single trailing `terminator` byte, if present.
+fn trim_record_slice(record: &[u8], terminator: u8) -> &[u8] {
+    match record {
+        [body @ .., last] if *last == terminator => body,
+        _ => record,
+    }
+}
+
+#[cfg(all(test, feature = "std"))]
+mod tests {
+ use crate::bstring::BString;
+
+ use super::BufReadExt;
+
+ /// Runs `for_byte_line` over `slice` and gathers every line it reports.
+ fn collect_lines<B: AsRef<[u8]>>(slice: B) -> Vec<BString> {
+     let mut reader: &[u8] = slice.as_ref();
+     let mut collected = Vec::new();
+     reader
+         .for_byte_line(|line| {
+             let owned = line.to_vec();
+             collected.push(BString::from(owned));
+             Ok(true)
+         })
+         .unwrap();
+     collected
+ }
+
+ /// Runs `for_byte_line_with_terminator` over `slice` and gathers every
+ /// line it reports.
+ fn collect_lines_term<B: AsRef<[u8]>>(slice: B) -> Vec<BString> {
+     let mut reader: &[u8] = slice.as_ref();
+     let mut collected = Vec::new();
+     reader
+         .for_byte_line_with_terminator(|line| {
+             let owned = line.to_vec();
+             collected.push(BString::from(owned));
+             Ok(true)
+         })
+         .unwrap();
+     collected
+ }
+
+ #[test]
+ fn lines_without_terminator() {
+     // (input, lines expected once `\n` / `\r\n` endings are stripped)
+     let cases: &[(&str, &[&str])] = &[
+         ("", &[]),
+         ("\n", &[""]),
+         ("\n\n", &["", ""]),
+         ("a\nb\n", &["a", "b"]),
+         ("a\nb", &["a", "b"]),
+         ("abc\nxyz\n", &["abc", "xyz"]),
+         ("abc\nxyz", &["abc", "xyz"]),
+         ("\r\n", &[""]),
+         ("\r\n\r\n", &["", ""]),
+         ("a\r\nb\r\n", &["a", "b"]),
+         ("a\r\nb", &["a", "b"]),
+         ("abc\r\nxyz\r\n", &["abc", "xyz"]),
+         ("abc\r\nxyz", &["abc", "xyz"]),
+         // A lone `\r` is not a line ending.
+         ("abc\rxyz", &["abc\rxyz"]),
+     ];
+     for &(input, expected) in cases {
+         assert_eq!(collect_lines(input), expected.to_vec());
+     }
+ }
+
+ #[test]
+ fn lines_with_terminator() {
+     // (input, lines expected with their original endings kept)
+     let cases: &[(&str, &[&str])] = &[
+         ("", &[]),
+         ("\n", &["\n"]),
+         ("\n\n", &["\n", "\n"]),
+         ("a\nb\n", &["a\n", "b\n"]),
+         ("a\nb", &["a\n", "b"]),
+         ("abc\nxyz\n", &["abc\n", "xyz\n"]),
+         ("abc\nxyz", &["abc\n", "xyz"]),
+         ("\r\n", &["\r\n"]),
+         ("\r\n\r\n", &["\r\n", "\r\n"]),
+         ("a\r\nb\r\n", &["a\r\n", "b\r\n"]),
+         ("a\r\nb", &["a\r\n", "b"]),
+         ("abc\r\nxyz\r\n", &["abc\r\n", "xyz\r\n"]),
+         ("abc\r\nxyz", &["abc\r\n", "xyz"]),
+         // A lone `\r` is not a line ending.
+         ("abc\rxyz", &["abc\rxyz"]),
+     ];
+     for &(input, expected) in cases {
+         assert_eq!(collect_lines_term(input), expected.to_vec());
+     }
+ }
+}
diff --git a/vendor/bstr/src/lib.rs b/vendor/bstr/src/lib.rs
new file mode 100644
index 000000000..3d334ac63
--- /dev/null
+++ b/vendor/bstr/src/lib.rs
@@ -0,0 +1,482 @@
+/*!
+A byte string library.
+
+Byte strings are just like standard Unicode strings with one very important
+difference: byte strings are only *conventionally* UTF-8 while Rust's standard
+Unicode strings are *guaranteed* to be valid UTF-8. The primary motivation for
+byte strings is for handling arbitrary bytes that are mostly UTF-8.
+
+# Overview
+
+This crate provides two important traits that provide string oriented methods
+on `&[u8]` and `Vec<u8>` types:
+
+* [`ByteSlice`](trait.ByteSlice.html) extends the `[u8]` type with additional
+ string oriented methods.
+* [`ByteVec`](trait.ByteVec.html) extends the `Vec<u8>` type with additional
+ string oriented methods.
+
+Additionally, this crate provides two concrete byte string types that deref to
+`[u8]` and `Vec<u8>`. These are useful for storing byte string types, and come
+with convenient `std::fmt::Debug` implementations:
+
+* [`BStr`](struct.BStr.html) is a byte string slice, analogous to `str`.
+* [`BString`](struct.BString.html) is an owned growable byte string buffer,
+ analogous to `String`.
+
+Additionally, the free function [`B`](fn.B.html) serves as a convenient short
+hand for writing byte string literals.
+
+# Quick examples
+
+Byte strings build on the existing APIs for `Vec<u8>` and `&[u8]`, with
+additional string oriented methods. Operations such as iterating over
+graphemes, searching for substrings, replacing substrings, trimming and case
+conversion are examples of things not provided on the standard library `&[u8]`
+APIs but are provided by this crate. For example, this code iterates over all
+occurrences of a substring:
+
+```
+use bstr::ByteSlice;
+
+let s = b"foo bar foo foo quux foo";
+
+let mut matches = vec![];
+for start in s.find_iter("foo") {
+ matches.push(start);
+}
+assert_eq!(matches, [0, 8, 12, 21]);
+```
+
+Here's another example showing how to do a search and replace (and also showing
+use of the `B` function):
+
+```
+# #[cfg(feature = "alloc")] {
+use bstr::{B, ByteSlice};
+
+let old = B("foo ☃☃☃ foo foo quux foo");
+let new = old.replace("foo", "hello");
+assert_eq!(new, B("hello ☃☃☃ hello hello quux hello"));
+# }
+```
+
+And here's an example that shows case conversion, even in the presence of
+invalid UTF-8:
+
+```
+# #[cfg(all(feature = "alloc", feature = "unicode"))] {
+use bstr::{ByteSlice, ByteVec};
+
+let mut lower = Vec::from("hello β");
+lower[0] = b'\xFF';
+// lowercase β is uppercased to Β
+assert_eq!(lower.to_uppercase(), b"\xFFELLO \xCE\x92");
+# }
+```
+
+# Convenient debug representation
+
+When working with byte strings, it is often useful to be able to print them
+as if they were byte strings and not sequences of integers. While this crate
+cannot affect the `std::fmt::Debug` implementations for `[u8]` and `Vec<u8>`,
+this crate does provide the `BStr` and `BString` types which have convenient
+`std::fmt::Debug` implementations.
+
+For example, this
+
+```
+use bstr::ByteSlice;
+
+let mut bytes = Vec::from("hello β");
+bytes[0] = b'\xFF';
+
+println!("{:?}", bytes.as_bstr());
+```
+
+will output `"\xFFello β"`.
+
+This example works because the
+[`ByteSlice::as_bstr`](trait.ByteSlice.html#method.as_bstr)
+method converts any `&[u8]` to a `&BStr`.
+
+# When should I use byte strings?
+
+This library reflects my belief that UTF-8 by convention is a better trade
+off in some circumstances than guaranteed UTF-8.
+
+The first time this idea hit me was in the implementation of Rust's regex
+engine. In particular, very little of the internal implementation cares at all
+about searching valid UTF-8 encoded strings. Indeed, internally, the
+implementation converts `&str` from the API to `&[u8]` fairly quickly and
+just deals with raw bytes. UTF-8 match boundaries are then guaranteed by the
+finite state machine itself rather than any specific string type. This makes it
+possible to not only run regexes on `&str` values, but also on `&[u8]` values.
+
+Why would you ever want to run a regex on a `&[u8]` though? Well, `&[u8]` is
+the fundamental way at which one reads data from all sorts of streams, via the
+standard library's [`Read`](https://doc.rust-lang.org/std/io/trait.Read.html)
+trait. In particular, there is no platform independent way to determine whether
+what you're reading from is some binary file or a human readable text file.
+Therefore, if you're writing a program to search files, you probably need to
+deal with `&[u8]` directly unless you're okay with first converting it to a
+`&str` and dropping any bytes that aren't valid UTF-8. (Or otherwise determine
+the encoding---which is often impractical---and perform a transcoding step.)
+Often, the simplest and most robust way to approach this is to simply treat the
+contents of a file as if it were mostly valid UTF-8 and pass through invalid
+UTF-8 untouched. This may not be the most correct approach though!
+
+One case in particular exacerbates these issues, and that's memory mapping
+a file. When you memory map a file, that file may be gigabytes big, but all
+you get is a `&[u8]`. Converting that to a `&str` all in one go is generally
+not a good idea because of the costs associated with doing so, and also
+because it generally causes one to do two passes over the data instead of
+one, which is quite undesirable. It is of course usually possible to do it in an
+incremental way by only parsing chunks at a time, but this is often complex to
+do or impractical. For example, many regex engines only accept one contiguous
+sequence of bytes at a time with no way to perform incremental matching.
+
+# `bstr` in public APIs
+
+This library is past version `1` and is expected to remain at version `1` for
+the foreseeable future. Therefore, it is encouraged to put types from `bstr`
+(like `BStr` and `BString`) in your public API if that makes sense for your
+crate.
+
+With that said, in general, it should be possible to avoid putting anything
+in this crate into your public APIs. Namely, you should never need to use the
+`ByteSlice` or `ByteVec` traits as bounds on public APIs, since their only
+purpose is to extend the methods on the concrete types `[u8]` and `Vec<u8>`,
+respectively. Similarly, it should not be necessary to put either the `BStr` or
+`BString` types into public APIs. If you want to use them internally, then they
+can be converted to/from `[u8]`/`Vec<u8>` as needed. The conversions are free.
+
+So while it shouldn't ever be 100% necessary to make `bstr` a public
+dependency, there may be cases where it is convenient to do so. This is an
+explicitly supported use case of `bstr`, and as such, major version releases
+should be exceptionally rare.
+
+
+# Differences with standard strings
+
+The primary difference between `[u8]` and `str` is that the former is
+conventionally UTF-8 while the latter is guaranteed to be UTF-8. The phrase
+"conventionally UTF-8" means that a `[u8]` may contain bytes that do not form
+a valid UTF-8 sequence, but operations defined on the type in this crate are
+generally most useful on valid UTF-8 sequences. For example, iterating over
+Unicode codepoints or grapheme clusters is an operation that is only defined
+on valid UTF-8. Therefore, when invalid UTF-8 is encountered, the Unicode
+replacement codepoint is substituted. Thus, a byte string that is not UTF-8 at
+all is of limited utility when using this crate.
+
+However, not all operations on byte strings are specifically Unicode aware. For
+example, substring search has no specific Unicode semantics ascribed to it. It
+works just as well for byte strings that are completely valid UTF-8 as for byte
+strings that contain no valid UTF-8 at all. Similarly for replacements and
+various other operations that do not need any Unicode specific tailoring.
+
+Aside from the difference in how UTF-8 is handled, the APIs between `[u8]` and
+`str` (and `Vec<u8>` and `String`) are intentionally very similar, including
+maintaining the same behavior for corner cases in things like substring
+splitting. There are, however, some differences:
+
+* Substring search is not done with `matches`, but instead, `find_iter`.
+ In general, this crate does not define any generic
+ [`Pattern`](https://doc.rust-lang.org/std/str/pattern/trait.Pattern.html)
+ infrastructure, and instead prefers adding new methods for different
+ argument types. For example, `matches` can search by a `char` or a `&str`,
+ whereas `find_iter` can only search by a byte string. `find_char` can be
+ used for searching by a `char`.
+* Since `SliceConcatExt` in the standard library is unstable, it is not
+ possible to reuse that to implement `join` and `concat` methods. Instead,
+ [`join`](fn.join.html) and [`concat`](fn.concat.html) are provided as free
+ functions that perform a similar task.
+* This library bundles in a few more Unicode operations, such as grapheme,
+ word and sentence iterators. More operations, such as normalization and
+ case folding, may be provided in the future.
+* Some `String`/`str` APIs will panic if a particular index was not on a valid
+ UTF-8 code unit sequence boundary. Conversely, no such checking is performed
+ in this crate, as is consistent with treating byte strings as a sequence of
+ bytes. This means callers are responsible for maintaining a UTF-8 invariant
+ if that's important.
+* Some routines provided by this crate, such as `starts_with_str`, have a
+ `_str` suffix to differentiate them from similar routines already defined
+ on the `[u8]` type. The difference is that `starts_with` requires its
+ parameter to be a `&[u8]`, whereas `starts_with_str` permits its parameter
+ to be anything that implements `AsRef<[u8]>`, which is more flexible. This
+ means you can write `bytes.starts_with_str("☃")` instead of
+ `bytes.starts_with("☃".as_bytes())`.
+
+Otherwise, you should find most of the APIs between this crate and the standard
+library string APIs to be very similar, if not identical.
+
+# Handling of invalid UTF-8
+
+Since byte strings are only *conventionally* UTF-8, there is no guarantee
+that byte strings contain valid UTF-8. Indeed, it is perfectly legal for a
+byte string to contain arbitrary bytes. However, since this library defines
+a *string* type, it provides many operations specified by Unicode. These
+operations are typically only defined over codepoints, and thus have no real
+meaning on bytes that are invalid UTF-8 because they do not map to a particular
+codepoint.
+
+For this reason, whenever operations defined only on codepoints are used, this
+library will automatically convert invalid UTF-8 to the Unicode replacement
+codepoint, `U+FFFD`, which looks like this: `�`. For example, an
+[iterator over codepoints](struct.Chars.html) will yield a Unicode
+replacement codepoint whenever it comes across bytes that are not valid UTF-8:
+
+```
+use bstr::ByteSlice;
+
+let bs = b"a\xFF\xFFz";
+let chars: Vec<char> = bs.chars().collect();
+assert_eq!(vec!['a', '\u{FFFD}', '\u{FFFD}', 'z'], chars);
+```
+
+There are a few ways in which invalid bytes can be substituted with a Unicode
+replacement codepoint. One way, not used by this crate, is to replace every
+individual invalid byte with a single replacement codepoint. In contrast, the
+approach this crate uses is called the "substitution of maximal subparts," as
+specified by the Unicode Standard (Chapter 3, Section 9). (This approach is
+also used by [W3C's Encoding Standard](https://www.w3.org/TR/encoding/).) In
+this strategy, a replacement codepoint is inserted whenever a byte is found
+that cannot possibly lead to a valid UTF-8 code unit sequence. If there were
+previous bytes that represented a *prefix* of a well-formed UTF-8 code unit
+sequence, then all of those bytes (up to 3) are substituted with a single
+replacement codepoint. For example:
+
+```
+use bstr::ByteSlice;
+
+let bs = b"a\xF0\x9F\x87z";
+let chars: Vec<char> = bs.chars().collect();
+// The bytes \xF0\x9F\x87 could lead to a valid UTF-8 sequence, but 3 of them
+// on their own are invalid. Only one replacement codepoint is substituted,
+// which demonstrates the "substitution of maximal subparts" strategy.
+assert_eq!(vec!['a', '\u{FFFD}', 'z'], chars);
+```
+
+If you do need to access the raw bytes for some reason in an iterator like
+`Chars`, then you should use the iterator's "indices" variant, which gives
+the byte offsets containing the invalid UTF-8 bytes that were substituted with
+the replacement codepoint. For example:
+
+```
+use bstr::{B, ByteSlice};
+
+let bs = b"a\xE2\x98z";
+let chars: Vec<(usize, usize, char)> = bs.char_indices().collect();
+// Even though the replacement codepoint is encoded as 3 bytes itself, the
+// byte range given here is only two bytes, corresponding to the original
+// raw bytes.
+assert_eq!(vec![(0, 1, 'a'), (1, 3, '\u{FFFD}'), (3, 4, 'z')], chars);
+
+// Thus, getting the original raw bytes is as simple as slicing the original
+// byte string:
+let chars: Vec<&[u8]> = bs.char_indices().map(|(s, e, _)| &bs[s..e]).collect();
+assert_eq!(vec![B("a"), B(b"\xE2\x98"), B("z")], chars);
+```
+
+# File paths and OS strings
+
+One of the premier features of Rust's standard library is how it handles file
+paths. In particular, it makes it very hard to write incorrect code while
+simultaneously providing a correct cross platform abstraction for manipulating
+file paths. The key challenge that one faces with file paths across platforms
+is derived from the following observations:
+
+* On most Unix-like systems, file paths are an arbitrary sequence of bytes.
+* On Windows, file paths are an arbitrary sequence of 16-bit integers.
+
+(In both cases, certain sequences aren't allowed. For example a `NUL` byte is
+not allowed in either case. But we can ignore this for the purposes of this
+section.)
+
+Byte strings, like the ones provided in this crate, line up really well with
+file paths on Unix-like systems, which are themselves just arbitrary sequences
+of bytes. It turns out that if you treat them as "mostly UTF-8," then things
+work out pretty well. On the contrary, byte strings _don't_ really work
+that well on Windows because it's not possible to correctly roundtrip file
+paths between 16-bit integers and something that looks like UTF-8 _without_
+explicitly defining an encoding to do this for you, which is anathema to byte
+strings, which are just bytes.
+
+Rust's standard library elegantly solves this problem by specifying an
+internal encoding for file paths that's only used on Windows called
+[WTF-8](https://simonsapin.github.io/wtf-8/). Its key properties are that it
+permits losslessly roundtripping file paths on Windows by extending UTF-8 to
+support an encoding of surrogate codepoints, while simultaneously supporting
+zero-cost conversion from Rust's Unicode strings to file paths. (Since UTF-8 is
+a proper subset of WTF-8.)
+
+The fundamental point at which the above strategy fails is when you want to
+treat file paths as things that look like strings in a zero cost way. In most
+cases, this is actually the wrong thing to do, but some cases call for it,
+for example, glob or regex matching on file paths. This is because WTF-8 is
+treated as an internal implementation detail, and there is no way to access
+those bytes via a public API. Therefore, such consumers are limited in what
+they can do:
+
+1. One could re-implement WTF-8 and re-encode file paths on Windows to WTF-8
+ by accessing their underlying 16-bit integer representation. Unfortunately,
+ this isn't zero cost (it introduces a second WTF-8 decoding step) and it's
+ not clear this is a good thing to do, since WTF-8 should ideally remain an
+ internal implementation detail. This is roughly the approach taken by the
+ [`os_str_bytes`](https://crates.io/crates/os_str_bytes) crate.
+2. One could instead declare that they will not handle paths on Windows that
+ are not valid UTF-16, and return an error when one is encountered.
+3. Like (2), but instead of returning an error, lossily decode the file path
+ on Windows that isn't valid UTF-16 into UTF-16 by replacing invalid code
+ units with the Unicode replacement codepoint.
+
+While this library may provide facilities for (1) in the future, currently,
+this library only provides facilities for (2) and (3). In particular, a suite
+of conversion functions are provided that permit converting between byte
+strings, OS strings and file paths. For owned byte strings, they are:
+
+* [`ByteVec::from_os_string`](trait.ByteVec.html#method.from_os_string)
+* [`ByteVec::from_os_str_lossy`](trait.ByteVec.html#method.from_os_str_lossy)
+* [`ByteVec::from_path_buf`](trait.ByteVec.html#method.from_path_buf)
+* [`ByteVec::from_path_lossy`](trait.ByteVec.html#method.from_path_lossy)
+* [`ByteVec::into_os_string`](trait.ByteVec.html#method.into_os_string)
+* [`ByteVec::into_os_string_lossy`](trait.ByteVec.html#method.into_os_string_lossy)
+* [`ByteVec::into_path_buf`](trait.ByteVec.html#method.into_path_buf)
+* [`ByteVec::into_path_buf_lossy`](trait.ByteVec.html#method.into_path_buf_lossy)
+
+For byte string slices, they are:
+
+* [`ByteSlice::from_os_str`](trait.ByteSlice.html#method.from_os_str)
+* [`ByteSlice::from_path`](trait.ByteSlice.html#method.from_path)
+* [`ByteSlice::to_os_str`](trait.ByteSlice.html#method.to_os_str)
+* [`ByteSlice::to_os_str_lossy`](trait.ByteSlice.html#method.to_os_str_lossy)
+* [`ByteSlice::to_path`](trait.ByteSlice.html#method.to_path)
+* [`ByteSlice::to_path_lossy`](trait.ByteSlice.html#method.to_path_lossy)
+
+On Unix, all of these conversions are rigorously zero cost, which gives one
+a way to ergonomically deal with raw file paths exactly as they are using
+normal string-related functions. On Windows, these conversion routines perform
+a UTF-8 check and either return an error or lossily decode the file path
+into valid UTF-8, depending on which function you use. This means that you
+cannot roundtrip all file paths on Windows correctly using these conversion
+routines. However, this may be an acceptable downside since such file paths
+are exceptionally rare. Moreover, roundtripping isn't always necessary, for
+example, if all you're doing is filtering based on file paths.
+
+The reason why using byte strings for this is potentially superior to the
+standard library's approach is that a lot of Rust code is already lossily
+converting file paths to Rust's Unicode strings, which are required to be valid
+UTF-8, and thus contain latent bugs on Unix where paths with invalid UTF-8 are
+not terribly uncommon. If you instead use byte strings, then you're guaranteed
+to write correct code for Unix, at the cost of getting a corner case wrong on
+Windows.
+
+# Cargo features
+
+This crate comes with a few features that control standard library, serde
+and Unicode support.
+
+* `std` - **Enabled** by default. This provides APIs that require the standard
+ library, such as `Vec<u8>` and `PathBuf`. Enabling this feature also enables
+ the `alloc` feature and any other relevant `std` features for dependencies.
+* `alloc` - **Enabled** by default. This provides APIs that require allocations
+ via the `alloc` crate, such as `Vec<u8>`.
+* `unicode` - **Enabled** by default. This provides APIs that require sizable
+ Unicode data compiled into the binary. This includes, but is not limited to,
+ grapheme/word/sentence segmenters. When this is disabled, basic support such
+ as UTF-8 decoding is still included. Note that currently, enabling this
+ feature also requires enabling the `std` feature. It is expected that this
+ limitation will be lifted at some point.
+* `serde` - Enables implementations of serde traits for `BStr`, and also
+ `BString` when `alloc` is enabled.
+*/
+
+#![cfg_attr(not(any(feature = "std", test)), no_std)]
+#![cfg_attr(docsrs, feature(doc_auto_cfg))]
+
+// Why do we do this? Well, in order for us to use once_cell's 'Lazy' type to
+// load DFAs, it requires enabling its 'std' feature. Yet, there is really
+// nothing about our 'unicode' feature that requires 'std'. We could declare
+// that 'unicode = [std, ...]', which would be fine, but once regex-automata
+// 0.3 is a thing, I believe we can drop once_cell altogether and thus drop
+// the need for 'std' to be enabled when 'unicode' is enabled. But if we make
+// 'unicode' also enable 'std', then it would be a breaking change to remove
+// 'std' from that list.
+//
+// So, for right now, we force folks to explicitly say they want 'std' if they
+// want 'unicode'. In the future, we should be able to relax this.
+#[cfg(all(feature = "unicode", not(feature = "std")))]
+compile_error!("enabling 'unicode' requires enabling 'std'");
+
+#[cfg(feature = "alloc")]
+extern crate alloc;
+
+pub use crate::bstr::BStr;
+#[cfg(feature = "alloc")]
+pub use crate::bstring::BString;
+#[cfg(feature = "unicode")]
+pub use crate::ext_slice::Fields;
+pub use crate::ext_slice::{
+ ByteSlice, Bytes, FieldsWith, Find, FindReverse, Finder, FinderReverse,
+ Lines, LinesWithTerminator, Split, SplitN, SplitNReverse, SplitReverse, B,
+};
+#[cfg(feature = "alloc")]
+pub use crate::ext_vec::{concat, join, ByteVec, DrainBytes, FromUtf8Error};
+#[cfg(feature = "unicode")]
+pub use crate::unicode::{
+ GraphemeIndices, Graphemes, SentenceIndices, Sentences, WordIndices,
+ Words, WordsWithBreakIndices, WordsWithBreaks,
+};
+pub use crate::utf8::{
+ decode as decode_utf8, decode_last as decode_last_utf8, CharIndices,
+ Chars, Utf8Chunk, Utf8Chunks, Utf8Error,
+};
+
+mod ascii;
+mod bstr;
+#[cfg(feature = "alloc")]
+mod bstring;
+mod byteset;
+mod ext_slice;
+#[cfg(feature = "alloc")]
+mod ext_vec;
+mod impls;
+#[cfg(feature = "std")]
+pub mod io;
+#[cfg(all(test, feature = "std"))]
+mod tests;
+#[cfg(feature = "unicode")]
+mod unicode;
+mod utf8;
+
+#[cfg(all(test, feature = "std"))]
+mod apitests {
+ use crate::{
+ bstr::BStr,
+ bstring::BString,
+ ext_slice::{Finder, FinderReverse},
+ };
+ /// Checks that the types below are Send, Sync and unwind safe.
+ #[test]
+ fn oibits() {
+ use std::panic::{RefUnwindSafe, UnwindSafe};
+ // Empty generic helpers: instantiating one only type-checks its bound.
+ fn assert_send<T: Send>() {}
+ fn assert_sync<T: Sync>() {}
+ fn assert_unwind_safe<T: RefUnwindSafe + UnwindSafe>() {}
+ // The calls have no runtime effect; a missing impl is a compile error.
+ assert_send::<&BStr>();
+ assert_sync::<&BStr>();
+ assert_unwind_safe::<&BStr>();
+ assert_send::<BString>();
+ assert_sync::<BString>();
+ assert_unwind_safe::<BString>();
+
+ assert_send::<Finder<'_>>();
+ assert_sync::<Finder<'_>>();
+ assert_unwind_safe::<Finder<'_>>();
+ assert_send::<FinderReverse<'_>>();
+ assert_sync::<FinderReverse<'_>>();
+ assert_unwind_safe::<FinderReverse<'_>>();
+ }
+}
diff --git a/vendor/bstr/src/tests.rs b/vendor/bstr/src/tests.rs
new file mode 100644
index 000000000..03a4461b7
--- /dev/null
+++ b/vendor/bstr/src/tests.rs
@@ -0,0 +1,32 @@
+/// A sequence of tests for checking whether lossy decoding uses the maximal
+/// subpart strategy correctly. Namely, if a sequence of otherwise invalid
+/// UTF-8 bytes is a valid prefix of a valid UTF-8 sequence, then the entire
+/// prefix is replaced by a single replacement codepoint. In all other cases,
+/// each invalid byte is replaced by a single replacement codepoint.
+///
+/// The first element in each tuple is the expected result of lossy decoding,
+/// while the second element is the input given.
+pub(crate) const LOSSY_TESTS: &[(&str, &[u8])] = &[
+ ("a", b"a"),
+ ("\u{FFFD}", b"\xFF"),
+ ("\u{FFFD}\u{FFFD}", b"\xFF\xFF"),
+ ("β\u{FFFD}", b"\xCE\xB2\xFF"),
+ ("☃\u{FFFD}", b"\xE2\x98\x83\xFF"),
+ ("𝝱\u{FFFD}", b"\xF0\x9D\x9D\xB1\xFF"),
+ ("\u{FFFD}\u{FFFD}", b"\xCE\xF0"),
+ ("\u{FFFD}\u{FFFD}", b"\xCE\xFF"),
+ ("\u{FFFD}\u{FFFD}", b"\xE2\x98\xF0"),
+ ("\u{FFFD}\u{FFFD}", b"\xE2\x98\xFF"),
+ ("\u{FFFD}", b"\xF0\x9D\x9D"), // truncated 4-byte sequence: one replacement
+ ("\u{FFFD}\u{FFFD}", b"\xF0\x9D\x9D\xF0"),
+ ("\u{FFFD}\u{FFFD}", b"\xF0\x9D\x9D\xFF"),
+ ("\u{FFFD}", b"\xCE"),
+ ("a\u{FFFD}", b"a\xCE"),
+ ("\u{FFFD}", b"\xE2\x98"),
+ ("a\u{FFFD}", b"a\xE2\x98"),
+ ("\u{FFFD}", b"\xF0\x9D\x9C"),
+ ("a\u{FFFD}", b"a\xF0\x9D\x9C"),
+ ("a\u{FFFD}\u{FFFD}\u{FFFD}z", b"a\xED\xA0\x80z"), // encoded surrogate U+D800: no valid prefix, so each byte is replaced
+ ("☃βツ\u{FFFD}", b"\xe2\x98\x83\xce\xb2\xe3\x83\x84\xFF"),
+ ("a\u{FFFD}\u{FFFD}\u{FFFD}b", b"\x61\xF1\x80\x80\xE1\x80\xC2\x62"), // three incomplete sequences in a row, one replacement each
+];
diff --git a/vendor/bstr/src/unicode/data/GraphemeBreakTest.txt b/vendor/bstr/src/unicode/data/GraphemeBreakTest.txt
new file mode 100644
index 000000000..eff2fd33b
--- /dev/null
+++ b/vendor/bstr/src/unicode/data/GraphemeBreakTest.txt
@@ -0,0 +1,630 @@
+# GraphemeBreakTest-14.0.0.txt
+# Date: 2021-03-08, 06:22:32 GMT
+# © 2021 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Default Grapheme_Cluster_Break Test
+#
+# Format:
+# <string> (# <comment>)?
+# <string> contains hex Unicode code points, with
+# ÷ wherever there is a break opportunity, and
+# × wherever there is not.
+# <comment> the format can change, but currently it shows:
+# - the sample character name
+# - (x) the Grapheme_Cluster_Break property value for the sample character
+# - [x] the rule that determines whether there is a break or not,
+# as listed in the Rules section of GraphemeBreakTest.html
+#
+# These samples may be extended or changed in the future.
+#
+÷ 0020 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0020 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0020 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0020 × 0308 ÷ 000D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0020 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0020 × 0308 ÷ 000A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0020 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0020 × 034F ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0020 × 0308 × 034F ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0020 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0020 ÷ 0600 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0600 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0020 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0020 × 0308 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0020 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0020 × 0308 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0020 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0020 × 0308 ÷ 11A8 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0020 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0020 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0020 × 0308 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 000D ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SPACE (Other) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 000D ÷ 034F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 000D ÷ 0308 × 034F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000D ÷ 0600 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 000D ÷ 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 000D ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 000D ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 000D ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 000D ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 000D ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000D ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 000D ÷ 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 000D ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 000A ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SPACE (Other) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 000A ÷ 034F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 000A ÷ 0308 × 034F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000A ÷ 0600 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 000A ÷ 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 000A ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 000A ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 000A ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 000A ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 000A ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 000A ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 000A ÷ 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 000A ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0001 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] SPACE (Other) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0001 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0001 ÷ 034F ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0001 ÷ 0308 × 034F ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0001 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0001 ÷ 0600 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0001 ÷ 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0001 ÷ 0308 × 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0001 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0001 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0001 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0001 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0001 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0001 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0001 ÷ 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0001 ÷ 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0001 ÷ 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0001 ÷ 0308 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0001 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0001 ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 034F ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 034F × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 034F ÷ 000D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 034F × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 034F ÷ 000A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 034F × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 034F ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 034F × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 034F × 034F ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 034F × 0308 × 034F ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 034F ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 034F × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 034F ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 034F × 0308 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 034F × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 034F × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 034F ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 034F × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 034F ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 034F × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 034F ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 034F × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 034F ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 034F × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 034F ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 034F × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 034F ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 034F × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 034F × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 034F × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 034F × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 034F × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 034F ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 034F × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1F1E6 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1F1E6 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1F1E6 × 034F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1F1E6 × 0308 × 034F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1F1E6 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1F1E6 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1F1E6 × 0308 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1F1E6 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1F1E6 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1F1E6 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 11A8 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1F1E6 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1F1E6 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1F1E6 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0600 × 0020 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] SPACE (Other) ÷ [0.3]
+÷ 0600 × 0308 ÷ 0020 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0600 ÷ 000D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0600 × 0308 ÷ 000D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0600 ÷ 000A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0600 × 0308 ÷ 000A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0600 ÷ 0001 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0600 × 0308 ÷ 0001 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0600 × 034F ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0600 × 0308 × 034F ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0600 × 1F1E6 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0600 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0600 × 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0600 × 0308 ÷ 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0600 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0600 × 0308 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0600 × 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0600 × 0308 ÷ 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0600 × 1160 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0600 × 0308 ÷ 1160 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0600 × 11A8 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0600 × 0308 ÷ 11A8 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0600 × AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0600 × 0308 ÷ AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0600 × AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0600 × 0308 ÷ AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0600 × 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] WATCH (ExtPict) ÷ [0.3]
+÷ 0600 × 0308 ÷ 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0600 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 0308 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 0308 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] <reserved-0378> (Other) ÷ [0.3]
+÷ 0600 × 0308 ÷ 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0903 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0903 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0903 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0903 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0903 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0903 × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0903 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0903 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0903 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0903 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0903 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0903 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0903 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0903 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0903 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0903 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0903 × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1100 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1100 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1100 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1100 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1100 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1100 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1100 × 034F ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1100 × 0308 × 034F ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1100 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1100 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1100 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1100 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1100 × 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1100 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1100 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1100 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1100 × AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1100 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1100 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1100 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1100 × 0308 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1160 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1160 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1160 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1160 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1160 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1160 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 1160 × 034F ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1160 × 0308 × 034F ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 1160 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1160 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1160 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1160 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1160 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1160 × 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1160 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 1160 × 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1160 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 1160 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1160 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1160 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1160 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1160 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 1160 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 11A8 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 11A8 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 11A8 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 11A8 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 11A8 × 034F ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 11A8 × 0308 × 034F ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 11A8 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 11A8 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 11A8 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 11A8 × 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 11A8 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 11A8 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 11A8 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 11A8 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ AC00 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ AC00 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ AC00 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ AC00 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ AC00 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ AC00 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ AC00 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ AC00 × 0308 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ AC00 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ AC00 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ AC00 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC00 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC00 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC00 × 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ AC00 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ AC00 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ AC00 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ AC00 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC00 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ AC00 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ AC00 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ AC00 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ AC01 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ AC01 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ AC01 × 0308 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ AC01 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ AC01 × 0308 ÷ 000A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ AC01 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ AC01 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ AC01 × 0308 × 034F ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ AC01 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ AC01 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ AC01 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC01 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC01 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC01 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ AC01 × 0308 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ AC01 × 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ AC01 × 0308 ÷ 11A8 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ AC01 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC01 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ AC01 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ AC01 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ AC01 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 231A ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 231A × 0308 ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 231A ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 231A × 0308 ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 231A ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 231A × 0308 ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 231A ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 231A × 0308 ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 231A × 034F ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 231A × 0308 × 034F ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 231A ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 231A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 231A ÷ 0600 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 231A × 0308 ÷ 0600 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 231A × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 231A × 0308 × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 231A ÷ 1100 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 231A × 0308 ÷ 1100 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 231A ÷ 1160 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 231A × 0308 ÷ 1160 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 231A ÷ 11A8 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 231A × 0308 ÷ 11A8 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 231A ÷ AC00 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 231A × 0308 ÷ AC00 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 231A ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 231A × 0308 ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 231A ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 231A × 0308 ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 231A × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 231A × 0308 × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 231A × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 231A × 0308 × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 231A ÷ 0378 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 231A × 0308 ÷ 0378 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0300 × 034F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0300 × 0308 × 034F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0300 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0300 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0300 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0300 × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0300 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0300 × 0308 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0300 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0300 × 0308 ÷ 11A8 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0300 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0300 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0300 × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 200D ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200D × 0308 ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200D ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200D × 0308 ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200D ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 200D × 0308 ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 200D × 034F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 200D × 0308 × 034F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 200D ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200D ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 200D × 0308 ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 200D × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 200D × 0308 × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 200D ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 200D × 0308 ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 200D ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 200D × 0308 ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 200D ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 200D × 0308 ÷ 11A8 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 200D ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 200D × 0308 ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 200D ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 200D × 0308 ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 200D ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 200D × 0308 ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 200D × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 200D × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 200D ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 200D × 0308 ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0378 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0378 × 0308 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0378 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0378 × 0308 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0378 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0378 × 0308 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0378 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0378 × 0308 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3]
+÷ 0378 × 034F ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0378 × 0308 × 034F ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0378 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0378 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0378 ÷ 0600 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0378 × 0308 ÷ 0600 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0378 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0378 × 0308 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0378 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0378 × 0308 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0378 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0378 × 0308 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0378 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0378 × 0308 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0378 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0378 × 0308 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0378 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0378 × 0308 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0378 ÷ 231A ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0378 × 0308 ÷ 231A ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0378 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0378 × 0308 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0378 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0378 × 0308 × 200D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0378 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 0378 × 0308 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3]
+÷ 000D × 000A ÷ 0061 ÷ 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC LETTER NOON (Other) ÷ [0.3]
+÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC00 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ AC01 × 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [8.0] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [12.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 × 200D ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 200D ÷ 1F1E7 × 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 × 1F1E9 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0061 × 0308 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 × 0903 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 0061 ÷ 0600 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) × [9.2] LATIN SMALL LETTER B (Other) ÷ [0.3]
+÷ 1F476 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
+÷ 0061 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
+÷ 0061 × 1F3FF ÷ 1F476 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 1F476 × 1F3FF × 0308 × 200D × 1F476 × 1F3FF ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [0.3]
+÷ 1F6D1 × 200D × 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 0061 × 200D ÷ 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 2701 × 200D × 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]
+÷ 0061 × 200D ÷ 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] UPPER BLADE SCISSORS (Other) ÷ [0.3]
+#
+# Lines: 602
+#
+# EOF
diff --git a/vendor/bstr/src/unicode/data/LICENSE-UNICODE b/vendor/bstr/src/unicode/data/LICENSE-UNICODE
new file mode 100644
index 000000000..ad0693577
--- /dev/null
+++ b/vendor/bstr/src/unicode/data/LICENSE-UNICODE
@@ -0,0 +1,45 @@
+UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
+See Terms of Use for definitions of Unicode Inc.'s
+Data Files and Software.
+
+NOTICE TO USER: Carefully read the following legal agreement.
+BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
+DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
+YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
+TERMS AND CONDITIONS OF THIS AGREEMENT.
+IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
+THE DATA FILES OR SOFTWARE.
+
+COPYRIGHT AND PERMISSION NOTICE
+
+Copyright © 1991-2019 Unicode, Inc. All rights reserved.
+Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Unicode data files and any associated documentation
+(the "Data Files") or Unicode software and any associated documentation
+(the "Software") to deal in the Data Files or Software
+without restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, and/or sell copies of
+the Data Files or Software, and to permit persons to whom the Data Files
+or Software are furnished to do so, provided that either
+(a) this copyright and permission notice appear with all copies
+of the Data Files or Software, or
+(b) this copyright and permission notice appear in associated
+Documentation.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale,
+use or other dealings in these Data Files or Software without prior
+written authorization of the copyright holder.
diff --git a/vendor/bstr/src/unicode/data/SentenceBreakTest.txt b/vendor/bstr/src/unicode/data/SentenceBreakTest.txt
new file mode 100644
index 000000000..61ea42cf1
--- /dev/null
+++ b/vendor/bstr/src/unicode/data/SentenceBreakTest.txt
@@ -0,0 +1,530 @@
+# SentenceBreakTest-14.0.0.txt
+# Date: 2021-03-08, 06:22:40 GMT
+# © 2021 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Default Sentence_Break Test
+#
+# Format:
+# <string> (# <comment>)?
+# <string> contains hex Unicode code points, with
+# ÷ wherever there is a break opportunity, and
+# × wherever there is not.
+# <comment> the format can change, but currently it shows:
+# - the sample character name
+# - (x) the Sentence_Break property value for the sample character
+# - [x] the rule that determines whether there is a break or not,
+# as listed in the Rules section of SentenceBreakTest.html
+#
+# These samples may be extended or changed in the future.
+#
+÷ 0001 × 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0001 × 0308 × 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0001 × 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 × 0308 × 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 × 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 × 0308 × 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 × 0085 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0001 × 0308 × 0085 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0001 × 0009 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0001 × 0308 × 0009 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0001 × 0061 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0001 × 0308 × 0061 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0001 × 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0001 × 0308 × 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0001 × 01BB ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0001 × 0308 × 01BB ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0001 × 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0001 × 0308 × 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0001 × 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0001 × 0308 × 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0001 × 0021 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0001 × 0308 × 0021 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0001 × 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0001 × 0308 × 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0001 × 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0001 × 0308 × 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0001 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0001 × 0308 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0001 × 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000D ÷ 0308 × 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D ÷ 0308 × 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 0308 × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 0085 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 000D ÷ 0308 × 0085 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 000D ÷ 0009 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 000D ÷ 0308 × 0009 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 000D ÷ 0061 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 000D ÷ 0308 × 0061 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 000D ÷ 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 000D ÷ 0308 × 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 000D ÷ 01BB ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 000D ÷ 0308 × 01BB ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 000D ÷ 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000D ÷ 0308 × 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000D ÷ 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 000D ÷ 0308 × 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 000D ÷ 0021 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 000D ÷ 0308 × 0021 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 000D ÷ 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 000D ÷ 0308 × 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 000D ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMMA (SContinue) ÷ [0.3]
+÷ 000D ÷ 0308 × 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 000D ÷ 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0308 × 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000A ÷ 0308 × 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 0308 × 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 0308 × 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 0085 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 000A ÷ 0308 × 0085 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 000A ÷ 0009 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 000A ÷ 0308 × 0009 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 000A ÷ 0061 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 000A ÷ 0308 × 0061 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 000A ÷ 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 000A ÷ 0308 × 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 000A ÷ 01BB ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 000A ÷ 0308 × 01BB ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 000A ÷ 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000A ÷ 0308 × 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000A ÷ 002E ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 000A ÷ 0308 × 002E ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 000A ÷ 0021 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 000A ÷ 0308 × 0021 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 000A ÷ 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 000A ÷ 0308 × 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 000A ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMMA (SContinue) ÷ [0.3]
+÷ 000A ÷ 0308 × 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 000A ÷ 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0308 × 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0085 ÷ 0001 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0001 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0085 ÷ 000D ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0085 ÷ 0308 × 000D ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0085 ÷ 000A ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0085 ÷ 0308 × 000A ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0085 ÷ 0085 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0085 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0085 ÷ 0009 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0009 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0085 ÷ 0061 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0061 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0085 ÷ 0041 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0041 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0085 ÷ 01BB ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0085 ÷ 0308 × 01BB ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0085 ÷ 0030 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0030 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0085 ÷ 002E ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0085 ÷ 0308 × 002E ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0085 ÷ 0021 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0021 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0085 ÷ 0022 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0022 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0085 ÷ 002C ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMMA (SContinue) ÷ [0.3]
+÷ 0085 ÷ 0308 × 002C ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0085 ÷ 00AD ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0085 ÷ 0308 × 00AD ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0085 ÷ 0300 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0085 ÷ 0308 × 0300 ÷ # ÷ [0.2] <NEXT LINE (NEL)> (Sep) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0009 × 0001 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0009 × 0308 × 0001 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0009 × 000D ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0009 × 0308 × 000D ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0009 × 000A ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0009 × 0308 × 000A ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0009 × 0085 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0009 × 0308 × 0085 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0009 × 0009 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0009 × 0308 × 0009 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0009 × 0061 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0009 × 0308 × 0061 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0009 × 0041 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0009 × 0308 × 0041 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0009 × 01BB ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0009 × 0308 × 01BB ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0009 × 0030 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0009 × 0308 × 0030 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0009 × 002E ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0009 × 0308 × 002E ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0009 × 0021 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0009 × 0308 × 0021 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0009 × 0022 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0009 × 0308 × 0022 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0009 × 002C ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0009 × 0308 × 002C ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0009 × 00AD ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0009 × 0308 × 00AD ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0009 × 0300 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0009 × 0308 × 0300 ÷ # ÷ [0.2] <CHARACTER TABULATION> (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 × 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 × 0308 × 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 × 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 × 0308 × 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 × 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 × 0308 × 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 × 0085 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0061 × 0308 × 0085 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0061 × 0009 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0061 × 0308 × 0009 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0061 × 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0061 × 0308 × 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0061 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0061 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0061 × 01BB ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0061 × 0308 × 01BB ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0061 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 × 0308 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 × 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0061 × 0308 × 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0061 × 0021 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0061 × 0308 × 0021 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0061 × 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0061 × 0308 × 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0061 × 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0061 × 0308 × 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0061 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0041 × 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0041 × 0308 × 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0041 × 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0041 × 0308 × 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0041 × 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0041 × 0308 × 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0041 × 0085 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0041 × 0308 × 0085 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0041 × 0009 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0041 × 0308 × 0009 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0041 × 0061 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0041 × 0308 × 0061 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0041 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0041 × 0308 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0041 × 01BB ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0041 × 0308 × 01BB ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0041 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0041 × 0308 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0041 × 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0041 × 0308 × 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0041 × 0021 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0041 × 0308 × 0021 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0041 × 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0041 × 0308 × 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0041 × 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0041 × 0308 × 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0041 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0041 × 0308 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0041 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0041 × 0308 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 01BB × 0001 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 01BB × 0308 × 0001 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 01BB × 000D ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 01BB × 0308 × 000D ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 01BB × 000A ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 01BB × 0308 × 000A ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 01BB × 0085 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 01BB × 0308 × 0085 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 01BB × 0009 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 01BB × 0308 × 0009 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 01BB × 0061 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 01BB × 0308 × 0061 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 01BB × 0041 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 01BB × 0308 × 0041 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 01BB × 01BB ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 01BB × 0308 × 01BB ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 01BB × 0030 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 01BB × 0308 × 0030 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 01BB × 002E ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 01BB × 0308 × 002E ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 01BB × 0021 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 01BB × 0308 × 0021 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 01BB × 0022 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 01BB × 0308 × 0022 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 01BB × 002C ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 01BB × 0308 × 002C ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 01BB × 00AD ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 01BB × 0308 × 00AD ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 01BB × 0300 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 01BB × 0308 × 0300 ÷ # ÷ [0.2] LATIN LETTER TWO WITH STROKE (OLetter) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0030 × 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0030 × 0308 × 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0030 × 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0030 × 0308 × 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0030 × 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0030 × 0308 × 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0030 × 0085 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0030 × 0308 × 0085 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0030 × 0009 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0030 × 0308 × 0009 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0030 × 0061 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0030 × 0308 × 0061 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0030 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0030 × 0308 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0030 × 01BB ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0030 × 0308 × 01BB ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0030 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0030 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0030 × 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0030 × 0308 × 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0030 × 0021 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0030 × 0308 × 0021 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0030 × 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0030 × 0308 × 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0030 × 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0030 × 0308 × 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0030 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0030 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0030 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0030 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002E ÷ 0001 ÷ # ÷ [0.2] FULL STOP (ATerm) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002E × 0308 ÷ 0001 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002E × 000D ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002E × 0308 × 000D ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002E × 000A ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002E × 0308 × 000A ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002E × 0085 ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 002E × 0308 × 0085 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 002E × 0009 ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 002E × 0308 × 0009 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 002E × 0061 ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 002E × 0308 × 0061 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 002E ÷ 0041 ÷ # ÷ [0.2] FULL STOP (ATerm) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 002E × 0308 ÷ 0041 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 002E ÷ 01BB ÷ # ÷ [0.2] FULL STOP (ATerm) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 002E × 0308 ÷ 01BB ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 002E × 0030 ÷ # ÷ [0.2] FULL STOP (ATerm) × [6.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002E × 0308 × 0030 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [6.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002E × 002E ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.1] FULL STOP (ATerm) ÷ [0.3]
+÷ 002E × 0308 × 002E ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] FULL STOP (ATerm) ÷ [0.3]
+÷ 002E × 0021 ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 002E × 0308 × 0021 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 002E × 0022 ÷ # ÷ [0.2] FULL STOP (ATerm) × [9.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 002E × 0308 × 0022 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 002E × 002C ÷ # ÷ [0.2] FULL STOP (ATerm) × [8.1] COMMA (SContinue) ÷ [0.3]
+÷ 002E × 0308 × 002C ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] COMMA (SContinue) ÷ [0.3]
+÷ 002E × 00AD ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002E × 0308 × 00AD ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002E × 0300 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002E × 0308 × 0300 ÷ # ÷ [0.2] FULL STOP (ATerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0021 ÷ 0001 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0021 × 0308 ÷ 0001 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0021 × 000D ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0021 × 0308 × 000D ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0021 × 000A ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0021 × 0308 × 000A ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0021 × 0085 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0021 × 0308 × 0085 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0021 × 0009 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0021 × 0308 × 0009 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0021 ÷ 0061 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0021 × 0308 ÷ 0061 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0021 ÷ 0041 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0021 × 0308 ÷ 0041 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0021 ÷ 01BB ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0021 × 0308 ÷ 01BB ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0021 ÷ 0030 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) ÷ [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0021 × 0308 ÷ 0030 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0021 × 002E ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [8.1] FULL STOP (ATerm) ÷ [0.3]
+÷ 0021 × 0308 × 002E ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] FULL STOP (ATerm) ÷ [0.3]
+÷ 0021 × 0021 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0021 × 0308 × 0021 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0021 × 0022 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0021 × 0308 × 0022 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [9.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0021 × 002C ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [8.1] COMMA (SContinue) ÷ [0.3]
+÷ 0021 × 0308 × 002C ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.1] COMMA (SContinue) ÷ [0.3]
+÷ 0021 × 00AD ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0021 × 0308 × 00AD ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0021 × 0300 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0021 × 0308 × 0300 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0022 × 0001 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0022 × 0308 × 0001 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0022 × 000D ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0022 × 0308 × 000D ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0022 × 000A ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0022 × 0308 × 000A ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0022 × 0085 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0022 × 0308 × 0085 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0022 × 0009 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0022 × 0308 × 0009 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0022 × 0061 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0022 × 0308 × 0061 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0022 × 0041 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0022 × 0308 × 0041 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0022 × 01BB ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0022 × 0308 × 01BB ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0022 × 0030 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0022 × 0308 × 0030 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0022 × 002E ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0022 × 0308 × 002E ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0022 × 0021 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0022 × 0308 × 0021 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0022 × 0022 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0022 × 0308 × 0022 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0022 × 002C ÷ # ÷ [0.2] QUOTATION MARK (Close) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0022 × 0308 × 002C ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0022 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0022 × 0308 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0022 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0022 × 0308 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002C × 0001 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002C × 0308 × 0001 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002C × 000D ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002C × 0308 × 000D ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002C × 000A ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002C × 0308 × 000A ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002C × 0085 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 002C × 0308 × 0085 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 002C × 0009 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 002C × 0308 × 0009 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 002C × 0061 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 002C × 0308 × 0061 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 002C × 0041 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 002C × 0308 × 0041 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 002C × 01BB ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 002C × 0308 × 01BB ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 002C × 0030 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002C × 0308 × 0030 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002C × 002E ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 002C × 0308 × 002E ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 002C × 0021 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 002C × 0308 × 0021 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 002C × 0022 ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 002C × 0308 × 0022 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 002C × 002C ÷ # ÷ [0.2] COMMA (SContinue) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 002C × 0308 × 002C ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 002C × 00AD ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002C × 0300 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] COMMA (SContinue) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 00AD × 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 00AD × 0308 × 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 00AD × 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 00AD × 0308 × 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 00AD × 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 00AD × 0308 × 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 00AD × 0085 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 00AD × 0308 × 0085 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 00AD × 0009 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 00AD × 0308 × 0009 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 00AD × 0061 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 00AD × 0308 × 0061 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 00AD × 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 00AD × 0308 × 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 00AD × 01BB ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 00AD × 0308 × 01BB ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 00AD × 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 00AD × 0308 × 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 00AD × 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 00AD × 0308 × 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 00AD × 0021 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 00AD × 0308 × 0021 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 00AD × 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 00AD × 0308 × 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 00AD × 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 00AD × 0308 × 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 00AD × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 00AD × 0308 × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 00AD × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 00AD × 0308 × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0300 × 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0300 × 0308 × 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0300 × 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 × 0308 × 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 × 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 × 0308 × 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 × 0085 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0300 × 0308 × 0085 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <NEXT LINE (NEL)> (Sep) ÷ [0.3]
+÷ 0300 × 0009 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0300 × 0308 × 0009 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] <CHARACTER TABULATION> (Sp) ÷ [0.3]
+÷ 0300 × 0061 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0300 × 0308 × 0061 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN SMALL LETTER A (Lower) ÷ [0.3]
+÷ 0300 × 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0300 × 0308 × 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER A (Upper) ÷ [0.3]
+÷ 0300 × 01BB ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0300 × 0308 × 01BB ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN LETTER TWO WITH STROKE (OLetter) ÷ [0.3]
+÷ 0300 × 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0300 × 0308 × 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0300 × 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0300 × 0308 × 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0300 × 0021 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0300 × 0308 × 0021 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] EXCLAMATION MARK (STerm) ÷ [0.3]
+÷ 0300 × 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0300 × 0308 × 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] QUOTATION MARK (Close) ÷ [0.3]
+÷ 0300 × 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0300 × 0308 × 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [998.0] COMMA (SContinue) ÷ [0.3]
+÷ 0300 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0300 × 0308 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000D × 000A ÷ 0061 × 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [4.0] LATIN SMALL LETTER A (Lower) × [998.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [0.3]
+÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Lower) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [0.3]
+÷ 0020 × 200D × 0646 ÷ # ÷ [0.2] SPACE (Sp) × [5.0] ZERO WIDTH JOINER (Extend_FE) × [998.0] ARABIC LETTER NOON (OLetter) ÷ [0.3]
+÷ 0646 × 200D × 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (OLetter) × [5.0] ZERO WIDTH JOINER (Extend_FE) × [998.0] SPACE (Sp) ÷ [0.3]
+÷ 0028 × 0022 × 0047 × 006F × 002E × 0022 × 0029 × 0020 ÷ 0028 × 0048 × 0065 × 0020 × 0064 × 0069 × 0064 × 002E × 0029 ÷ # ÷ [0.2] LEFT PARENTHESIS (Close) × [998.0] QUOTATION MARK (Close) × [998.0] LATIN CAPITAL LETTER G (Upper) × [998.0] LATIN SMALL LETTER O (Lower) × [998.0] FULL STOP (ATerm) × [9.0] QUOTATION MARK (Close) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] SPACE (Sp) ÷ [11.0] LEFT PARENTHESIS (Close) × [998.0] LATIN CAPITAL LETTER H (Upper) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] LATIN SMALL LETTER I (Lower) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) ÷ [0.3]
+÷ 0028 × 201C × 0047 × 006F × 003F × 201D × 0029 × 0020 ÷ 0028 × 0048 × 0065 × 0020 × 0064 × 0069 × 0064 × 002E × 0029 ÷ # ÷ [0.2] LEFT PARENTHESIS (Close) × [998.0] LEFT DOUBLE QUOTATION MARK (Close) × [998.0] LATIN CAPITAL LETTER G (Upper) × [998.0] LATIN SMALL LETTER O (Lower) × [998.0] QUESTION MARK (STerm) × [9.0] RIGHT DOUBLE QUOTATION MARK (Close) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] SPACE (Sp) ÷ [11.0] LEFT PARENTHESIS (Close) × [998.0] LATIN CAPITAL LETTER H (Upper) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] LATIN SMALL LETTER I (Lower) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) ÷ [0.3]
+÷ 0055 × 002E × 0053 × 002E × 0041 × 0300 × 002E × 0020 × 0069 × 0073 ÷ # ÷ [0.2] LATIN CAPITAL LETTER U (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER S (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) × [8.0] SPACE (Sp) × [8.0] LATIN SMALL LETTER I (Lower) × [998.0] LATIN SMALL LETTER S (Lower) ÷ [0.3]
+÷ 0055 × 002E × 0053 × 002E × 0041 × 0300 × 003F × 0020 ÷ 0048 × 0065 ÷ # ÷ [0.2] LATIN CAPITAL LETTER U (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER S (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] QUESTION MARK (STerm) × [9.0] SPACE (Sp) ÷ [11.0] LATIN CAPITAL LETTER H (Upper) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0055 × 002E × 0053 × 002E × 0041 × 0300 × 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER U (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER S (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) ÷ [0.3]
+÷ 0033 × 002E × 0034 ÷ # ÷ [0.2] DIGIT THREE (Numeric) × [998.0] FULL STOP (ATerm) × [6.0] DIGIT FOUR (Numeric) ÷ [0.3]
+÷ 0063 × 002E × 0064 ÷ # ÷ [0.2] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] LATIN SMALL LETTER D (Lower) ÷ [0.3]
+÷ 0043 × 002E × 0064 ÷ # ÷ [0.2] LATIN CAPITAL LETTER C (Upper) × [998.0] FULL STOP (ATerm) × [8.0] LATIN SMALL LETTER D (Lower) ÷ [0.3]
+÷ 0063 × 002E × 0044 ÷ # ÷ [0.2] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER D (Upper) ÷ [0.3]
+÷ 0043 × 002E × 0044 ÷ # ÷ [0.2] LATIN CAPITAL LETTER C (Upper) × [998.0] FULL STOP (ATerm) × [7.0] LATIN CAPITAL LETTER D (Upper) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 0074 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] RIGHT PARENTHESIS (Close) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [8.0] NO-BREAK SPACE (Sp) × [8.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 ÷ 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [9.0] NO-BREAK SPACE (Sp) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 2018 × 0028 × 0074 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] RIGHT PARENTHESIS (Close) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [8.0] NO-BREAK SPACE (Sp) × [8.0] LEFT SINGLE QUOTATION MARK (Close) × [998.0] LEFT PARENTHESIS (Close) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 ÷ 2018 × 0028 × 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [9.0] NO-BREAK SPACE (Sp) ÷ [11.0] LEFT SINGLE QUOTATION MARK (Close) × [998.0] LEFT PARENTHESIS (Close) × [998.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 0308 × 0074 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.0] RIGHT PARENTHESIS (Close) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [8.0] NO-BREAK SPACE (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 00A0 × 0308 ÷ 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [9.0] NO-BREAK SPACE (Sp) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 2019 × 0308 ÷ 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 0029 × 000A ÷ 0308 × 0054 × 0068 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [9.0] RIGHT PARENTHESIS (Close) × [9.0] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_FE) × [998.0] LATIN CAPITAL LETTER T (Upper) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 0074 × 0068 × 0065 × 0020 × 0072 × 0065 × 0073 × 0070 × 002E × 0020 × 006C × 0065 × 0061 × 0064 × 0065 × 0072 × 0073 × 0020 × 0061 × 0072 × 0065 ÷ # ÷ [0.2] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER H (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER R (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER S (Lower) × [998.0] LATIN SMALL LETTER P (Lower) × [998.0] FULL STOP (ATerm) × [8.0] SPACE (Sp) × [8.0] LATIN SMALL LETTER L (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER A (Lower) × [998.0] LATIN SMALL LETTER D (Lower) × [998.0] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER R (Lower) × [998.0] LATIN SMALL LETTER S (Lower) × [998.0] SPACE (Sp) × [998.0] LATIN SMALL LETTER A (Lower) × [998.0] LATIN SMALL LETTER R (Lower) × [998.0] LATIN SMALL LETTER E (Lower) ÷ [0.3]
+÷ 5B57 × 002E ÷ 5B57 ÷ # ÷ [0.2] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [998.0] FULL STOP (ATerm) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E ÷ 5B83 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) ÷ [0.3]
+÷ 0065 × 0074 × 0063 × 002E × 3002 ÷ # ÷ [0.2] LATIN SMALL LETTER E (Lower) × [998.0] LATIN SMALL LETTER T (Lower) × [998.0] LATIN SMALL LETTER C (Lower) × [998.0] FULL STOP (ATerm) × [8.1] IDEOGRAPHIC FULL STOP (STerm) ÷ [0.3]
+÷ 5B57 × 3002 ÷ 5B83 ÷ # ÷ [0.2] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [998.0] IDEOGRAPHIC FULL STOP (STerm) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) ÷ [0.3]
+÷ 0021 × 0020 × 0020 ÷ # ÷ [0.2] EXCLAMATION MARK (STerm) × [9.0] SPACE (Sp) × [10.0] SPACE (Sp) ÷ [0.3]
+÷ 2060 × 0028 × 2060 × 0022 × 2060 × 0047 × 2060 × 006F × 2060 × 002E × 2060 × 0022 × 2060 × 0029 × 2060 × 0020 × 2060 ÷ 0028 × 2060 × 0048 × 2060 × 0065 × 2060 × 0020 × 2060 × 0064 × 2060 × 0069 × 2060 × 0064 × 2060 × 002E × 2060 × 0029 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER G (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER O (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER H (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER I (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0028 × 2060 × 201C × 2060 × 0047 × 2060 × 006F × 2060 × 003F × 2060 × 201D × 2060 × 0029 × 2060 × 0020 × 2060 ÷ 0028 × 2060 × 0048 × 2060 × 0065 × 2060 × 0020 × 2060 × 0064 × 2060 × 0069 × 2060 × 0064 × 2060 × 002E × 2060 × 0029 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LEFT DOUBLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER G (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER O (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] QUESTION MARK (STerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT DOUBLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER H (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER I (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0055 × 2060 × 002E × 2060 × 0053 × 2060 × 002E × 2060 × 0041 × 2060 × 0300 × 002E × 2060 × 0020 × 2060 × 0069 × 2060 × 0073 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER U (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER S (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER I (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER S (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0055 × 2060 × 002E × 2060 × 0053 × 2060 × 002E × 2060 × 0041 × 2060 × 0300 × 003F × 2060 × 0020 × 2060 ÷ 0048 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER U (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER S (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] QUESTION MARK (STerm) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LATIN CAPITAL LETTER H (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0055 × 2060 × 002E × 2060 × 0053 × 2060 × 002E × 2060 × 0041 × 2060 × 0300 × 002E × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER U (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER S (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING GRAVE ACCENT (Extend_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0033 × 2060 × 002E × 2060 × 0034 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] DIGIT THREE (Numeric) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [6.0] DIGIT FOUR (Numeric) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0063 × 2060 × 002E × 2060 × 0064 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0043 × 2060 × 002E × 2060 × 0064 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER C (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0063 × 2060 × 002E × 2060 × 0044 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER D (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0043 × 2060 × 002E × 2060 × 0044 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER C (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER D (Upper) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 ÷ 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 2018 × 2060 × 0028 × 2060 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LEFT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 ÷ 2018 × 2060 × 0028 × 2060 × 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] LEFT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LEFT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 0308 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [8.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [8.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 00A0 × 2060 × 0308 ÷ 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] NO-BREAK SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 2019 × 2060 × 0308 ÷ 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT SINGLE QUOTATION MARK (Close) × [5.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) ÷ [11.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 0029 × 2060 × 000A ÷ 2060 × 0308 × 2060 × 0054 × 2060 × 0068 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [9.0] RIGHT PARENTHESIS (Close) × [5.0] WORD JOINER (Format_FE) × [9.0] <LINE FEED (LF)> (LF) ÷ [4.0] WORD JOINER (Format_FE) × [5.0] COMBINING DIAERESIS (Extend_FE) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN CAPITAL LETTER T (Upper) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0074 × 2060 × 0068 × 2060 × 0065 × 2060 × 0020 × 2060 × 0072 × 2060 × 0065 × 2060 × 0073 × 2060 × 0070 × 2060 × 002E × 2060 × 0020 × 2060 × 006C × 2060 × 0065 × 2060 × 0061 × 2060 × 0064 × 2060 × 0065 × 2060 × 0072 × 2060 × 0073 × 2060 × 0020 × 2060 × 0061 × 2060 × 0072 × 2060 × 0065 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER H (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER S (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER P (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [8.0] LATIN SMALL LETTER L (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER A (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER D (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER S (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER A (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER R (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 5B57 × 2060 × 002E × 2060 ÷ 5B57 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 ÷ 5B83 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0065 × 2060 × 0074 × 2060 × 0063 × 2060 × 002E × 2060 × 3002 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER E (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER T (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] LATIN SMALL LETTER C (Lower) × [5.0] WORD JOINER (Format_FE) × [998.0] FULL STOP (ATerm) × [5.0] WORD JOINER (Format_FE) × [8.1] IDEOGRAPHIC FULL STOP (STerm) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 5B57 × 2060 × 3002 × 2060 ÷ 5B83 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] CJK UNIFIED IDEOGRAPH-5B57 (OLetter) × [5.0] WORD JOINER (Format_FE) × [998.0] IDEOGRAPHIC FULL STOP (STerm) × [5.0] WORD JOINER (Format_FE) ÷ [11.0] CJK UNIFIED IDEOGRAPH-5B83 (OLetter) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 2060 × 0021 × 2060 × 0020 × 2060 × 0020 × 2060 × 2060 ÷ # ÷ [0.2] WORD JOINER (Format_FE) × [998.0] EXCLAMATION MARK (STerm) × [5.0] WORD JOINER (Format_FE) × [9.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [10.0] SPACE (Sp) × [5.0] WORD JOINER (Format_FE) × [5.0] WORD JOINER (Format_FE) ÷ [0.3]
+#
+# Lines: 502
+#
+# EOF
diff --git a/vendor/bstr/src/unicode/data/WordBreakTest.txt b/vendor/bstr/src/unicode/data/WordBreakTest.txt
new file mode 100644
index 000000000..1d1435bfc
--- /dev/null
+++ b/vendor/bstr/src/unicode/data/WordBreakTest.txt
@@ -0,0 +1,1851 @@
+# WordBreakTest-14.0.0.txt
+# Date: 2021-03-08, 06:22:40 GMT
+# © 2021 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Unicode Character Database
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Default Word_Break Test
+#
+# Format:
+# <string> (# <comment>)?
+# <string> contains hex Unicode code points, with
+# ÷ wherever there is a break opportunity, and
+# × wherever there is not.
+# <comment> the format can change, but currently it shows:
+# - the sample character name
+# - (x) the Word_Break property value for the sample character
+# - [x] the rule that determines whether there is a break or not,
+# as listed in the Rules section of WordBreakTest.html
+#
+# These samples may be extended or changed in the future.
+#
+÷ 0001 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0001 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 × 0308 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0001 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 × 0308 ÷ 000A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0001 ÷ 000B ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0001 × 0308 ÷ 000B ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0001 ÷ 3031 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0001 × 0308 ÷ 3031 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0001 ÷ 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0041 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0001 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 × 0308 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 × 0308 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 ÷ 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0001 × 0308 ÷ 002E ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0001 ÷ 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0030 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0001 ÷ 005F ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0001 × 0308 ÷ 005F ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0001 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0001 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0001 ÷ 05D0 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0001 × 0308 ÷ 05D0 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0001 ÷ 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0022 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0001 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0001 × 0308 ÷ 231A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0001 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0001 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0001 × 0308 × 00AD ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0001 × 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0001 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0001 × 0308 × 200D ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0001 ÷ 0061 × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0001 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0001 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0001 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0001 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0001 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0001 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000D ÷ 000B ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 000B ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000D ÷ 3031 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 3031 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000D ÷ 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0041 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000D ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 002E ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000D ÷ 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000D ÷ 005F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 005F ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000D ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000D ÷ 05D0 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 05D0 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000D ÷ 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0022 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000D ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] WATCH (ExtPict) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000D ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] SPACE (WSegSpace) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 000D ÷ 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0308 × 00AD ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000D ÷ 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000D ÷ 0061 × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000D ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000D ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000D ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000A ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000A ÷ 000B ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 000B ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000A ÷ 3031 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 3031 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000A ÷ 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0041 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000A ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 002E ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 002E ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000A ÷ 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000A ÷ 005F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 005F ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000A ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000A ÷ 05D0 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 05D0 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000A ÷ 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0022 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000A ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] WATCH (ExtPict) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000A ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] SPACE (WSegSpace) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 000A ÷ 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0308 × 00AD ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000A ÷ 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000A ÷ 0061 × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000A ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0001 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 000B ÷ 000D ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 000B ÷ 000A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 000B ÷ 000B ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 000B ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 000B ÷ 3031 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 3031 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 000B ÷ 0041 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0041 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 000B ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 002E ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 002E ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 000B ÷ 0030 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0030 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 000B ÷ 005F ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 005F ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 000B ÷ 1F1E6 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 000B ÷ 05D0 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 05D0 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 000B ÷ 0022 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0022 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 000B ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 231A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] WATCH (ExtPict) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 231A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 000B ÷ 0020 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] SPACE (WSegSpace) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 000B ÷ 00AD ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0308 × 00AD ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0300 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000B ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 000B ÷ 200D ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000B ÷ 0308 × 200D ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 000B ÷ 0061 × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 000B ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 000B ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 000B ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000B ÷ 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] <LINE TABULATION> (Newline) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 ÷ 0001 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0001 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 3031 ÷ 000D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 3031 × 0308 ÷ 000D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 3031 ÷ 000A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 3031 × 0308 ÷ 000A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 3031 ÷ 000B ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 3031 × 0308 ÷ 000B ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 3031 × 3031 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 3031 × 0308 × 3031 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 3031 ÷ 0041 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0041 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 3031 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 × 0308 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 × 0308 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 ÷ 002E ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 3031 × 0308 ÷ 002E ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 3031 ÷ 0030 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0030 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 3031 × 005F ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 3031 × 0308 × 005F ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 3031 ÷ 1F1E6 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 3031 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 3031 ÷ 05D0 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 3031 × 0308 ÷ 05D0 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 3031 ÷ 0022 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0022 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 3031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 ÷ 231A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 3031 × 0308 ÷ 231A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 3031 ÷ 0020 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0020 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 3031 × 00AD ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 3031 × 0308 × 00AD ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 3031 × 0300 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 3031 × 0308 × 0300 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 3031 × 200D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 3031 × 0308 × 200D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 3031 ÷ 0061 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 3031 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 3031 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 3031 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 3031 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 ÷ 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0041 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0041 ÷ 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0041 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0041 ÷ 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0041 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0041 ÷ 000B ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0041 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0041 ÷ 3031 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0041 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0041 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0041 × 0308 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0041 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 ÷ 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0041 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0041 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0041 × 0308 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0041 × 005F ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0041 × 0308 × 005F ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0041 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0041 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0041 × 05D0 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0041 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0041 ÷ 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0041 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0041 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 ÷ 231A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0041 × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0041 ÷ 0020 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0041 × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0041 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0041 × 0308 × 00AD ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0041 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0041 × 0308 × 0300 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0041 × 200D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0041 × 0308 × 200D ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0041 × 0061 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0041 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0041 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0041 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0041 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A ÷ 0001 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 003A × 0308 ÷ 0001 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 003A ÷ 000D ÷ # ÷ [0.2] COLON (MidLetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 003A × 0308 ÷ 000D ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 003A ÷ 000A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 003A × 0308 ÷ 000A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 003A ÷ 000B ÷ # ÷ [0.2] COLON (MidLetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 003A × 0308 ÷ 000B ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 003A ÷ 3031 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 003A × 0308 ÷ 3031 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 003A ÷ 0041 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 003A × 0308 ÷ 0041 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 003A ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A × 0308 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A × 0308 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A ÷ 002E ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 003A × 0308 ÷ 002E ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 003A ÷ 0030 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 003A ÷ 005F ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 003A ÷ 05D0 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 003A × 0308 ÷ 05D0 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 003A ÷ 0022 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 003A × 0308 ÷ 0022 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 003A ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A ÷ 231A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 003A × 0308 ÷ 231A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 003A ÷ 0020 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 003A × 0308 ÷ 0020 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 003A × 00AD ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 003A × 0300 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 003A × 0308 × 0300 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 003A × 200D ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 003A × 0308 × 200D ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 003A ÷ 0061 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 003A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 003A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 003A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C ÷ 0001 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002C × 0308 ÷ 0001 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002C ÷ 000D ÷ # ÷ [0.2] COMMA (MidNum) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002C × 0308 ÷ 000D ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002C ÷ 000A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002C × 0308 ÷ 000A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002C ÷ 000B ÷ # ÷ [0.2] COMMA (MidNum) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 002C × 0308 ÷ 000B ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 002C ÷ 3031 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 002C × 0308 ÷ 3031 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 002C ÷ 0041 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 002C × 0308 ÷ 0041 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 002C ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C × 0308 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C × 0308 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C ÷ 002E ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 002C × 0308 ÷ 002E ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 002C ÷ 0030 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002C × 0308 ÷ 0030 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002C ÷ 005F ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 002C ÷ 1F1E6 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 002C ÷ 05D0 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 002C × 0308 ÷ 05D0 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 002C ÷ 0022 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 002C × 0308 ÷ 0022 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 002C ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C ÷ 231A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 002C × 0308 ÷ 231A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 002C ÷ 0020 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 002C × 0308 ÷ 0020 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 002C × 00AD ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002C × 0300 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002C × 200D ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 002C × 0308 × 200D ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 002C ÷ 0061 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002C ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002C ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002C ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002C × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E ÷ 0001 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002E × 0308 ÷ 0001 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 002E ÷ 000D ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002E × 0308 ÷ 000D ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 002E ÷ 000A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002E × 0308 ÷ 000A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 002E ÷ 000B ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 002E × 0308 ÷ 000B ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 002E ÷ 3031 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 002E × 0308 ÷ 3031 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 002E ÷ 0041 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 002E × 0308 ÷ 0041 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 002E ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E × 0308 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E × 0308 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E ÷ 002E ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 002E × 0308 ÷ 002E ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 002E ÷ 0030 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002E × 0308 ÷ 0030 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 002E ÷ 005F ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 002E × 0308 ÷ 005F ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 002E ÷ 1F1E6 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 002E × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 002E ÷ 05D0 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 002E × 0308 ÷ 05D0 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 002E ÷ 0022 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 002E × 0308 ÷ 0022 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 002E ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E × 0308 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E ÷ 231A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 002E × 0308 ÷ 231A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 002E ÷ 0020 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 002E × 0308 ÷ 0020 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 002E × 00AD ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002E × 0308 × 00AD ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 002E × 0300 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002E × 0308 × 0300 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 002E × 200D ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 002E × 0308 × 200D ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 002E ÷ 0061 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E ÷ 0061 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E ÷ 0061 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E ÷ 0031 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 002E ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 002E ÷ 0031 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 002E ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 002E × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] FULL STOP (MidNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 ÷ 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0030 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0030 ÷ 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0030 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0030 ÷ 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0030 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0030 ÷ 000B ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0030 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0030 ÷ 3031 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0030 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0030 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0030 × 0308 × 0041 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0030 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 ÷ 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0030 × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0030 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0030 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0030 × 005F ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0030 × 0308 × 005F ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0030 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0030 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0030 × 05D0 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0030 × 0308 × 05D0 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0030 ÷ 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0030 × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0030 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 ÷ 231A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0030 × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0030 ÷ 0020 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0030 × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0030 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0030 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0030 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0030 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0030 × 200D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0030 × 0308 × 200D ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0030 × 0061 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 × 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 × 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 × 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [10.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0030 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0030 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0030 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0030 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [4.0] COMBINING DIAERESIS (Extend_FE) × [8.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F ÷ 0001 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 005F × 0308 ÷ 0001 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 005F ÷ 000D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 005F × 0308 ÷ 000D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 005F ÷ 000A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 005F × 0308 ÷ 000A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 005F ÷ 000B ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 005F × 0308 ÷ 000B ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 005F × 3031 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 005F × 0308 × 3031 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 005F × 0041 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 005F × 0308 × 0041 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 005F ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F × 0308 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F × 0308 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F ÷ 002E ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 005F × 0308 ÷ 002E ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 005F × 0030 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 005F × 0308 × 0030 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 005F × 005F ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 005F × 0308 × 005F ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 005F ÷ 1F1E6 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 005F × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 005F × 05D0 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 005F × 0308 × 05D0 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 005F ÷ 0022 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 005F × 0308 ÷ 0022 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 005F ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F × 0308 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F ÷ 231A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 005F × 0308 ÷ 231A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 005F ÷ 0020 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 005F × 0308 ÷ 0020 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 005F × 00AD ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 005F × 0308 × 00AD ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 005F × 0300 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 005F × 0308 × 0300 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 005F × 200D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 005F × 0308 × 200D ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 005F × 0061 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F × 0061 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F × 0061 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F × 0061 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F × 0031 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 005F × 0031 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 005F × 0031 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 005F × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 005F × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LOW LINE (ExtendNumLet) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0001 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 1F1E6 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 1F1E6 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 1F1E6 ÷ 000B ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 000B ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 1F1E6 ÷ 3031 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 3031 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 1F1E6 ÷ 0041 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0041 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 1F1E6 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 ÷ 002E ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 002E ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 1F1E6 ÷ 0030 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0030 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 1F1E6 ÷ 005F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 005F ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 1F1E6 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [15.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1F1E6 × 0308 × 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) × [15.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 1F1E6 ÷ 05D0 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 05D0 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 1F1E6 ÷ 0022 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0022 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 1F1E6 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 1F1E6 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0020 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 1F1E6 × 00AD ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 × 00AD ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 1F1E6 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 1F1E6 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 1F1E6 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 ÷ 0001 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 0001 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 05D0 ÷ 000D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 000D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 05D0 ÷ 000A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 000A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 05D0 ÷ 000B ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 000B ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 05D0 ÷ 3031 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 3031 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 05D0 × 0041 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 05D0 × 0308 × 0041 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 05D0 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 ÷ 002E ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 002E ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 05D0 × 0030 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 05D0 × 0308 × 0030 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 05D0 × 005F ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 05D0 × 0308 × 005F ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 05D0 ÷ 1F1E6 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 05D0 × 05D0 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 05D0 × 0308 × 05D0 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 05D0 ÷ 0022 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 0022 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 05D0 × 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [7.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0308 × 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 ÷ 231A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 231A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 05D0 ÷ 0020 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 05D0 × 0308 ÷ 0020 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 05D0 × 00AD ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 00AD ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 05D0 × 0300 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 0300 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 05D0 × 200D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 200D ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 05D0 × 0061 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 × 0061 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 × 0061 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 × 0061 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 × 0031 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 05D0 × 0031 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0031 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 05D0 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 05D0 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 ÷ 0001 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0001 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0022 ÷ 000D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0022 × 0308 ÷ 000D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0022 ÷ 000A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0022 × 0308 ÷ 000A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0022 ÷ 000B ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0022 × 0308 ÷ 000B ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0022 ÷ 3031 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0022 × 0308 ÷ 3031 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0022 ÷ 0041 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0041 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0022 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 × 0308 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 × 0308 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 ÷ 002E ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0022 × 0308 ÷ 002E ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0022 ÷ 0030 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0030 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0022 ÷ 005F ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0022 × 0308 ÷ 005F ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0022 ÷ 1F1E6 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0022 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0022 ÷ 05D0 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0022 × 0308 ÷ 05D0 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0022 ÷ 0022 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0022 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0022 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 ÷ 231A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0022 × 0308 ÷ 231A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0022 ÷ 0020 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0020 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0022 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0022 × 0308 × 00AD ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0022 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0022 × 0308 × 0300 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0022 × 200D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0022 × 0308 × 200D ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0022 ÷ 0061 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0022 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0022 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0022 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0022 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] QUOTATION MARK (Double_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 ÷ 0001 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0027 ÷ 000D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0027 ÷ 000A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0027 ÷ 000B ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0027 ÷ 3031 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0027 ÷ 0041 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0041 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0027 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 ÷ 002E ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0027 × 0308 ÷ 002E ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0027 ÷ 0030 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0030 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0027 ÷ 005F ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0027 ÷ 05D0 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0027 × 0308 ÷ 05D0 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0027 ÷ 0022 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0022 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0027 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 ÷ 231A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0027 × 0308 ÷ 231A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0027 ÷ 0020 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0020 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0027 × 00AD ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0027 × 0300 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0027 × 200D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0027 × 0308 × 200D ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0027 ÷ 0061 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0027 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0027 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0027 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0027 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 231A × 0308 ÷ 0001 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 231A ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 231A × 0308 ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 231A ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 231A × 0308 ÷ 000A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 231A ÷ 000B ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 231A × 0308 ÷ 000B ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 231A ÷ 3031 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 231A × 0308 ÷ 3031 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 231A ÷ 0041 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 231A × 0308 ÷ 0041 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 231A ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A × 0308 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A × 0308 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A ÷ 002E ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 231A × 0308 ÷ 002E ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 231A ÷ 0030 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 231A × 0308 ÷ 0030 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 231A ÷ 005F ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 231A × 0308 ÷ 005F ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 231A ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 231A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 231A ÷ 05D0 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 231A × 0308 ÷ 05D0 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 231A ÷ 0022 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 231A × 0308 ÷ 0022 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 231A ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A × 0308 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 231A × 0308 ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 231A ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 231A × 0308 ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 231A × 00AD ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 231A × 0308 × 00AD ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 231A × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 231A × 0308 × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 231A × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 231A × 0308 × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 231A ÷ 0061 × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 231A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 231A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 231A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 231A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] WATCH (ExtPict) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 ÷ 0001 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0020 ÷ 000D ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0020 × 0308 ÷ 000D ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0020 ÷ 000A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0020 × 0308 ÷ 000A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0020 ÷ 000B ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0020 × 0308 ÷ 000B ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0020 ÷ 3031 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0020 × 0308 ÷ 3031 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0020 ÷ 0041 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0041 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0020 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 × 0308 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 × 0308 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 ÷ 002E ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0020 × 0308 ÷ 002E ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0020 ÷ 0030 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0030 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0020 ÷ 005F ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0020 × 0308 ÷ 005F ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0020 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0020 ÷ 05D0 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0020 × 0308 ÷ 05D0 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0020 ÷ 0022 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0022 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0020 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 ÷ 231A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0020 × 0308 ÷ 231A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0020 × 0020 ÷ # ÷ [0.2] SPACE (WSegSpace) × [3.4] SPACE (WSegSpace) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0020 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0020 × 00AD ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0020 × 0308 × 00AD ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0020 ÷ 0061 × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0020 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0020 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0020 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0020 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD ÷ 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0001 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 00AD ÷ 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 00AD × 0308 ÷ 000D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 00AD ÷ 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 00AD × 0308 ÷ 000A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 00AD ÷ 000B ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 00AD × 0308 ÷ 000B ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 00AD ÷ 3031 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 00AD × 0308 ÷ 3031 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 00AD ÷ 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0041 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 00AD ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD × 0308 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD × 0308 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD ÷ 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 00AD × 0308 ÷ 002E ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 00AD ÷ 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0030 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 00AD ÷ 005F ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 00AD × 0308 ÷ 005F ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 00AD ÷ 1F1E6 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 00AD × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 00AD ÷ 05D0 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 00AD × 0308 ÷ 05D0 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 00AD ÷ 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0022 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 00AD ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD ÷ 231A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 00AD × 0308 ÷ 231A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 00AD ÷ 0020 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0020 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 00AD × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 00AD × 0308 × 00AD ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 00AD × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 00AD × 0308 × 0300 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 00AD × 200D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 00AD × 0308 × 200D ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 00AD ÷ 0061 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 00AD ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 00AD ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 00AD ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 00AD × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] SOFT HYPHEN (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 × 0308 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0300 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 × 0308 ÷ 000A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0300 ÷ 000B ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0300 × 0308 ÷ 000B ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0300 ÷ 3031 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0300 × 0308 ÷ 3031 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0300 ÷ 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0041 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0300 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 × 0308 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 × 0308 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 ÷ 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0300 × 0308 ÷ 002E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0300 ÷ 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0030 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0300 ÷ 005F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0300 × 0308 ÷ 005F ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0300 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0300 ÷ 05D0 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0300 × 0308 ÷ 05D0 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0300 ÷ 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0022 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0300 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0300 × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0300 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0300 × 0308 × 00AD ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0300 ÷ 0061 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0300 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0300 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0300 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0300 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 200D × 0308 ÷ 0001 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 200D ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200D × 0308 ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 200D ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200D × 0308 ÷ 000A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 200D ÷ 000B ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 200D × 0308 ÷ 000B ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 200D ÷ 3031 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 200D × 0308 ÷ 3031 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 200D ÷ 0041 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 200D × 0308 ÷ 0041 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 200D ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D × 0308 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D × 0308 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D ÷ 002E ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 200D × 0308 ÷ 002E ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 200D ÷ 0030 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 200D × 0308 ÷ 0030 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 200D ÷ 005F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 200D × 0308 ÷ 005F ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 200D ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 200D ÷ 05D0 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 200D × 0308 ÷ 05D0 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 200D ÷ 0022 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 200D × 0308 ÷ 0022 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 200D ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D × 0308 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D × 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] WATCH (ExtPict) ÷ [0.3]
+÷ 200D × 0308 ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 200D × 00AD ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 200D × 0308 × 00AD ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 200D × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 200D × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 200D ÷ 0061 × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D ÷ 0061 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D ÷ 0061 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D ÷ 0031 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 200D ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 200D ÷ 0031 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 200D ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 200D × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 × 2060 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 × 2060 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 × 2060 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 × 2060 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 × 2060 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 2060 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 × 2060 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 × 2060 × 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 × 2060 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 × 2060 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 × 2060 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 × 2060 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 × 2060 × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 × 2060 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 × 2060 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 2060 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 2060 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 2060 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 2060 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [9.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 × 003A × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 × 003A × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 003A × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 × 003A × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 003A × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 003A × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 003A × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 003A × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 003A × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 003A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 × 0027 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 × 0027 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 0027 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 × 0027 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 0027 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 0027 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 0027 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 × 0027 × 2060 × 0308 × 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [6.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [7.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 0027 × 2060 × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 000D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 000A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 000B ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 3031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0041 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 002E ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0030 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 05D0 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0022 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 231A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0020 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0061 ÷ 002C × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0061 ÷ 002C × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 003A × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 003A × 0308 ÷ 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 × 0027 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 × 0027 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 0027 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 0027 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 0027 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 0027 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 0027 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 0027 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 0027 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 0027 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 0027 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 0027 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 × 0027 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] APOSTROPHE (Single_Quote) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 × 002C × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 × 002C × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 002C × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002C × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002C × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 002C × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 002C × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 002C × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 002C × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002C × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002C × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 × 002C × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] COMMA (MidNum) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0001 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 000D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 000A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 000B ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 3031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0041 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 002E ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] FULL STOP (MidNumLet) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0308 × 0030 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 005F ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 05D0 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0022 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] QUOTATION MARK (Double_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 231A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0020 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 × 00AD ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 × 0300 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 × 200D ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 0027 × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] APOSTROPHE (Single_Quote) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 ÷ 002E × 2060 × 0308 ÷ 0061 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 003A ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 0027 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 002C ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 0031 × 002E × 2060 × 0308 × 0031 ÷ 002E × 2060 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [12.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [11.0] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
+÷ 000D × 000A ÷ 0061 ÷ 000A ÷ 0308 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [3.1] LATIN SMALL LETTER A (ALetter) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [0.3]
+÷ 0061 × 0308 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [0.3]
+÷ 0020 × 200D ÷ 0646 ÷ # ÷ [0.2] SPACE (WSegSpace) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] ARABIC LETTER NOON (ALetter) ÷ [0.3]
+÷ 0646 × 200D ÷ 0020 ÷ # ÷ [0.2] ARABIC LETTER NOON (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] SPACE (WSegSpace) ÷ [0.3]
+÷ 0041 × 0041 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0041 × 003A × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [6.0] COLON (MidLetter) × [7.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0041 ÷ 003A ÷ 003A ÷ 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 05D0 × 0027 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [7.1] APOSTROPHE (Single_Quote) ÷ [0.3]
+÷ 05D0 × 0022 × 05D0 ÷ # ÷ [0.2] HEBREW LETTER ALEF (Hebrew_Letter) × [7.2] QUOTATION MARK (Double_Quote) × [7.3] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+÷ 0041 × 0030 × 0030 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ZERO (Numeric) × [8.0] DIGIT ZERO (Numeric) × [10.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 0030 × 002C × 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) × [12.0] COMMA (MidNum) × [11.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 0030 ÷ 002C ÷ 002C ÷ 0030 ÷ # ÷ [0.2] DIGIT ZERO (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ZERO (Numeric) ÷ [0.3]
+÷ 3031 × 3031 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+÷ 0041 × 005F × 0030 × 005F × 3031 × 005F ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ZERO (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+÷ 0041 × 005F × 005F × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [15.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 × 200D ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 200D × 1F1E7 ÷ 1F1E8 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 × 1F1E9 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [16.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 1F476 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] BABY (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [999.0] BABY (ExtPict) ÷ [0.3]
+÷ 1F6D1 × 200D × 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 0061 × 200D × 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 2701 × 200D × 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (Other) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] UPPER BLADE SCISSORS (Other) ÷ [0.3]
+÷ 0061 × 200D × 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] UPPER BLADE SCISSORS (Other) ÷ [0.3]
+÷ 1F476 × 1F3FF × 0308 × 200D × 1F476 × 1F3FF ÷ # ÷ [0.2] BABY (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] BABY (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [0.3]
+÷ 1F6D1 × 1F3FF ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [0.3]
+÷ 200D × 1F6D1 × 1F3FF ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) × [4.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend_FE) ÷ [0.3]
+÷ 200D × 1F6D1 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 200D × 1F6D1 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_FE) × [3.3] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 1F6D1 ÷ 1F6D1 ÷ # ÷ [0.2] OCTAGONAL SIGN (ExtPict) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3]
+÷ 0061 × 0308 × 200D × 0308 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) × [4.0] ZERO WIDTH JOINER (ZWJ_FE) × [4.0] COMBINING DIAERESIS (Extend_FE) × [5.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0061 ÷ 0020 × 0020 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] SPACE (WSegSpace) × [3.4] SPACE (WSegSpace) ÷ [999.0] LATIN SMALL LETTER B (ALetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0031 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0031 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0031 × 005F × 0061 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] DIGIT ONE (Numeric) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 003A ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COLON (MidLetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002E ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 003A ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 003A ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COLON (MidLetter) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 002E ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 002E ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] FULL STOP (MidNumLet) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 002C ÷ 0031 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] DIGIT ONE (Numeric) ÷ [0.3]
+÷ 0061 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0031 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] DIGIT ONE (Numeric) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+÷ 0061 × 005F × 0061 ÷ 002C ÷ 002C ÷ 0061 ÷ # ÷ [0.2] LATIN SMALL LETTER A (ALetter) × [13.1] LOW LINE (ExtendNumLet) × [13.2] LATIN SMALL LETTER A (ALetter) ÷ [999.0] COMMA (MidNum) ÷ [999.0] COMMA (MidNum) ÷ [999.0] LATIN SMALL LETTER A (ALetter) ÷ [0.3]
+#
+# Lines: 1823
+#
+# EOF
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa
new file mode 100644
index 000000000..31f99c1f5
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa
new file mode 100644
index 000000000..3a51728bb
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs
new file mode 100644
index 000000000..dea4a7e3e
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_fwd.rs
@@ -0,0 +1,41 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name GRAPHEME_BREAK_FWD --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)]
+//
+// ucd-generate 0.2.12 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+pub static GRAPHEME_BREAK_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u16>,
+> = ::once_cell::sync::Lazy::new(|| {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("grapheme_break_fwd.bigendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+});
+
+#[cfg(target_endian = "little")]
+pub static GRAPHEME_BREAK_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u16>,
+> = ::once_cell::sync::Lazy::new(|| {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("grapheme_break_fwd.littleendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+});
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa
new file mode 100644
index 000000000..742d2a6a2
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa
new file mode 100644
index 000000000..d1937f26c
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs
new file mode 100644
index 000000000..2d2cd542f
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/grapheme_break_rev.rs
@@ -0,0 +1,41 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name GRAPHEME_BREAK_REV --reverse --longest --sparse --minimize --anchored --state-size 2 src/unicode/fsm/ [snip (arg too long)]
+//
+// ucd-generate 0.2.12 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+pub static GRAPHEME_BREAK_REV: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u16>,
+> = ::once_cell::sync::Lazy::new(|| {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("grapheme_break_rev.bigendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+});
+
+#[cfg(target_endian = "little")]
+pub static GRAPHEME_BREAK_REV: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u16>,
+> = ::once_cell::sync::Lazy::new(|| {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("grapheme_break_rev.littleendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) }
+});
diff --git a/vendor/bstr/src/unicode/fsm/mod.rs b/vendor/bstr/src/unicode/fsm/mod.rs
new file mode 100644
index 000000000..ae6c499fc
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/mod.rs
@@ -0,0 +1,8 @@
+pub mod grapheme_break_fwd;
+pub mod grapheme_break_rev;
+pub mod regional_indicator_rev;
+pub mod sentence_break_fwd;
+pub mod simple_word_fwd;
+pub mod whitespace_anchored_fwd;
+pub mod whitespace_anchored_rev;
+pub mod word_break_fwd;
diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa
new file mode 100644
index 000000000..1a3357f71
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa
new file mode 100644
index 000000000..e437aae3a
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs
new file mode 100644
index 000000000..db7a40fcd
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/regional_indicator_rev.rs
@@ -0,0 +1,41 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name REGIONAL_INDICATOR_REV --reverse --classes --minimize --anchored --premultiply --state-size 1 src/unicode/fsm/ \p{gcb=Regional_Indicator}
+//
+// ucd-generate 0.2.12 is available on crates.io.
+
+#[cfg(target_endian = "big")]
+pub static REGIONAL_INDICATOR_REV: ::once_cell::sync::Lazy<
+ ::regex_automata::DenseDFA<&'static [u8], u8>,
+> = ::once_cell::sync::Lazy::new(|| {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("regional_indicator_rev.bigendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
+});
+
+#[cfg(target_endian = "little")]
+pub static REGIONAL_INDICATOR_REV: ::once_cell::sync::Lazy<
+ ::regex_automata::DenseDFA<&'static [u8], u8>,
+> = ::once_cell::sync::Lazy::new(|| {
+ #[repr(C)]
+ struct Aligned<B: ?Sized> {
+ _align: [u8; 0],
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("regional_indicator_rev.littleendian.dfa"),
+ };
+
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) }
+});
diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa
new file mode 100644
index 000000000..1abdae880
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa
new file mode 100644
index 000000000..2f8aadd30
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs
new file mode 100644
index 000000000..97dd658e4
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/sentence_break_fwd.rs
@@ -0,0 +1,41 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name SENTENCE_BREAK_FWD --minimize --sparse --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)]
+//
+// ucd-generate 0.2.12 is available on crates.io.
+
+#[cfg(target_endian = "big")] // this definition is compiled only for big-endian targets
+pub static SENTENCE_BREAK_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u32>,
+> = ::once_cell::sync::Lazy::new(|| { // the closure builds the DFA view lazily instead of at program start
+ #[repr(C)]
+ struct Aligned<B: ?Sized> { // wrapper that pairs the raw bytes with an alignment marker
+ _align: [u8; 0], // zero-length field: takes no space, gives the struct the alignment of `u8`
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("sentence_break_fwd.bigendian.dfa"), // big-endian serialized sparse DFA, embedded at compile time
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } // NOTE(review): presumably sound only because the embedded file is a valid DFA serialized for this endianness -- confirm against regex-automata docs
+});
+
+#[cfg(target_endian = "little")] // this definition is compiled only for little-endian targets
+pub static SENTENCE_BREAK_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u32>,
+> = ::once_cell::sync::Lazy::new(|| { // the closure builds the DFA view lazily instead of at program start
+ #[repr(C)]
+ struct Aligned<B: ?Sized> { // wrapper that pairs the raw bytes with an alignment marker
+ _align: [u8; 0], // zero-length field: takes no space, gives the struct the alignment of `u8`
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("sentence_break_fwd.littleendian.dfa"), // little-endian serialized sparse DFA, embedded at compile time
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } // NOTE(review): presumably sound only because the embedded file is a valid DFA serialized for this endianness -- confirm against regex-automata docs
+});
diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa
new file mode 100644
index 000000000..888e46599
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/simple_word_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa
new file mode 100644
index 000000000..a1d527c74
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/simple_word_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs b/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs
new file mode 100644
index 000000000..32b69b611
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/simple_word_fwd.rs
@@ -0,0 +1,41 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name SIMPLE_WORD_FWD --sparse --minimize --state-size 2 src/unicode/fsm/ \w
+//
+// ucd-generate 0.2.12 is available on crates.io.
+
+#[cfg(target_endian = "big")] // this definition is compiled only for big-endian targets
+pub static SIMPLE_WORD_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u16>,
+> = ::once_cell::sync::Lazy::new(|| { // the closure builds the DFA view lazily instead of at program start
+ #[repr(C)]
+ struct Aligned<B: ?Sized> { // wrapper that pairs the raw bytes with an alignment marker
+ _align: [u8; 0], // zero-length field: takes no space, gives the struct the alignment of `u8`
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("simple_word_fwd.bigendian.dfa"), // big-endian serialized sparse DFA, embedded at compile time
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } // NOTE(review): presumably sound only because the embedded file is a valid DFA serialized for this endianness -- confirm against regex-automata docs
+});
+
+#[cfg(target_endian = "little")] // this definition is compiled only for little-endian targets
+pub static SIMPLE_WORD_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u16>,
+> = ::once_cell::sync::Lazy::new(|| { // the closure builds the DFA view lazily instead of at program start
+ #[repr(C)]
+ struct Aligned<B: ?Sized> { // wrapper that pairs the raw bytes with an alignment marker
+ _align: [u8; 0], // zero-length field: takes no space, gives the struct the alignment of `u8`
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("simple_word_fwd.littleendian.dfa"), // little-endian serialized sparse DFA, embedded at compile time
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } // NOTE(review): presumably sound only because the embedded file is a valid DFA serialized for this endianness -- confirm against regex-automata docs
+});
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa
new file mode 100644
index 000000000..bcfc4e9a1
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa
new file mode 100644
index 000000000..d534a464a
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs
new file mode 100644
index 000000000..0780412ae
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_fwd.rs
@@ -0,0 +1,41 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name WHITESPACE_ANCHORED_FWD --anchored --classes --premultiply --minimize --state-size 1 src/unicode/fsm/ \s+
+//
+// ucd-generate 0.2.12 is available on crates.io.
+
+#[cfg(target_endian = "big")] // this definition is compiled only for big-endian targets
+pub static WHITESPACE_ANCHORED_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::DenseDFA<&'static [u8], u8>,
+> = ::once_cell::sync::Lazy::new(|| { // the closure builds the DFA view lazily instead of at program start
+ #[repr(C)]
+ struct Aligned<B: ?Sized> { // wrapper that pairs the raw bytes with an alignment marker
+ _align: [u8; 0], // zero-length field: takes no space, gives the struct the alignment of `u8`
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("whitespace_anchored_fwd.bigendian.dfa"), // big-endian serialized DFA, embedded at compile time
+ };
+
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } // NOTE(review): presumably sound only because the embedded file is a valid DFA serialized for this endianness -- confirm against regex-automata docs
+});
+
+#[cfg(target_endian = "little")] // this definition is compiled only for little-endian targets
+pub static WHITESPACE_ANCHORED_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::DenseDFA<&'static [u8], u8>,
+> = ::once_cell::sync::Lazy::new(|| { // the closure builds the DFA view lazily instead of at program start
+ #[repr(C)]
+ struct Aligned<B: ?Sized> { // wrapper that pairs the raw bytes with an alignment marker
+ _align: [u8; 0], // zero-length field: takes no space, gives the struct the alignment of `u8`
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("whitespace_anchored_fwd.littleendian.dfa"), // little-endian serialized DFA, embedded at compile time
+ };
+
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } // NOTE(review): presumably sound only because the embedded file is a valid DFA serialized for this endianness -- confirm against regex-automata docs
+});
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa
new file mode 100644
index 000000000..427d3a922
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa
new file mode 100644
index 000000000..7cc3a0a99
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs
new file mode 100644
index 000000000..3d0d7a661
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/whitespace_anchored_rev.rs
@@ -0,0 +1,41 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name WHITESPACE_ANCHORED_REV --reverse --anchored --classes --premultiply --minimize --state-size 2 src/unicode/fsm/ \s+
+//
+// ucd-generate 0.2.12 is available on crates.io.
+
+#[cfg(target_endian = "big")] // this definition is compiled only for big-endian targets
+pub static WHITESPACE_ANCHORED_REV: ::once_cell::sync::Lazy<
+ ::regex_automata::DenseDFA<&'static [u16], u16>,
+> = ::once_cell::sync::Lazy::new(|| { // the closure builds the DFA view lazily instead of at program start
+ #[repr(C)]
+ struct Aligned<B: ?Sized> { // wrapper that pairs the raw bytes with an alignment marker
+ _align: [u16; 0], // zero-length field: takes no space, but gives the struct (and so `bytes`) the alignment of `u16`
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("whitespace_anchored_rev.bigendian.dfa"), // big-endian serialized DFA, embedded at compile time
+ };
+
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } // NOTE(review): presumably sound only because the embedded file is a valid, `u16`-aligned DFA serialized for this endianness -- confirm against regex-automata docs
+});
+
+#[cfg(target_endian = "little")] // this definition is compiled only for little-endian targets
+pub static WHITESPACE_ANCHORED_REV: ::once_cell::sync::Lazy<
+ ::regex_automata::DenseDFA<&'static [u16], u16>,
+> = ::once_cell::sync::Lazy::new(|| { // the closure builds the DFA view lazily instead of at program start
+ #[repr(C)]
+ struct Aligned<B: ?Sized> { // wrapper that pairs the raw bytes with an alignment marker
+ _align: [u16; 0], // zero-length field: takes no space, but gives the struct (and so `bytes`) the alignment of `u16`
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("whitespace_anchored_rev.littleendian.dfa"), // little-endian serialized DFA, embedded at compile time
+ };
+
+ unsafe { ::regex_automata::DenseDFA::from_bytes(&ALIGNED.bytes) } // NOTE(review): presumably sound only because the embedded file is a valid, `u16`-aligned DFA serialized for this endianness -- confirm against regex-automata docs
+});
diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa b/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa
new file mode 100644
index 000000000..efb9c8198
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/word_break_fwd.bigendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa b/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa
new file mode 100644
index 000000000..9a716d060
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/word_break_fwd.littleendian.dfa
Binary files differ
diff --git a/vendor/bstr/src/unicode/fsm/word_break_fwd.rs b/vendor/bstr/src/unicode/fsm/word_break_fwd.rs
new file mode 100644
index 000000000..dcb5f6bce
--- /dev/null
+++ b/vendor/bstr/src/unicode/fsm/word_break_fwd.rs
@@ -0,0 +1,41 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+// ucd-generate dfa --name WORD_BREAK_FWD --sparse --minimize --anchored --state-size 4 src/unicode/fsm/ [snip (arg too long)]
+//
+// ucd-generate 0.2.12 is available on crates.io.
+
+#[cfg(target_endian = "big")] // this definition is compiled only for big-endian targets
+pub static WORD_BREAK_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u32>,
+> = ::once_cell::sync::Lazy::new(|| { // the closure builds the DFA view lazily instead of at program start
+ #[repr(C)]
+ struct Aligned<B: ?Sized> { // wrapper that pairs the raw bytes with an alignment marker
+ _align: [u8; 0], // zero-length field: takes no space, gives the struct the alignment of `u8`
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("word_break_fwd.bigendian.dfa"), // big-endian serialized sparse DFA, embedded at compile time
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } // NOTE(review): presumably sound only because the embedded file is a valid DFA serialized for this endianness -- confirm against regex-automata docs
+});
+
+#[cfg(target_endian = "little")] // this definition is compiled only for little-endian targets
+pub static WORD_BREAK_FWD: ::once_cell::sync::Lazy<
+ ::regex_automata::SparseDFA<&'static [u8], u32>,
+> = ::once_cell::sync::Lazy::new(|| { // the closure builds the DFA view lazily instead of at program start
+ #[repr(C)]
+ struct Aligned<B: ?Sized> { // wrapper that pairs the raw bytes with an alignment marker
+ _align: [u8; 0], // zero-length field: takes no space, gives the struct the alignment of `u8`
+ bytes: B,
+ }
+
+ static ALIGNED: &'static Aligned<[u8]> = &Aligned {
+ _align: [],
+ bytes: *include_bytes!("word_break_fwd.littleendian.dfa"), // little-endian serialized sparse DFA, embedded at compile time
+ };
+
+ unsafe { ::regex_automata::SparseDFA::from_bytes(&ALIGNED.bytes) } // NOTE(review): presumably sound only because the embedded file is a valid DFA serialized for this endianness -- confirm against regex-automata docs
+});
diff --git a/vendor/bstr/src/unicode/grapheme.rs b/vendor/bstr/src/unicode/grapheme.rs
new file mode 100644
index 000000000..13b730c48
--- /dev/null
+++ b/vendor/bstr/src/unicode/grapheme.rs
@@ -0,0 +1,381 @@
+use regex_automata::DFA;
+
+use crate::{
+ ext_slice::ByteSlice,
+ unicode::fsm::{
+ grapheme_break_fwd::GRAPHEME_BREAK_FWD,
+ grapheme_break_rev::GRAPHEME_BREAK_REV,
+ regional_indicator_rev::REGIONAL_INDICATOR_REV,
+ },
+ utf8,
+};
+
+/// An iterator over grapheme clusters in a byte string.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::graphemes`](trait.ByteSlice.html#method.graphemes).
+///
+/// Unicode defines a grapheme cluster as an *approximation* to a single user
+/// visible character. A grapheme cluster, or just "grapheme," is made up of
+/// one or more codepoints. For end user oriented tasks, one should generally
+/// prefer using graphemes instead of [`Chars`](struct.Chars.html), which
+/// always yields one codepoint at a time.
+///
+/// Since graphemes are made up of one or more codepoints, this iterator yields
+/// `&str` elements. When invalid UTF-8 is encountered, replacement codepoints
+/// are [substituted](index.html#handling-of-invalid-utf-8).
+///
+/// This iterator can be used in reverse. When reversed, exactly the same
+/// set of grapheme clusters is yielded, but in reverse order.
+///
+/// This iterator only yields *extended* grapheme clusters, in accordance with
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Grapheme_Cluster_Boundaries).
+#[derive(Clone, Debug)]
+pub struct Graphemes<'a> {
+ bs: &'a [u8], // bytes not yet yielded; trimmed at the front by `next` and at the back by `next_back`
+}
+
+impl<'a> Graphemes<'a> {
+    /// Create a grapheme cluster iterator over `bs`.
+    pub(crate) fn new(bs: &'a [u8]) -> Graphemes<'a> {
+        Self { bs }
+    }
+
+    /// Return the part of the original byte string that this iterator has
+    /// not yet yielded.
+    ///
+    /// The returned slice borrows from the original data (lifetime `'a`)
+    /// rather than from the iterator, so the iterator remains usable while
+    /// the slice is alive.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bstr::ByteSlice;
+    ///
+    /// let mut it = b"abc".graphemes();
+    ///
+    /// assert_eq!(b"abc", it.as_bytes());
+    /// it.next();
+    /// assert_eq!(b"bc", it.as_bytes());
+    /// it.next();
+    /// it.next();
+    /// assert_eq!(b"", it.as_bytes());
+    /// ```
+    #[inline]
+    pub fn as_bytes(&self) -> &'a [u8] {
+        self.bs
+    }
+}
+
+impl<'a> Iterator for Graphemes<'a> {
+    type Item = &'a str;
+
+    /// Decode the grapheme cluster at the front of the remaining bytes and
+    /// advance past it.
+    #[inline]
+    fn next(&mut self) -> Option<&'a str> {
+        match decode_grapheme(self.bs) {
+            // A decoded size of zero ends the iteration.
+            (_, 0) => None,
+            (cluster, consumed) => {
+                self.bs = &self.bs[consumed..];
+                Some(cluster)
+            }
+        }
+    }
+}
+
+impl<'a> DoubleEndedIterator for Graphemes<'a> {
+    /// Decode the grapheme cluster at the back of the remaining bytes and
+    /// drop it from the end of the slice.
+    #[inline]
+    fn next_back(&mut self) -> Option<&'a str> {
+        match decode_last_grapheme(self.bs) {
+            // A decoded size of zero ends the iteration.
+            (_, 0) => None,
+            (cluster, consumed) => {
+                let keep = self.bs.len() - consumed;
+                self.bs = &self.bs[..keep];
+                Some(cluster)
+            }
+        }
+    }
+}
+
+/// An iterator over grapheme clusters in a byte string and their byte index
+/// positions.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::grapheme_indices`](trait.ByteSlice.html#method.grapheme_indices).
+///
+/// Unicode defines a grapheme cluster as an *approximation* to a single user
+/// visible character. A grapheme cluster, or just "grapheme," is made up of
+/// one or more codepoints. For end user oriented tasks, one should generally
+/// prefer using graphemes instead of [`Chars`](struct.Chars.html), which
+/// always yields one codepoint at a time.
+///
+/// Since graphemes are made up of one or more codepoints, this iterator
+/// yields `&str` elements (along with their start and end byte offsets).
+/// When invalid UTF-8 is encountered, replacement codepoints are
+/// [substituted](index.html#handling-of-invalid-utf-8). Because of this, the
+/// indices yielded by this iterator may not correspond to the length of the
+/// grapheme cluster yielded with those indices. For example, when this
+/// iterator encounters `\xFF` in the byte string, then it will yield a pair
+/// of indices ranging over a single byte, but will provide an `&str`
+/// equivalent to `"\u{FFFD}"`, which is three bytes in length. However, when
+/// given only valid UTF-8, then all indices are in exact correspondence with
+/// their paired grapheme cluster.
+///
+/// This iterator can be used in reverse. When reversed, exactly the same
+/// set of grapheme clusters is yielded, but in reverse order.
+///
+/// This iterator only yields *extended* grapheme clusters, in accordance with
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Grapheme_Cluster_Boundaries).
+#[derive(Clone, Debug)]
+pub struct GraphemeIndices<'a> {
+ bs: &'a [u8], // bytes not yet yielded; trimmed at the front by `next` and at the back by `next_back`
+ forward_index: usize, // offset into the original slice where `bs` currently starts; starts at 0
+ reverse_index: usize, // offset into the original slice where `bs` currently ends; starts at the original length
+}
+
+impl<'a> GraphemeIndices<'a> {
+    /// Create an indexed grapheme cluster iterator over `bs`.
+    ///
+    /// The forward offset starts at `0` and the reverse offset at
+    /// `bs.len()`.
+    pub(crate) fn new(bs: &'a [u8]) -> GraphemeIndices<'a> {
+        let reverse_index = bs.len();
+        Self { bs, forward_index: 0, reverse_index }
+    }
+
+    /// Return the part of the original byte string that this iterator has
+    /// not yet yielded.
+    ///
+    /// The returned slice borrows from the original data (lifetime `'a`)
+    /// rather than from the iterator, so the iterator remains usable while
+    /// the slice is alive.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bstr::ByteSlice;
+    ///
+    /// let mut it = b"abc".grapheme_indices();
+    ///
+    /// assert_eq!(b"abc", it.as_bytes());
+    /// it.next();
+    /// assert_eq!(b"bc", it.as_bytes());
+    /// it.next();
+    /// it.next();
+    /// assert_eq!(b"", it.as_bytes());
+    /// ```
+    #[inline]
+    pub fn as_bytes(&self) -> &'a [u8] {
+        self.bs
+    }
+}
+
+impl<'a> Iterator for GraphemeIndices<'a> {
+    type Item = (usize, usize, &'a str);
+
+    /// Yield `(start, end, grapheme)` for the cluster at the front of the
+    /// remaining bytes, where `start..end` is its byte range in the
+    /// original slice.
+    #[inline]
+    fn next(&mut self) -> Option<(usize, usize, &'a str)> {
+        let (cluster, consumed) = decode_grapheme(self.bs);
+        if consumed == 0 {
+            return None;
+        }
+        let start = self.forward_index;
+        let end = start + consumed;
+        self.bs = &self.bs[consumed..];
+        self.forward_index = end;
+        Some((start, end, cluster))
+    }
+}
+
+impl<'a> DoubleEndedIterator for GraphemeIndices<'a> {
+    /// Yield `(start, end, grapheme)` for the cluster at the back of the
+    /// remaining bytes, where `start..end` is its byte range in the
+    /// original slice.
+    #[inline]
+    fn next_back(&mut self) -> Option<(usize, usize, &'a str)> {
+        let (cluster, consumed) = decode_last_grapheme(self.bs);
+        if consumed == 0 {
+            return None;
+        }
+        let keep = self.bs.len() - consumed;
+        self.bs = &self.bs[..keep];
+        let end = self.reverse_index;
+        let start = end - consumed;
+        self.reverse_index = start;
+        Some((start, end, cluster))
+    }
+}
+
+/// Decode the first grapheme cluster of `bs`.
+///
+/// Returns the grapheme together with the number of bytes of `bs` that were
+/// consumed to produce it. An empty input yields `("", 0)`. When the input
+/// starts with invalid UTF-8, the grapheme is the Unicode replacement
+/// codepoint and the byte count is the size of the invalid sequence, so the
+/// count may differ from the length of the returned string.
+pub fn decode_grapheme(bs: &[u8]) -> (&str, usize) {
+    const REPLACEMENT: &str = "\u{FFFD}";
+
+    if bs.is_empty() {
+        return ("", 0);
+    }
+    if bs.len() >= 2 {
+        let (first, second) = (bs[0], bs[1]);
+        if first.is_ascii() && second.is_ascii() && !first.is_ascii_whitespace()
+        {
+            // FIXME: This special case is unfortunate, but it is a
+            // significant win on predominantly ASCII text: the DFA carries
+            // some overhead, and paying it for every byte of mostly ASCII
+            // input is slow. Revisit once regex-automata 0.3 is out, though
+            // the special case may be hard to avoid since a DFA always
+            // needs at least some memory access.
+
+            // SAFETY: a single ASCII byte is always valid UTF-8.
+            let single = unsafe { bs[..1].to_str_unchecked() };
+            return (single, 1);
+        }
+    }
+    match GRAPHEME_BREAK_FWD.find(bs) {
+        Some(end) => {
+            // SAFETY: the DFA can only match valid UTF-8.
+            let cluster = unsafe { bs[..end].to_str_unchecked() };
+            (cluster, cluster.len())
+        }
+        None => {
+            // No match on non-empty input implies invalid UTF-8 at the
+            // front; report how many bytes the lossy decoder skips.
+            let (_, skipped) = utf8::decode_lossy(bs);
+            (REPLACEMENT, skipped)
+        }
+    }
+}
+
+/// Decode the last grapheme cluster of `bs`.
+///
+/// Returns the grapheme together with the number of trailing bytes of `bs`
+/// it was decoded from. An empty input yields `("", 0)`. When the input ends
+/// with invalid UTF-8, the grapheme is the Unicode replacement codepoint and
+/// the byte count is the size of the invalid sequence.
+fn decode_last_grapheme(bs: &[u8]) -> (&str, usize) {
+    const REPLACEMENT: &str = "\u{FFFD}";
+
+    if bs.is_empty() {
+        return ("", 0);
+    }
+    match GRAPHEME_BREAK_REV.rfind(bs) {
+        Some(candidate) => {
+            // The reverse DFA may pair up regional indicators incorrectly;
+            // correct the start offset before slicing.
+            let start = adjust_rev_for_regional_indicator(bs, candidate);
+            // SAFETY: the DFA can only match valid UTF-8.
+            let cluster = unsafe { bs[start..].to_str_unchecked() };
+            (cluster, cluster.len())
+        }
+        None => {
+            // No match on non-empty input implies invalid UTF-8 at the end.
+            let (_, skipped) = utf8::decode_last_lossy(bs);
+            (REPLACEMENT, skipped)
+        }
+    }
+}
+
+/// Fix up the start offset of the last grapheme in `bs`, where `i` is the
+/// initial guess and `&bs[i..]` is the candidate grapheme.
+///
+/// `i` is returned unchanged unless `&bs[i..]` is a pair of regional
+/// indicator codepoints. In that case, if an odd number of additional
+/// regional indicators precedes `i`, the returned offset is moved forward so
+/// that the grapheme covers only a single regional indicator.
+///
+/// This correction is needed because a break may not fall between regional
+/// indicators where that would leave an odd number of them before the break,
+/// counted from the *start* of the string. A reverse regex cannot easily
+/// detect that without look-around.
+fn adjust_rev_for_regional_indicator(mut bs: &[u8], i: usize) -> usize {
+    // Every regional indicator is encoded in 4 bytes, so only an 8 byte
+    // candidate can be a pair of them.
+    if bs.len() - i != 8 {
+        return i;
+    }
+    // Count the contiguous run of regional indicators at the end of `bs`.
+    // An even count means the pair we found is a legitimate grapheme; an odd
+    // count means only the final indicator is.
+    //
+    // FIXME: This is quadratic in the worst case, e.g., a string made up of
+    // nothing but regional indicators. Fixing it probably requires
+    // restructuring the callers so the run is not rescanned every time.
+    let mut count = 0;
+    loop {
+        match REGIONAL_INDICATOR_REV.rfind(bs) {
+            Some(start) => {
+                bs = &bs[..start];
+                count += 1;
+            }
+            None => break,
+        }
+    }
+    match count % 2 {
+        0 => i,
+        _ => i + 4,
+    }
+}
+
+#[cfg(all(test, feature = "std"))]
+mod tests {
+ #[cfg(not(miri))]
+ use ucd_parse::GraphemeClusterBreakTest;
+
+ use crate::{ext_slice::ByteSlice, tests::LOSSY_TESTS};
+
+ use super::*;
+
+ #[test]
+ #[cfg(not(miri))]
+ fn forward_ucd() { // forward iteration must reproduce the expected clusters of every UCD test case
+ for (i, test) in ucdtests().into_iter().enumerate() {
+ let given = test.grapheme_clusters.concat(); // the input is the expected clusters joined back together
+ let got: Vec<String> = Graphemes::new(given.as_bytes())
+ .map(|cluster| cluster.to_string())
+ .collect();
+ assert_eq!(
+ test.grapheme_clusters,
+ got,
+ "\ngrapheme forward break test {} failed:\n\
+ given: {:?}\n\
+ expected: {:?}\n\
+ got: {:?}\n",
+ i,
+ uniescape(&given),
+ uniescape_vec(&test.grapheme_clusters),
+ uniescape_vec(&got),
+ );
+ }
+ }
+
+ #[test]
+ #[cfg(not(miri))]
+ fn reverse_ucd() { // reverse iteration must yield the same clusters as forward iteration
+ for (i, test) in ucdtests().into_iter().enumerate() {
+ let given = test.grapheme_clusters.concat();
+ let mut got: Vec<String> = Graphemes::new(given.as_bytes())
+ .rev()
+ .map(|cluster| cluster.to_string())
+ .collect();
+ got.reverse(); // restore forward order so the result compares directly with the expected clusters
+ assert_eq!(
+ test.grapheme_clusters,
+ got,
+ "\n\ngrapheme reverse break test {} failed:\n\
+ given: {:?}\n\
+ expected: {:?}\n\
+ got: {:?}\n",
+ i,
+ uniescape(&given),
+ uniescape_vec(&test.grapheme_clusters),
+ uniescape_vec(&got),
+ );
+ }
+ }
+
+ #[test]
+ fn forward_lossy() { // concatenated forward output must equal the expected lossy decoding
+ for &(expected, input) in LOSSY_TESTS {
+ let got = Graphemes::new(input.as_bytes()).collect::<String>();
+ assert_eq!(expected, got);
+ }
+ }
+
+ #[test]
+ fn reverse_lossy() { // reverse output is compared against the expected string reversed char by char
+ for &(expected, input) in LOSSY_TESTS {
+ let expected: String = expected.chars().rev().collect();
+ let got =
+ Graphemes::new(input.as_bytes()).rev().collect::<String>();
+ assert_eq!(expected, got);
+ }
+ }
+
+ #[cfg(not(miri))]
+ fn uniescape(s: &str) -> String { // render every char as a `\u{...}` escape for readable failure messages
+ s.chars().flat_map(|c| c.escape_unicode()).collect::<String>()
+ }
+
+ #[cfg(not(miri))]
+ fn uniescape_vec(strs: &[String]) -> Vec<String> { // `uniescape` applied to each element
+ strs.iter().map(|s| uniescape(s)).collect()
+ }
+
+ /// Parse every test case in the bundled UCD `GraphemeBreakTest.txt`, skipping comment lines and lines that mention surrogates.
+ #[cfg(not(miri))]
+ fn ucdtests() -> Vec<GraphemeClusterBreakTest> {
+ const TESTDATA: &'static str =
+ include_str!("data/GraphemeBreakTest.txt");
+
+ let mut tests = vec![];
+ for mut line in TESTDATA.lines() {
+ line = line.trim();
+ if line.starts_with("#") || line.contains("surrogate") {
+ continue;
+ }
+ tests.push(line.parse().unwrap()); // a malformed test line is a bug in the bundled data, so panic
+ }
+ tests
+ }
+}
diff --git a/vendor/bstr/src/unicode/mod.rs b/vendor/bstr/src/unicode/mod.rs
new file mode 100644
index 000000000..80638e87c
--- /dev/null
+++ b/vendor/bstr/src/unicode/mod.rs
@@ -0,0 +1,12 @@
+pub use self::{
+ grapheme::{decode_grapheme, GraphemeIndices, Graphemes},
+ sentence::{SentenceIndices, Sentences},
+ whitespace::{whitespace_len_fwd, whitespace_len_rev},
+ word::{WordIndices, Words, WordsWithBreakIndices, WordsWithBreaks},
+};
+
+mod fsm;
+mod grapheme;
+mod sentence;
+mod whitespace;
+mod word;
diff --git a/vendor/bstr/src/unicode/sentence.rs b/vendor/bstr/src/unicode/sentence.rs
new file mode 100644
index 000000000..ff29c7e25
--- /dev/null
+++ b/vendor/bstr/src/unicode/sentence.rs
@@ -0,0 +1,225 @@
+use regex_automata::DFA;
+
+use crate::{
+ ext_slice::ByteSlice,
+ unicode::fsm::sentence_break_fwd::SENTENCE_BREAK_FWD, utf8,
+};
+
+/// An iterator over sentences in a byte string.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::sentences`](trait.ByteSlice.html#method.sentences).
+///
+/// Sentences typically include their trailing punctuation and whitespace.
+///
+/// Since sentences are made up of one or more codepoints, this iterator yields
+/// `&str` elements. When invalid UTF-8 is encountered, replacement codepoints
+/// are [substituted](index.html#handling-of-invalid-utf-8).
+///
+/// This iterator yields sentences in accordance with the default sentence
+/// boundary rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Sentence_Boundaries).
+#[derive(Clone, Debug)]
+pub struct Sentences<'a> {
+ bs: &'a [u8], // bytes not yet yielded; trimmed at the front by `next`
+}
+
+impl<'a> Sentences<'a> {
+    /// Create a sentence iterator over `bs`.
+    pub(crate) fn new(bs: &'a [u8]) -> Sentences<'a> {
+        Self { bs }
+    }
+
+    /// Return the part of the original byte string that this iterator has
+    /// not yet yielded.
+    ///
+    /// The returned slice borrows from the original data (lifetime `'a`)
+    /// rather than from the iterator, so the iterator remains usable while
+    /// the slice is alive.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bstr::ByteSlice;
+    ///
+    /// let mut it = b"I want this. Not that. Right now.".sentences();
+    ///
+    /// assert_eq!(&b"I want this. Not that. Right now."[..], it.as_bytes());
+    /// it.next();
+    /// assert_eq!(b"Not that. Right now.", it.as_bytes());
+    /// it.next();
+    /// it.next();
+    /// assert_eq!(b"", it.as_bytes());
+    /// ```
+    #[inline]
+    pub fn as_bytes(&self) -> &'a [u8] {
+        self.bs
+    }
+}
+
+impl<'a> Iterator for Sentences<'a> {
+    type Item = &'a str;
+
+    /// Decode the sentence at the front of the remaining bytes and advance
+    /// past it.
+    #[inline]
+    fn next(&mut self) -> Option<&'a str> {
+        match decode_sentence(self.bs) {
+            // A decoded size of zero ends the iteration.
+            (_, 0) => None,
+            (sentence, consumed) => {
+                self.bs = &self.bs[consumed..];
+                Some(sentence)
+            }
+        }
+    }
+}
+
+/// An iterator over sentences in a byte string, along with their byte offsets.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::sentence_indices`](trait.ByteSlice.html#method.sentence_indices).
+///
+/// Sentences typically include their trailing punctuation and whitespace.
+///
+/// Since sentences are made up of one or more codepoints, this iterator
+/// yields `&str` elements (along with their start and end byte offsets).
+/// When invalid UTF-8 is encountered, replacement codepoints are
+/// [substituted](index.html#handling-of-invalid-utf-8). Because of this, the
+/// indices yielded by this iterator may not correspond to the length of the
+/// sentence yielded with those indices. For example, when this iterator
+/// encounters `\xFF` in the byte string, then it will yield a pair of indices
+/// ranging over a single byte, but will provide an `&str` equivalent to
+/// `"\u{FFFD}"`, which is three bytes in length. However, when given only
+/// valid UTF-8, then all indices are in exact correspondence with their paired
+/// sentence.
+///
+/// This iterator yields sentences in accordance with the default sentence
+/// boundary rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Sentence_Boundaries).
+#[derive(Clone, Debug)]
+pub struct SentenceIndices<'a> {
+ bs: &'a [u8], // bytes not yet yielded; trimmed at the front by `next`
+ forward_index: usize, // offset into the original slice where `bs` currently starts; starts at 0
+}
+
+impl<'a> SentenceIndices<'a> {
+    /// Create an indexed sentence iterator over `bs`, with the forward
+    /// offset starting at `0`.
+    pub(crate) fn new(bs: &'a [u8]) -> SentenceIndices<'a> {
+        Self { bs, forward_index: 0 }
+    }
+
+    /// Return the part of the original byte string that this iterator has
+    /// not yet yielded.
+    ///
+    /// The returned slice borrows from the original data (lifetime `'a`)
+    /// rather than from the iterator, so the iterator remains usable while
+    /// the slice is alive.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bstr::ByteSlice;
+    ///
+    /// let mut it = b"I want this. Not that. Right now.".sentence_indices();
+    ///
+    /// assert_eq!(&b"I want this. Not that. Right now."[..], it.as_bytes());
+    /// it.next();
+    /// assert_eq!(b"Not that. Right now.", it.as_bytes());
+    /// it.next();
+    /// it.next();
+    /// assert_eq!(b"", it.as_bytes());
+    /// ```
+    #[inline]
+    pub fn as_bytes(&self) -> &'a [u8] {
+        self.bs
+    }
+}
+
+impl<'a> Iterator for SentenceIndices<'a> {
+    type Item = (usize, usize, &'a str);
+
+    /// Yield `(start, end, sentence)` for the sentence at the front of the
+    /// remaining bytes, where `start..end` is its byte range in the
+    /// original slice.
+    #[inline]
+    fn next(&mut self) -> Option<(usize, usize, &'a str)> {
+        let (sentence, consumed) = decode_sentence(self.bs);
+        if consumed == 0 {
+            return None;
+        }
+        let start = self.forward_index;
+        let end = start + consumed;
+        self.bs = &self.bs[consumed..];
+        self.forward_index = end;
+        Some((start, end, sentence))
+    }
+}
+
+/// Decode the first sentence of `bs`.
+///
+/// Returns the sentence together with the number of bytes of `bs` consumed
+/// to produce it. An empty input yields `("", 0)`. When the input starts
+/// with invalid UTF-8, the sentence is the Unicode replacement codepoint and
+/// the byte count is the size of the invalid sequence.
+fn decode_sentence(bs: &[u8]) -> (&str, usize) {
+    const REPLACEMENT: &str = "\u{FFFD}";
+
+    if bs.is_empty() {
+        return ("", 0);
+    }
+    match SENTENCE_BREAK_FWD.find(bs) {
+        Some(end) => {
+            // SAFETY: the DFA can only match valid UTF-8.
+            let sentence = unsafe { bs[..end].to_str_unchecked() };
+            (sentence, sentence.len())
+        }
+        None => {
+            // No match on non-empty input implies invalid UTF-8 at the
+            // front; report how many bytes the lossy decoder skips.
+            let (_, skipped) = utf8::decode_lossy(bs);
+            (REPLACEMENT, skipped)
+        }
+    }
+}
+
+#[cfg(all(test, feature = "std"))]
+mod tests {
+ #[cfg(not(miri))]
+ use ucd_parse::SentenceBreakTest;
+
+ use crate::ext_slice::ByteSlice;
+
+ #[test]
+ #[cfg(not(miri))]
+ fn forward_ucd() { // forward iteration must reproduce the expected sentences of every UCD test case
+ for (i, test) in ucdtests().into_iter().enumerate() {
+ let given = test.sentences.concat(); // the input is the expected sentences joined back together
+ let got = sentences(given.as_bytes());
+ assert_eq!(
+ test.sentences,
+ got,
+ "\n\nsentence forward break test {} failed:\n\
+ given: {:?}\n\
+ expected: {:?}\n\
+ got: {:?}\n",
+ i,
+ given,
+ strs_to_bstrs(&test.sentences),
+ strs_to_bstrs(&got),
+ );
+ }
+ }
+
+ // Some additional tests that don't seem to be covered by the UCD tests.
+ #[test]
+ fn forward_additional() {
+ assert_eq!(vec!["a.. ", "A"], sentences(b"a.. A"));
+ assert_eq!(vec!["a.. a"], sentences(b"a.. a"));
+
+ assert_eq!(vec!["a... ", "A"], sentences(b"a... A"));
+ assert_eq!(vec!["a... a"], sentences(b"a... a"));
+
+ assert_eq!(vec!["a...,..., a"], sentences(b"a...,..., a"));
+ }
+
+ fn sentences(bytes: &[u8]) -> Vec<&str> { // collect the sentences of `bytes` via `ByteSlice::sentences`
+ bytes.sentences().collect()
+ }
+
+ #[cfg(not(miri))]
+ fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> { // view each string as raw bytes for the failure message
+ strs.iter().map(|s| s.as_ref().as_bytes()).collect()
+ }
+
+ /// Parse every test case in the bundled UCD `SentenceBreakTest.txt`, skipping comment lines and lines that mention surrogates.
+ #[cfg(not(miri))]
+ fn ucdtests() -> Vec<SentenceBreakTest> {
+ const TESTDATA: &'static str =
+ include_str!("data/SentenceBreakTest.txt");
+
+ let mut tests = vec![];
+ for mut line in TESTDATA.lines() {
+ line = line.trim();
+ if line.starts_with("#") || line.contains("surrogate") {
+ continue;
+ }
+ tests.push(line.parse().unwrap()); // a malformed test line is a bug in the bundled data, so panic
+ }
+ tests
+ }
+}
diff --git a/vendor/bstr/src/unicode/whitespace.rs b/vendor/bstr/src/unicode/whitespace.rs
new file mode 100644
index 000000000..b5eff300e
--- /dev/null
+++ b/vendor/bstr/src/unicode/whitespace.rs
@@ -0,0 +1,16 @@
+use regex_automata::DFA;
+
+use crate::unicode::fsm::{
+ whitespace_anchored_fwd::WHITESPACE_ANCHORED_FWD,
+ whitespace_anchored_rev::WHITESPACE_ANCHORED_REV,
+};
+
+/// Return the byte length of the run of whitespace at the start of `slice`.
+///
+/// This is the end offset of the anchored forward whitespace DFA's match,
+/// i.e. the position of the first non-whitespace character. `0` is returned
+/// when the DFA reports no match.
+pub fn whitespace_len_fwd(slice: &[u8]) -> usize {
+    match WHITESPACE_ANCHORED_FWD.find(slice) {
+        Some(end) => end,
+        None => 0,
+    }
+}
+
+/// Return the byte offset at which the run of whitespace at the end of
+/// `slice` begins.
+///
+/// This is the start offset of the anchored reverse whitespace DFA's match,
+/// i.e. the position just past the last non-whitespace character.
+/// `slice.len()` is returned when the DFA reports no match.
+pub fn whitespace_len_rev(slice: &[u8]) -> usize {
+    match WHITESPACE_ANCHORED_REV.rfind(slice) {
+        Some(start) => start,
+        None => slice.len(),
+    }
+}
diff --git a/vendor/bstr/src/unicode/word.rs b/vendor/bstr/src/unicode/word.rs
new file mode 100644
index 000000000..849f0c8e2
--- /dev/null
+++ b/vendor/bstr/src/unicode/word.rs
@@ -0,0 +1,420 @@
+use regex_automata::DFA;
+
+use crate::{
+ ext_slice::ByteSlice,
+ unicode::fsm::{
+ simple_word_fwd::SIMPLE_WORD_FWD, word_break_fwd::WORD_BREAK_FWD,
+ },
+ utf8,
+};
+
+/// An iterator over words in a byte string.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::words`](trait.ByteSlice.html#method.words).
+///
+/// This is similar to the [`WordsWithBreaks`](struct.WordsWithBreaks.html)
+/// iterator, except it only returns elements that contain a "word" character.
+/// A word character is defined by UTS #18 (Annex C) to be the combination
+/// of the `Alphabetic` and `Join_Control` properties, along with the
+/// `Decimal_Number`, `Mark` and `Connector_Punctuation` general categories.
+///
+/// Since words are made up of one or more codepoints, this iterator yields
+/// `&str` elements. When invalid UTF-8 is encountered, replacement codepoints
+/// are [substituted](index.html#handling-of-invalid-utf-8).
+///
+/// This iterator yields words in accordance with the default word boundary
+/// rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries).
+/// In particular, this may not be suitable for Japanese and Chinese scripts
+/// that do not use spaces between words.
+#[derive(Clone, Debug)]
+pub struct Words<'a>(WordsWithBreaks<'a>);
+
+impl<'a> Words<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> Words<'a> {
+ Words(WordsWithBreaks::new(bs))
+ }
+
+ /// Return the not-yet-consumed input as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"foo bar baz".words();
+ ///
+ /// assert_eq!(b"foo bar baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b" baz", it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.0.as_bytes()
+ }
+}
+
+impl<'a> Iterator for Words<'a> {
+    type Item = &'a str;
+
+    /// Advance past segments that lack a word character and yield the next
+    /// segment that contains one, or `None` once the input is exhausted.
+    #[inline]
+    fn next(&mut self) -> Option<&'a str> {
+        self.0
+            .by_ref()
+            .find(|segment| SIMPLE_WORD_FWD.is_match(segment.as_bytes()))
+    }
+}
+
+/// An iterator over words in a byte string and their byte index positions.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::word_indices`](trait.ByteSlice.html#method.word_indices).
+///
+/// This is similar to the
+/// [`WordsWithBreakIndices`](struct.WordsWithBreakIndices.html) iterator,
+/// except it only returns elements that contain a "word" character. A
+/// word character is defined by UTS #18 (Annex C) to be the combination
+/// of the `Alphabetic` and `Join_Control` properties, along with the
+/// `Decimal_Number`, `Mark` and `Connector_Punctuation` general categories.
+///
+/// Since words are made up of one or more codepoints, this iterator
+/// yields `&str` elements (along with their start and end byte offsets).
+/// When invalid UTF-8 is encountered, replacement codepoints are
+/// [substituted](index.html#handling-of-invalid-utf-8). Because of this, the
+/// indices yielded by this iterator may not correspond to the length of the
+/// word yielded with those indices. For example, when this iterator encounters
+/// `\xFF` in the byte string, then it will yield a pair of indices ranging
+/// over a single byte, but will provide an `&str` equivalent to `"\u{FFFD}"`,
+/// which is three bytes in length. However, when given only valid UTF-8, then
+/// all indices are in exact correspondence with their paired word.
+///
+/// This iterator yields words in accordance with the default word boundary
+/// rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries).
+/// In particular, this may not be suitable for Japanese and Chinese scripts
+/// that do not use spaces between words.
+#[derive(Clone, Debug)]
+pub struct WordIndices<'a>(WordsWithBreakIndices<'a>);
+
+impl<'a> WordIndices<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> WordIndices<'a> {
+ WordIndices(WordsWithBreakIndices::new(bs))
+ }
+
+ /// Return the not-yet-consumed input as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"foo bar baz".word_indices();
+ ///
+ /// assert_eq!(b"foo bar baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b" baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.0.as_bytes()
+ }
+}
+
+impl<'a> Iterator for WordIndices<'a> {
+    type Item = (usize, usize, &'a str);
+
+    /// Advance past segments that lack a word character and yield the next
+    /// `(start, end, word)` triple whose word contains one.
+    #[inline]
+    fn next(&mut self) -> Option<(usize, usize, &'a str)> {
+        self.0
+            .by_ref()
+            .find(|&(_, _, word)| SIMPLE_WORD_FWD.is_match(word.as_bytes()))
+    }
+}
+
+/// An iterator over all word breaks in a byte string.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks).
+///
+/// This iterator yields not only all words, but the content that comes between
+/// words. In particular, if all elements yielded by this iterator are
+/// concatenated, then the result is the original string (subject to Unicode
+/// replacement codepoint substitutions).
+///
+/// Since words are made up of one or more codepoints, this iterator yields
+/// `&str` elements. When invalid UTF-8 is encountered, replacement codepoints
+/// are [substituted](index.html#handling-of-invalid-utf-8).
+///
+/// This iterator yields words in accordance with the default word boundary
+/// rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries).
+/// In particular, this may not be suitable for Japanese and Chinese scripts
+/// that do not use spaces between words.
+#[derive(Clone, Debug)]
+pub struct WordsWithBreaks<'a> {
+ bs: &'a [u8],
+}
+
+impl<'a> WordsWithBreaks<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> WordsWithBreaks<'a> {
+ WordsWithBreaks { bs }
+ }
+
+ /// Return the not-yet-consumed input as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"foo bar baz".words_with_breaks();
+ ///
+ /// assert_eq!(b"foo bar baz", it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b" bar baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b" baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for WordsWithBreaks<'a> {
+    type Item = &'a str;
+
+    /// Decode the next segment and advance past the bytes it covered.
+    #[inline]
+    fn next(&mut self) -> Option<&'a str> {
+        match decode_word(self.bs) {
+            // Nothing was consumed, so iteration is finished.
+            (_, 0) => None,
+            (segment, consumed) => {
+                self.bs = &self.bs[consumed..];
+                Some(segment)
+            }
+        }
+    }
+}
+
+/// An iterator over all word breaks in a byte string, along with their byte
+/// index positions.
+///
+/// This iterator is typically constructed by
+/// [`ByteSlice::words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices).
+///
+/// This iterator yields not only all words, but the content that comes between
+/// words. In particular, if all elements yielded by this iterator are
+/// concatenated, then the result is the original string (subject to Unicode
+/// replacement codepoint substitutions).
+///
+/// Since words are made up of one or more codepoints, this iterator
+/// yields `&str` elements (along with their start and end byte offsets).
+/// When invalid UTF-8 is encountered, replacement codepoints are
+/// [substituted](index.html#handling-of-invalid-utf-8). Because of this, the
+/// indices yielded by this iterator may not correspond to the length of the
+/// word yielded with those indices. For example, when this iterator encounters
+/// `\xFF` in the byte string, then it will yield a pair of indices ranging
+/// over a single byte, but will provide an `&str` equivalent to `"\u{FFFD}"`,
+/// which is three bytes in length. However, when given only valid UTF-8, then
+/// all indices are in exact correspondence with their paired word.
+///
+/// This iterator yields words in accordance with the default word boundary
+/// rules specified in
+/// [UAX #29](https://www.unicode.org/reports/tr29/tr29-33.html#Word_Boundaries).
+/// In particular, this may not be suitable for Japanese and Chinese scripts
+/// that do not use spaces between words.
+#[derive(Clone, Debug)]
+pub struct WordsWithBreakIndices<'a> {
+ bs: &'a [u8],
+ forward_index: usize,
+}
+
+impl<'a> WordsWithBreakIndices<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> WordsWithBreakIndices<'a> {
+ WordsWithBreakIndices { bs, forward_index: 0 }
+ }
+
+ /// Return the not-yet-consumed input as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"foo bar baz".words_with_break_indices();
+ ///
+ /// assert_eq!(b"foo bar baz", it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b" bar baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b" baz", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for WordsWithBreakIndices<'a> {
+    type Item = (usize, usize, &'a str);
+
+    /// Decode the next segment and report it with the byte offsets, relative
+    /// to the original input, of the bytes it covered.
+    #[inline]
+    fn next(&mut self) -> Option<(usize, usize, &'a str)> {
+        match decode_word(self.bs) {
+            // Nothing was consumed, so iteration is finished.
+            (_, 0) => None,
+            (segment, consumed) => {
+                let start = self.forward_index;
+                self.bs = &self.bs[consumed..];
+                self.forward_index += consumed;
+                Some((start, start + consumed, segment))
+            }
+        }
+    }
+}
+
+/// Decode one word-break segment from the front of `bs`.
+///
+/// Returns the segment and the number of input bytes it covers. Empty input
+/// yields `("", 0)`. When the word-break DFA finds no match, a single
+/// replacement codepoint is returned along with the byte count reported by
+/// `utf8::decode_lossy`.
+fn decode_word(bs: &[u8]) -> (&str, usize) {
+    const INVALID: &'static str = "\u{FFFD}";
+
+    if bs.is_empty() {
+        return ("", 0);
+    }
+    match WORD_BREAK_FWD.find(bs) {
+        Some(end) => {
+            // Safe because a match can only occur for valid UTF-8.
+            let word = unsafe { bs[..end].to_str_unchecked() };
+            (word, word.len())
+        }
+        None => {
+            // No match on non-empty bytes implies we found invalid UTF-8.
+            let (_, size) = utf8::decode_lossy(bs);
+            (INVALID, size)
+        }
+    }
+}
+
+#[cfg(all(test, feature = "std"))]
+mod tests {
+ #[cfg(not(miri))]
+ use ucd_parse::WordBreakTest;
+
+ use crate::ext_slice::ByteSlice;
+
+    #[test]
+    #[cfg(not(miri))]
+    fn forward_ucd() {
+        // Each UCD case lists the expected segments; concatenating them
+        // rebuilds the input that is handed to the segmenter.
+        for (idx, case) in ucdtests().into_iter().enumerate() {
+            let input = case.words.concat();
+            let actual = words(input.as_bytes());
+            assert_eq!(
+                case.words,
+                actual,
+                "\n\nword forward break test {} failed:\n\
+                 given: {:?}\n\
+                 expected: {:?}\n\
+                 got: {:?}\n",
+                idx,
+                input,
+                strs_to_bstrs(&case.words),
+                strs_to_bstrs(&actual),
+            );
+        }
+    }
+
+ // Additional cases that the UCD test data does not appear to cover.
+ //
+ // Surprisingly, the UCD tests miss these cases. They were only found by
+ // running this crate's segmenter and ICU's segmenter on the same text and
+ // comparing the output.
+ #[test]
+ fn forward_additional() {
+ assert_eq!(vec!["a", ".", " ", "Y"], words(b"a. Y"));
+ assert_eq!(vec!["r", ".", " ", "Yo"], words(b"r. Yo"));
+ assert_eq!(
+ vec!["whatsoever", ".", " ", "You", " ", "may"],
+ words(b"whatsoever. You may")
+ );
+ assert_eq!(
+ vec!["21stcentury'syesterday"],
+ words(b"21stcentury'syesterday")
+ );
+
+ assert_eq!(vec!["Bonta_", "'", "s"], words(b"Bonta_'s"));
+ assert_eq!(vec!["_vhat's"], words(b"_vhat's"));
+ assert_eq!(vec!["__on'anima"], words(b"__on'anima"));
+ assert_eq!(vec!["123_", "'", "4"], words(b"123_'4"));
+ assert_eq!(vec!["_123'4"], words(b"_123'4"));
+ assert_eq!(vec!["__12'345"], words(b"__12'345"));
+
+ assert_eq!(
+ vec!["tomorrowat4", ":", "00", ","],
+ words(b"tomorrowat4:00,")
+ );
+ assert_eq!(vec!["RS1", "'", "s"], words(b"RS1's"));
+ assert_eq!(vec!["X38"], words(b"X38"));
+
+ assert_eq!(vec!["4abc", ":", "00", ","], words(b"4abc:00,"));
+ assert_eq!(vec!["12S", "'", "1"], words(b"12S'1"));
+ assert_eq!(vec!["1XY"], words(b"1XY"));
+
+ assert_eq!(vec!["\u{FEFF}", "Ты"], words("\u{FEFF}Ты".as_bytes()));
+
+ // Checks that Vithkuqi, which was introduced in Unicode 14, works.
+ // This test fails prior to Unicode 14.
+ assert_eq!(
+ vec!["\u{10570}\u{10597}"],
+ words("\u{10570}\u{10597}".as_bytes())
+ );
+ }
+
+    /// Segment `bytes` into words plus the content between them and gather
+    /// every segment into a vector.
+    fn words(bytes: &[u8]) -> Vec<&str> {
+        let mut found = Vec::new();
+        found.extend(bytes.words_with_breaks());
+        found
+    }
+
+    /// Borrow each string in `strs` as its underlying UTF-8 bytes.
+    #[cfg(not(miri))]
+    fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> {
+        let mut out = Vec::with_capacity(strs.len());
+        for s in strs {
+            out.push(s.as_ref().as_bytes());
+        }
+        out
+    }
+
+    /// Parse every usable word-break case from the bundled UCD data.
+    ///
+    /// Comment lines (starting with `#`) and lines mentioning "surrogate"
+    /// are skipped; any other line that fails to parse panics.
+    #[cfg(not(miri))]
+    fn ucdtests() -> Vec<WordBreakTest> {
+        const TESTDATA: &'static str = include_str!("data/WordBreakTest.txt");
+
+        TESTDATA
+            .lines()
+            .map(|raw| raw.trim())
+            .filter(|l| !(l.starts_with("#") || l.contains("surrogate")))
+            .map(|l| l.parse().unwrap())
+            .collect()
+    }
+}
diff --git a/vendor/bstr/src/utf8.rs b/vendor/bstr/src/utf8.rs
new file mode 100644
index 000000000..4b5bc20c3
--- /dev/null
+++ b/vendor/bstr/src/utf8.rs
@@ -0,0 +1,1369 @@
+use core::{char, cmp, fmt, str};
+
+#[cfg(feature = "std")]
+use std::error;
+
+use crate::{ascii, bstr::BStr, ext_slice::ByteSlice};
+
+// The UTF-8 decoder provided here is based on the one presented here:
+// https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+//
+// We *could* have done UTF-8 decoding by using a DFA generated by `\p{any}`
+// using regex-automata that is roughly the same size. The real benefit of
+// Hoehrmann's formulation is that the byte class mapping below is manually
+// tailored such that each byte's class doubles as a shift to mask out the
+// bits necessary for constructing the leading bits of each codepoint value
+// from the initial byte.
+//
+// There are some minor differences between this implementation and Hoehrmann's
+// formulation.
+//
+// Firstly, we make REJECT have state ID 0, since it makes the state table
+// itself a little easier to read and is consistent with the notion that 0
+// means "false" or "bad."
+//
+// Secondly, when doing bulk decoding, we add a SIMD accelerated ASCII fast
+// path.
+//
+// Thirdly, we pre-multiply the state IDs to avoid a multiplication instruction
+// in the core decoding loop. (Which is what regex-automata would do by
+// default.)
+//
+// Fourthly, we split the byte class mapping and transition table into two
+// arrays because it's clearer.
+//
+// It is unlikely that this is the fastest way to do UTF-8 decoding, however,
+// it is fairly simple.
+
+const ACCEPT: usize = 12;
+const REJECT: usize = 0;
+
+/// SAFETY: The decode function below relies on the correctness of these
+/// equivalence classes.
+#[cfg_attr(rustfmt, rustfmt::skip)]
+const CLASSES: [u8; 256] = [
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+];
+
+/// SAFETY: The decode function below relies on the correctness of this state
+/// machine.
+#[cfg_attr(rustfmt, rustfmt::skip)]
+const STATES_FORWARD: &'static [u8] = &[
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 12, 0, 24, 36, 60, 96, 84, 0, 0, 0, 48, 72,
+ 0, 12, 0, 0, 0, 0, 0, 12, 0, 12, 0, 0,
+ 0, 24, 0, 0, 0, 0, 0, 24, 0, 24, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0,
+ 0, 24, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 36, 0, 36, 0, 0,
+ 0, 36, 0, 0, 0, 0, 0, 36, 0, 36, 0, 0,
+ 0, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+];
+
+/// An iterator over Unicode scalar values in a byte string.
+///
+/// When invalid UTF-8 byte sequences are found, they are substituted with the
+/// Unicode replacement codepoint (`U+FFFD`) using the
+/// ["maximal subpart" strategy](https://www.unicode.org/review/pr-121.html).
+///
+/// This iterator is created by the
+/// [`chars`](trait.ByteSlice.html#method.chars) method provided by the
+/// [`ByteSlice`](trait.ByteSlice.html) extension trait for `&[u8]`.
+#[derive(Clone, Debug)]
+pub struct Chars<'a> {
+ bs: &'a [u8],
+}
+
+impl<'a> Chars<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> Chars<'a> {
+ Chars { bs }
+ }
+
+ /// Return the not-yet-consumed input as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut chars = b"abc".chars();
+ ///
+ /// assert_eq!(b"abc", chars.as_bytes());
+ /// chars.next();
+ /// assert_eq!(b"bc", chars.as_bytes());
+ /// chars.next();
+ /// chars.next();
+ /// assert_eq!(b"", chars.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for Chars<'a> {
+    type Item = char;
+
+    /// Lossily decode one scalar value from the front of the remaining bytes.
+    #[inline]
+    fn next(&mut self) -> Option<char> {
+        match decode_lossy(self.bs) {
+            // Nothing was consumed, so iteration is finished.
+            (_, 0) => None,
+            (ch, consumed) => {
+                self.bs = &self.bs[consumed..];
+                Some(ch)
+            }
+        }
+    }
+}
+
+impl<'a> DoubleEndedIterator for Chars<'a> {
+    /// Lossily decode one scalar value from the back of the remaining bytes.
+    #[inline]
+    fn next_back(&mut self) -> Option<char> {
+        match decode_last_lossy(self.bs) {
+            // Nothing was consumed, so iteration is finished.
+            (_, 0) => None,
+            (ch, consumed) => {
+                let keep = self.bs.len() - consumed;
+                self.bs = &self.bs[..keep];
+                Some(ch)
+            }
+        }
+    }
+}
+
+/// An iterator over Unicode scalar values in a byte string and their
+/// byte index positions.
+///
+/// When invalid UTF-8 byte sequences are found, they are substituted with the
+/// Unicode replacement codepoint (`U+FFFD`) using the
+/// ["maximal subpart" strategy](https://www.unicode.org/review/pr-121.html).
+///
+/// Note that this is slightly different from the `CharIndices` iterator
+/// provided by the standard library. Aside from working on possibly invalid
+/// UTF-8, this iterator provides both the corresponding starting and ending
+/// byte indices of each codepoint yielded. The ending position is necessary to
+/// slice the original byte string when invalid UTF-8 bytes are converted into
+/// a Unicode replacement codepoint, since a single replacement codepoint can
+/// substitute anywhere from 1 to 3 invalid bytes (inclusive).
+///
+/// This iterator is created by the
+/// [`char_indices`](trait.ByteSlice.html#method.char_indices) method provided
+/// by the [`ByteSlice`](trait.ByteSlice.html) extension trait for `&[u8]`.
+#[derive(Clone, Debug)]
+pub struct CharIndices<'a> {
+ bs: &'a [u8],
+ forward_index: usize,
+ reverse_index: usize,
+}
+
+impl<'a> CharIndices<'a> {
+ pub(crate) fn new(bs: &'a [u8]) -> CharIndices<'a> {
+ CharIndices { bs, forward_index: 0, reverse_index: bs.len() }
+ }
+
+ /// Return the not-yet-consumed input as a subslice of the original data.
+ ///
+ /// The slice returned has the same lifetime as the original slice, and so
+ /// the iterator can continue to be used while this exists.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let mut it = b"abc".char_indices();
+ ///
+ /// assert_eq!(b"abc", it.as_bytes());
+ /// it.next();
+ /// assert_eq!(b"bc", it.as_bytes());
+ /// it.next();
+ /// it.next();
+ /// assert_eq!(b"", it.as_bytes());
+ /// ```
+ #[inline]
+ pub fn as_bytes(&self) -> &'a [u8] {
+ self.bs
+ }
+}
+
+impl<'a> Iterator for CharIndices<'a> {
+    type Item = (usize, usize, char);
+
+    /// Lossily decode one scalar value from the front of the remaining bytes
+    /// and report it with the byte offsets, relative to the original input,
+    /// of the bytes it covered.
+    #[inline]
+    fn next(&mut self) -> Option<(usize, usize, char)> {
+        match decode_lossy(self.bs) {
+            // Nothing was consumed, so iteration is finished.
+            (_, 0) => None,
+            (ch, consumed) => {
+                let start = self.forward_index;
+                self.bs = &self.bs[consumed..];
+                self.forward_index += consumed;
+                Some((start, start + consumed, ch))
+            }
+        }
+    }
+}
+
+impl<'a> DoubleEndedIterator for CharIndices<'a> {
+    /// Lossily decode one scalar value from the back of the remaining bytes
+    /// and report it with the byte offsets, relative to the original input,
+    /// of the bytes it covered.
+    #[inline]
+    fn next_back(&mut self) -> Option<(usize, usize, char)> {
+        match decode_last_lossy(self.bs) {
+            // Nothing was consumed, so iteration is finished.
+            (_, 0) => None,
+            (ch, consumed) => {
+                let keep = self.bs.len() - consumed;
+                self.bs = &self.bs[..keep];
+                self.reverse_index -= consumed;
+                let start = self.reverse_index;
+                Some((start, start + consumed, ch))
+            }
+        }
+    }
+}
+
+impl<'a> ::core::iter::FusedIterator for CharIndices<'a> {}
+
+/// An iterator over chunks of valid UTF-8 in a byte slice.
+///
+/// Created by [`ByteSlice::utf8_chunks`](trait.ByteSlice.html#method.utf8_chunks).
+#[derive(Clone, Debug)]
+pub struct Utf8Chunks<'a> {
+ pub(super) bytes: &'a [u8],
+}
+
+/// A chunk of valid UTF-8, possibly followed by invalid UTF-8 bytes.
+///
+/// This is yielded by the
+/// [`Utf8Chunks`](struct.Utf8Chunks.html)
+/// iterator, which can be created via the
+/// [`ByteSlice::utf8_chunks`](trait.ByteSlice.html#method.utf8_chunks)
+/// method.
+///
+/// The `'a` lifetime parameter corresponds to the lifetime of the bytes that
+/// are being iterated over.
+#[cfg_attr(test, derive(Debug, PartialEq))]
+pub struct Utf8Chunk<'a> {
+ /// A valid UTF-8 piece, at the start, end, or between invalid UTF-8 bytes.
+ ///
+ /// This is empty between adjacent invalid UTF-8 byte sequences.
+ valid: &'a str,
+ /// A sequence of invalid UTF-8 bytes.
+ ///
+ /// Can only be empty in the last chunk.
+ ///
+ /// Should be replaced by a single Unicode replacement character, if not
+ /// empty.
+ invalid: &'a BStr,
+ /// Indicates whether the invalid sequence could've been valid if there
+ /// were more bytes.
+ ///
+ /// Can only be true in the last chunk.
+ incomplete: bool,
+}
+
+impl<'a> Utf8Chunk<'a> {
+ /// Returns the (possibly empty) valid UTF-8 bytes in this chunk.
+ ///
+ /// This may be empty if there are consecutive sequences of invalid UTF-8
+ /// bytes.
+ #[inline]
+ pub fn valid(&self) -> &'a str {
+ self.valid
+ }
+
+ /// Returns the (possibly empty) invalid UTF-8 bytes in this chunk that
+ /// immediately follow the valid UTF-8 bytes in this chunk.
+ ///
+ /// This is only empty when this chunk corresponds to the last chunk in
+ /// the original bytes.
+ ///
+ /// The maximum length of this slice is 3. That is, invalid UTF-8 byte
+ /// sequences longer than 1 byte always correspond to a valid _prefix_ of
+ /// a valid UTF-8 encoded codepoint. This corresponds to the "substitution
+ /// of maximal subparts" strategy that is described in more detail in the
+ /// docs for the
+ /// [`ByteSlice::to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy)
+ /// method.
+ #[inline]
+ pub fn invalid(&self) -> &'a [u8] {
+ self.invalid.as_bytes()
+ }
+
+ /// Returns whether the invalid sequence might still become valid if more
+ /// bytes are added.
+ ///
+ /// Returns true if the end of the input was reached unexpectedly,
+ /// without encountering an unexpected byte.
+ ///
+ /// This can only be the case for the last chunk.
+ #[inline]
+ pub fn incomplete(&self) -> bool {
+ self.incomplete
+ }
+}
+
+impl<'a> Iterator for Utf8Chunks<'a> {
+    type Item = Utf8Chunk<'a>;
+
+    /// Yield the next chunk: the longest valid UTF-8 prefix of the remaining
+    /// bytes, followed by the invalid bytes (if any) that come right after.
+    #[inline]
+    fn next(&mut self) -> Option<Utf8Chunk<'a>> {
+        let input = self.bytes;
+        if input.is_empty() {
+            return None;
+        }
+        let err = match validate(input) {
+            Err(err) => err,
+            Ok(()) => {
+                // Everything left is valid: emit it as the final chunk.
+                self.bytes = &[];
+                return Some(Utf8Chunk {
+                    // SAFETY: This is safe because of the guarantees
+                    // provided by utf8::validate.
+                    valid: unsafe { str::from_utf8_unchecked(input) },
+                    invalid: [].as_bstr(),
+                    incomplete: false,
+                });
+            }
+        };
+
+        let (valid, tail) = input.split_at(err.valid_up_to());
+        // SAFETY: This is safe because of the guarantees provided by
+        // utf8::validate.
+        let valid = unsafe { str::from_utf8_unchecked(valid) };
+        // No error length means the input ended mid-sequence, so the whole
+        // tail is the (incomplete) invalid part.
+        let (invalid_len, incomplete) =
+            err.error_len().map_or((tail.len(), true), |n| (n, false));
+        let (invalid, remaining) = tail.split_at(invalid_len);
+        self.bytes = remaining;
+        Some(Utf8Chunk { valid, invalid: invalid.as_bstr(), incomplete })
+    }
+
+    /// Non-empty input yields at least one chunk and at most one per byte.
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        match self.bytes.len() {
+            0 => (0, Some(0)),
+            len => (1, Some(len)),
+        }
+    }
+}
+
+impl<'a> ::core::iter::FusedIterator for Utf8Chunks<'a> {}
+
+/// An error that occurs when UTF-8 decoding fails.
+///
+/// This error occurs when attempting to convert a non-UTF-8 byte
+/// string to a Rust string that must be valid UTF-8. For example,
+/// [`to_str`](trait.ByteSlice.html#method.to_str) is one such method.
+///
+/// # Example
+///
+/// This example shows what happens when a given byte sequence is invalid,
+/// but ends with a sequence that is a possible prefix of valid UTF-8.
+///
+/// ```
+/// use bstr::{B, ByteSlice};
+///
+/// let s = B(b"foobar\xF1\x80\x80");
+/// let err = s.to_str().unwrap_err();
+/// assert_eq!(err.valid_up_to(), 6);
+/// assert_eq!(err.error_len(), None);
+/// ```
+///
+/// This example shows what happens when a given byte sequence contains
+/// invalid UTF-8.
+///
+/// ```
+/// use bstr::ByteSlice;
+///
+/// let s = b"foobar\xF1\x80\x80quux";
+/// let err = s.to_str().unwrap_err();
+/// assert_eq!(err.valid_up_to(), 6);
+/// // The error length reports the maximum number of bytes that correspond to
+/// // a valid prefix of a UTF-8 encoded codepoint.
+/// assert_eq!(err.error_len(), Some(3));
+///
+/// // In contrast to the above which contains a single invalid prefix,
+/// // consider the case of multiple individual bytes that are never valid
+/// // prefixes. Note how the value of error_len changes!
+/// let s = b"foobar\xFF\xFFquux";
+/// let err = s.to_str().unwrap_err();
+/// assert_eq!(err.valid_up_to(), 6);
+/// assert_eq!(err.error_len(), Some(1));
+///
+/// // \xFF can never begin a valid sequence, so error_len is still Some(1)
+/// // even when it immediately precedes the end of the string.
+/// let s = b"foobar\xFF";
+/// let err = s.to_str().unwrap_err();
+/// assert_eq!(err.valid_up_to(), 6);
+/// assert_eq!(err.error_len(), Some(1));
+/// ```
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Utf8Error {
+ valid_up_to: usize,
+ error_len: Option<usize>,
+}
+
+impl Utf8Error {
+ /// Returns the byte index of the position immediately following the last
+ /// valid UTF-8 byte.
+ ///
+ /// # Example
+ ///
+ /// This example shows how `valid_up_to` can be used to retrieve a
+ /// possibly empty prefix that is guaranteed to be valid UTF-8:
+ ///
+ /// ```
+ /// use bstr::ByteSlice;
+ ///
+ /// let s = b"foobar\xF1\x80\x80quux";
+ /// let err = s.to_str().unwrap_err();
+ ///
+ /// // This is guaranteed to never panic.
+ /// let string = s[..err.valid_up_to()].to_str().unwrap();
+ /// assert_eq!(string, "foobar");
+ /// ```
+ #[inline]
+ pub fn valid_up_to(&self) -> usize {
+ self.valid_up_to
+ }
+
+ /// Returns the total number of invalid UTF-8 bytes immediately following
+ /// the position returned by `valid_up_to`. This value is always at least
+ /// `1`, but can be up to `3` if bytes form a valid prefix of some UTF-8
+ /// encoded codepoint.
+ ///
+ /// If the end of the original input was found before a valid UTF-8 encoded
+ /// codepoint could be completed, then this returns `None`. This is useful
+ /// when processing streams, where a `None` value signals that more input
+ /// might be needed.
+ #[inline]
+ pub fn error_len(&self) -> Option<usize> {
+ self.error_len
+ }
+}
+
+#[cfg(feature = "std")]
+impl error::Error for Utf8Error {
+ fn description(&self) -> &str {
+ "invalid UTF-8"
+ }
+}
+
+impl fmt::Display for Utf8Error {
+    /// Describe the error by the byte offset at which valid UTF-8 ends.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let offset = self.valid_up_to;
+        write!(f, "invalid UTF-8 found at byte offset {}", offset)
+    }
+}
+
+/// Returns `Ok(())` if and only if the given slice is completely valid UTF-8.
+///
+/// If the slice isn't valid UTF-8, then an error is returned that explains
+/// the first location at which invalid UTF-8 was detected.
+pub fn validate(slice: &[u8]) -> Result<(), Utf8Error> {
+ // The fast path for validating UTF-8. It steps through a UTF-8 automaton
+ // and uses a SIMD accelerated ASCII fast path on x86_64. If an error is
+ // detected, it backs up and runs the slower version of the UTF-8 automaton
+ // to determine correct error information.
+ fn fast(slice: &[u8]) -> Result<(), Utf8Error> {
+ let mut state = ACCEPT;
+ let mut i = 0;
+
+ while i < slice.len() {
+ let b = slice[i];
+
+ // ASCII fast path. If we see two consecutive ASCII bytes, then try
+ // to validate as much ASCII as possible very quickly.
+ if state == ACCEPT
+ && b <= 0x7F
+ && slice.get(i + 1).map_or(false, |&b| b <= 0x7F)
+ {
+ i += ascii::first_non_ascii_byte(&slice[i..]);
+ continue;
+ }
+
+ state = step(state, b);
+ if state == REJECT {
+ return Err(find_valid_up_to(slice, i));
+ }
+ i += 1;
+ }
+ if state != ACCEPT {
+ Err(find_valid_up_to(slice, slice.len()))
+ } else {
+ Ok(())
+ }
+ }
+
+ // Given the first position at which a UTF-8 sequence was determined to be
+ // invalid, return an error that correctly reports the position at which
+ // the last complete UTF-8 sequence ends.
+ #[inline(never)]
+ fn find_valid_up_to(slice: &[u8], rejected_at: usize) -> Utf8Error {
+ // In order to find the last valid byte, we need to back up an amount
+ // that guarantees every preceding byte is part of a valid UTF-8
+ // code unit sequence. To do this, we simply locate the last leading
+ // byte that occurs before rejected_at.
+ let mut backup = rejected_at.saturating_sub(1);
+ while backup > 0 && !is_leading_or_invalid_utf8_byte(slice[backup]) {
+ backup -= 1;
+ }
+ let upto = cmp::min(slice.len(), rejected_at.saturating_add(1));
+ let mut err = slow(&slice[backup..upto]).unwrap_err();
+ err.valid_up_to += backup;
+ err
+ }
+
+ // Like top-level UTF-8 decoding, except it correctly reports a UTF-8 error
+ // when an invalid sequence is found. This is split out from validate so
+ // that the fast path doesn't need to keep track of the position of the
+ // last valid UTF-8 byte. In particular, tracking this requires checking
+ // for an ACCEPT state on each byte, which degrades throughput pretty
+ // badly.
+ fn slow(slice: &[u8]) -> Result<(), Utf8Error> {
+ let mut state = ACCEPT;
+ let mut valid_up_to = 0;
+ for (i, &b) in slice.iter().enumerate() {
+ state = step(state, b);
+ if state == ACCEPT {
+ valid_up_to = i + 1;
+ } else if state == REJECT {
+ // Our error length must always be at least 1.
+ let error_len = Some(cmp::max(1, i - valid_up_to));
+ return Err(Utf8Error { valid_up_to, error_len });
+ }
+ }
+ if state != ACCEPT {
+ Err(Utf8Error { valid_up_to, error_len: None })
+ } else {
+ Ok(())
+ }
+ }
+
+ // Advance to the next state given the current state and current byte.
+ fn step(state: usize, b: u8) -> usize {
+ let class = CLASSES[b as usize];
+ // SAFETY: This is safe because 'class' is always <=11 and 'state' is
+ // always <=96. Therefore, the maximal index is 96+11 = 107, where
+ // STATES_FORWARD.len() = 108 such that every index is guaranteed to be
+ // valid by construction of the state machine and the byte equivalence
+ // classes.
+ unsafe {
+ *STATES_FORWARD.get_unchecked(state + class as usize) as usize
+ }
+ }
+
+ fast(slice)
+}
+
+/// UTF-8 decode a single Unicode scalar value from the front of a slice.
+///
+/// On success, the decoded scalar value is returned together with the number
+/// of bytes that encoded it, which is always between 1 and 4, inclusive.
+///
+/// On failure, `None` is returned together with the number of bytes making
+/// up a maximal prefix of a valid UTF-8 code unit sequence. That count is
+/// always between 0 and 3, inclusive, and is 0 only when `slice` is empty.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr::decode_utf8;
+///
+/// // Decoding a valid codepoint.
+/// let (ch, size) = decode_utf8(b"\xE2\x98\x83");
+/// assert_eq!(Some('☃'), ch);
+/// assert_eq!(3, size);
+///
+/// // Decoding an incomplete codepoint.
+/// let (ch, size) = decode_utf8(b"\xE2\x98");
+/// assert_eq!(None, ch);
+/// assert_eq!(2, size);
+/// ```
+///
+/// This example shows how to iterate over all codepoints in UTF-8 encoded
+/// bytes, while replacing invalid UTF-8 sequences with the replacement
+/// codepoint:
+///
+/// ```
+/// use bstr::{B, decode_utf8};
+///
+/// let mut bytes = B(b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61");
+/// let mut chars = vec![];
+/// while !bytes.is_empty() {
+///     let (ch, size) = decode_utf8(bytes);
+///     bytes = &bytes[size..];
+///     chars.push(ch.unwrap_or('\u{FFFD}'));
+/// }
+/// assert_eq!(vec!['☃', '\u{FFFD}', '𝞃', '\u{FFFD}', 'a'], chars);
+/// ```
+#[inline]
+pub fn decode<B: AsRef<[u8]>>(slice: B) -> (Option<char>, usize) {
+    let bytes = slice.as_ref();
+    match bytes.first() {
+        None => return (None, 0),
+        // ASCII needs no state machine.
+        Some(&b) if b <= 0x7F => return (Some(b as char), 1),
+        Some(_) => {}
+    }
+
+    let mut state = ACCEPT;
+    let mut cp = 0;
+    for (idx, &byte) in bytes.iter().enumerate() {
+        decode_step(&mut state, &mut cp, byte);
+        let consumed = idx + 1;
+
+        if state == ACCEPT {
+            // SAFETY: This is safe because `decode_step` guarantees that
+            // `cp` is a valid Unicode scalar value in an ACCEPT state.
+            let ch = unsafe { char::from_u32_unchecked(cp) };
+            return (Some(ch), consumed);
+        }
+        if state == REJECT {
+            // The rejecting byte is not part of the reported prefix, but we
+            // always want to advance at least one byte.
+            return (None, cmp::max(1, consumed.saturating_sub(1)));
+        }
+    }
+    // Input ended in the middle of a sequence: all of it was consumed.
+    (None, bytes.len())
+}
+
+/// Decode the first Unicode scalar value of a slice as UTF-8, substituting
+/// the replacement codepoint when decoding fails.
+///
+/// On success, the decoded scalar value is returned together with the length
+/// of its encoding in bytes. That length is always 1, 2, 3 or 4.
+///
+/// On failure, the Unicode replacement codepoint (`U+FFFD`) is returned
+/// together with the number of bytes that form a maximal prefix of a valid
+/// UTF-8 code unit sequence. That count is always between 0 and 3,
+/// inclusive, and it is 0 only when `slice` is empty.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```ignore
+/// use bstr::decode_utf8_lossy;
+///
+/// // Decoding a valid codepoint.
+/// let (ch, size) = decode_utf8_lossy(b"\xE2\x98\x83");
+/// assert_eq!('☃', ch);
+/// assert_eq!(3, size);
+///
+/// // Decoding an incomplete codepoint.
+/// let (ch, size) = decode_utf8_lossy(b"\xE2\x98");
+/// assert_eq!('\u{FFFD}', ch);
+/// assert_eq!(2, size);
+/// ```
+///
+/// The next example walks over every codepoint in UTF-8 encoded bytes and
+/// substitutes the replacement codepoint for each invalid UTF-8 sequence:
+///
+/// ```ignore
+/// use bstr::{B, decode_utf8_lossy};
+///
+/// let mut bytes = B(b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61");
+/// let mut chars = vec![];
+/// while !bytes.is_empty() {
+///     let (ch, size) = decode_utf8_lossy(bytes);
+///     bytes = &bytes[size..];
+///     chars.push(ch);
+/// }
+/// assert_eq!(vec!['☃', '\u{FFFD}', '𝞃', '\u{FFFD}', 'a'], chars);
+/// ```
+#[inline]
+pub fn decode_lossy<B: AsRef<[u8]>>(slice: B) -> (char, usize) {
+    let (ch, size) = decode(slice);
+    (ch.unwrap_or('\u{FFFD}'), size)
+}
+
+/// Decode the last Unicode scalar value of a slice, treating the slice as
+/// UTF-8.
+///
+/// On success, the decoded scalar value is returned together with the length
+/// of its encoding in bytes. That length is always 1, 2, 3 or 4.
+///
+/// On failure, `None` is returned together with the number of bytes that
+/// form a maximal prefix of a valid UTF-8 code unit sequence. That count is
+/// always between 0 and 3, inclusive, and it is 0 only when `slice` is
+/// empty.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```
+/// use bstr::decode_last_utf8;
+///
+/// // Decoding a valid codepoint.
+/// let (ch, size) = decode_last_utf8(b"\xE2\x98\x83");
+/// assert_eq!(Some('☃'), ch);
+/// assert_eq!(3, size);
+///
+/// // Decoding an incomplete codepoint.
+/// let (ch, size) = decode_last_utf8(b"\xE2\x98");
+/// assert_eq!(None, ch);
+/// assert_eq!(2, size);
+/// ```
+///
+/// The next example walks backwards over every codepoint in UTF-8 encoded
+/// bytes and substitutes the replacement codepoint for each invalid UTF-8
+/// sequence:
+///
+/// ```
+/// use bstr::{B, decode_last_utf8};
+///
+/// let mut bytes = B(b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61");
+/// let mut chars = vec![];
+/// while !bytes.is_empty() {
+///     let (ch, size) = decode_last_utf8(bytes);
+///     bytes = &bytes[..bytes.len()-size];
+///     chars.push(ch.unwrap_or('\u{FFFD}'));
+/// }
+/// assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars);
+/// ```
+#[inline]
+pub fn decode_last<B: AsRef<[u8]>>(slice: B) -> (Option<char>, usize) {
+    // TODO: We could implement this by reversing the UTF-8 automaton, but for
+    // now, we do it the slow way by using the forward automaton.
+
+    let bytes = slice.as_ref();
+    let len = bytes.len();
+    if len == 0 {
+        return (None, 0);
+    }
+    // Only the final four bytes can belong to the last scalar value. Scan
+    // them from the back for the nearest byte that is not a continuation
+    // byte; if none is found above `floor`, start decoding at `floor`.
+    let floor = len.saturating_sub(4);
+    let start = bytes[floor + 1..]
+        .iter()
+        .rposition(|&b| is_leading_or_invalid_utf8_byte(b))
+        .map_or(floor, |offset| floor + 1 + offset);
+    match decode(&bytes[start..]) {
+        // The forward decode ran exactly to the end of the slice, so its
+        // answer is also the answer for the last scalar value.
+        (ch, size) if start + size == len => (ch, size),
+        // Otherwise there is at least one stray byte at the end that never
+        // occurs in a valid code unit prefix, so we advance by one byte.
+        _ => (None, 1),
+    }
+}
+
+/// Decode the last Unicode scalar value of a slice as UTF-8, substituting
+/// the replacement codepoint when decoding fails.
+///
+/// On success, the decoded scalar value is returned together with the length
+/// of its encoding in bytes. That length is always 1, 2, 3 or 4.
+///
+/// On failure, the Unicode replacement codepoint (`U+FFFD`) is returned
+/// together with the number of bytes that form a maximal prefix of a valid
+/// UTF-8 code unit sequence. That count is always between 0 and 3,
+/// inclusive, and it is 0 only when `slice` is empty.
+///
+/// # Examples
+///
+/// Basic usage:
+///
+/// ```ignore
+/// use bstr::decode_last_utf8_lossy;
+///
+/// // Decoding a valid codepoint.
+/// let (ch, size) = decode_last_utf8_lossy(b"\xE2\x98\x83");
+/// assert_eq!('☃', ch);
+/// assert_eq!(3, size);
+///
+/// // Decoding an incomplete codepoint.
+/// let (ch, size) = decode_last_utf8_lossy(b"\xE2\x98");
+/// assert_eq!('\u{FFFD}', ch);
+/// assert_eq!(2, size);
+/// ```
+///
+/// The next example walks backwards over every codepoint in UTF-8 encoded
+/// bytes and substitutes the replacement codepoint for each invalid UTF-8
+/// sequence:
+///
+/// ```ignore
+/// use bstr::{B, decode_last_utf8_lossy};
+///
+/// let mut bytes = B(b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61");
+/// let mut chars = vec![];
+/// while !bytes.is_empty() {
+///     let (ch, size) = decode_last_utf8_lossy(bytes);
+///     bytes = &bytes[..bytes.len()-size];
+///     chars.push(ch);
+/// }
+/// assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars);
+/// ```
+#[inline]
+pub fn decode_last_lossy<B: AsRef<[u8]>>(slice: B) -> (char, usize) {
+    let (ch, size) = decode_last(slice);
+    (ch.unwrap_or('\u{FFFD}'), size)
+}
+
+/// Feed a single byte into the UTF-8 decoding automaton.
+///
+/// `state` is overwritten with the state reached after reading `b`. `cp` is
+/// updated alongside it: when the automaton was in the ACCEPT state, `cp` is
+/// restarted from `b` masked with `0xFF >> class`, and otherwise the low six
+/// bits of `b` are appended to the bits already collected in `cp`.
+///
+/// SAFETY: The decode function relies on state being equal to ACCEPT only if
+/// cp is a valid Unicode scalar value.
+#[inline]
+pub fn decode_step(state: &mut usize, cp: &mut u32, b: u8) {
+    let class = CLASSES[b as usize];
+    let byte = b as u32;
+    *cp = if *state == ACCEPT {
+        (0xFF >> class) & byte
+    } else {
+        (byte & 0b111111) | (*cp << 6)
+    };
+    let offset = class as usize;
+    *state = STATES_FORWARD[*state + offset] as usize;
+}
+
+/// Reports whether the given byte could start a UTF-8 sequence or can never
+/// appear anywhere in valid UTF-8. Equivalently, it returns false if and
+/// only if the byte is a continuation byte.
+fn is_leading_or_invalid_utf8_byte(b: u8) -> bool {
+    // Continuation bytes are exactly those of the form 10xxxxxx, that is,
+    // \x80 through \xBF. Everything else qualifies:
+    //
+    //   * ASCII bytes never have their most significant bit set.
+    //   * The leading byte of a 2/3/4-byte sequence always has its two most
+    //     significant bits set.
+    //   * Every byte that can never appear in valid UTF-8 (\xC0, \xC1 and
+    //     \xF5 through \xFF) also has its two most significant bits set.
+    !matches!(b, 0x80..=0xBF)
+}
+
+#[cfg(all(test, feature = "std"))]
+mod tests {
+    use std::char;
+
+    use crate::{
+        ext_slice::{ByteSlice, B},
+        tests::LOSSY_TESTS,
+        utf8::{self, Utf8Error},
+    };
+
+    // The error expected when the input ends in the middle of a sequence.
+    fn utf8e(valid_up_to: usize) -> Utf8Error {
+        Utf8Error { valid_up_to, error_len: None }
+    }
+
+    // The error expected when an invalid sequence of known length is found.
+    fn utf8e2(valid_up_to: usize, error_len: usize) -> Utf8Error {
+        Utf8Error { valid_up_to, error_len: Some(error_len) }
+    }
+
+    #[test]
+    #[cfg(not(miri))]
+    fn validate_all_codepoints() {
+        let mut buf = [0; 4];
+        for cp in (0..=0x10FFFF).filter_map(char::from_u32) {
+            let encoded = cp.encode_utf8(&mut buf);
+            assert_eq!(Ok(()), utf8::validate(encoded.as_bytes()));
+        }
+    }
+
+    #[test]
+    fn validate_multiple_codepoints() {
+        // `track_caller` makes a failure point at the offending case below.
+        #[track_caller]
+        fn valid(input: &[u8]) {
+            assert_eq!(Ok(()), utf8::validate(input));
+        }
+
+        valid(b"abc");
+        valid(b"a\xE2\x98\x83a");
+        valid(b"a\xF0\x9D\x9C\xB7a");
+        valid(b"\xE2\x98\x83\xF0\x9D\x9C\xB7");
+        valid(b"a\xE2\x98\x83a\xF0\x9D\x9C\xB7a");
+        valid(b"\xEF\xBF\xBD\xE2\x98\x83\xEF\xBF\xBD");
+    }
+
+    #[test]
+    fn validate_errors() {
+        // Expect an invalid sequence of `error_len` bytes at `valid_up_to`.
+        #[track_caller]
+        fn invalid(valid_up_to: usize, error_len: usize, input: &[u8]) {
+            assert_eq!(
+                Err(utf8e2(valid_up_to, error_len)),
+                utf8::validate(input)
+            );
+        }
+
+        // Expect the input to end mid-sequence after `valid_up_to` bytes.
+        #[track_caller]
+        fn truncated(valid_up_to: usize, input: &[u8]) {
+            assert_eq!(Err(utf8e(valid_up_to)), utf8::validate(input));
+        }
+
+        // A single invalid byte: alone, after ASCII, and after a 2-, 3- and
+        // 4-byte sequence.
+        invalid(0, 1, b"\xFF");
+        invalid(1, 1, b"a\xFF");
+        invalid(2, 1, b"\xCE\xB2\xFF");
+        invalid(3, 1, b"\xE2\x98\x83\xFF");
+        invalid(4, 1, b"\xF0\x9D\x9D\xB1\xFF");
+
+        // Invalid 2-, 3- and 4-byte sequences whose valid prefix is one byte
+        // shorter than the full sequence would be.
+        invalid(0, 1, b"\xCE\xF0");
+        invalid(0, 2, b"\xE2\x98\xF0");
+        invalid(0, 3, b"\xF0\x9D\x9D\xF0");
+
+        // An overlong sequence. Should be \xE2\x82\xAC, but we encode the
+        // same codepoint value in 4 bytes. This checks both that overlong
+        // sequences are rejected and that valid_up_to comes out right.
+        invalid(0, 1, b"\xF0\x82\x82\xAC");
+        invalid(1, 1, b"a\xF0\x82\x82\xAC");
+        invalid(3, 1, b"\xE2\x98\x83\xF0\x82\x82\xAC");
+
+        // A surrogate codepoint encoded with the UTF-8 scheme must fail
+        // validation.
+        invalid(0, 1, b"\xED\xA0\x80");
+        invalid(1, 1, b"a\xED\xA0\x80");
+        invalid(3, 1, b"\xE2\x98\x83\xED\xA0\x80");
+
+        // An incomplete 2-byte sequence followed by more input.
+        invalid(0, 1, b"\xCEa");
+        invalid(1, 1, b"a\xCEa");
+        invalid(3, 1, b"\xE2\x98\x83\xCE\xE2\x98\x83");
+        // An incomplete 3-byte sequence followed by more input.
+        invalid(0, 2, b"\xE2\x98a");
+        invalid(1, 2, b"a\xE2\x98a");
+        invalid(3, 2, b"\xE2\x98\x83\xE2\x98\xE2\x98\x83");
+        // An incomplete 4-byte sequence followed by more input.
+        invalid(0, 3, b"\xF0\x9D\x9Ca");
+        invalid(1, 3, b"a\xF0\x9D\x9Ca");
+        invalid(4, 3, b"\xF0\x9D\x9C\xB1\xF0\x9D\x9C\xE2\x98\x83");
+        invalid(6, 3, b"foobar\xF1\x80\x80quux");
+
+        // An incomplete 2-byte sequence at the end of the input.
+        truncated(0, b"\xCE");
+        truncated(1, b"a\xCE");
+        truncated(3, b"\xE2\x98\x83\xCE");
+        // An incomplete 3-byte sequence at the end of the input.
+        truncated(0, b"\xE2\x98");
+        truncated(1, b"a\xE2\x98");
+        truncated(3, b"\xE2\x98\x83\xE2\x98");
+        // An incomplete 4-byte sequence at the end of the input.
+        truncated(0, b"\xF0\x9D\x9C");
+        truncated(1, b"a\xF0\x9D\x9C");
+        truncated(4, b"\xF0\x9D\x9C\xB1\xF0\x9D\x9C");
+
+        // Test that we report errors correctly even after long valid
+        // sequences. This checks that our "backup" logic for detecting
+        // errors is correct.
+        invalid(8, 1, b"\xe2\x98\x83\xce\xb2\xe3\x83\x84\xFF");
+    }
+
+    #[test]
+    fn decode_valid() {
+        // Decode `s` front to back, one scalar value at a time.
+        fn d(s: &str) -> Vec<char> {
+            let mut rest = s;
+            std::iter::from_fn(|| {
+                if rest.is_empty() {
+                    return None;
+                }
+                let (ch, size) = utf8::decode(rest.as_bytes());
+                rest = &rest[size..];
+                Some(ch.unwrap())
+            })
+            .collect()
+        }
+
+        assert_eq!(vec!['☃'], d("☃"));
+        assert_eq!(vec!['☃', '☃'], d("☃☃"));
+        assert_eq!(vec!['α', 'β', 'γ', 'δ', 'ε'], d("αβγδε"));
+        assert_eq!(vec!['☃', '⛄', '⛇'], d("☃⛄⛇"));
+        assert_eq!(vec!['𝗮', '𝗯', '𝗰', '𝗱', '𝗲'], d("𝗮𝗯𝗰𝗱𝗲"));
+    }
+
+    #[test]
+    fn decode_invalid() {
+        // Expect decoding to fail and report `want` bytes.
+        #[track_caller]
+        fn check(input: &[u8], want: usize) {
+            let (ch, size) = utf8::decode(input);
+            assert_eq!(None, ch);
+            assert_eq!(want, size);
+        }
+
+        check(b"", 0);
+        check(b"\xFF", 1);
+        check(b"\xCE\xF0", 1);
+        check(b"\xE2\x98\xF0", 2);
+        check(b"\xF0\x9D\x9D", 3);
+        check(b"\xF0\x9D\x9D\xF0", 3);
+        check(b"\xF0\x82\x82\xAC", 1);
+        check(b"\xED\xA0\x80", 1);
+        check(b"\xCEa", 1);
+        check(b"\xE2\x98a", 2);
+        check(b"\xF0\x9D\x9Ca", 3);
+    }
+
+    #[test]
+    fn decode_lossy() {
+        // Expect the replacement codepoint and `want` reported bytes.
+        #[track_caller]
+        fn check(input: &[u8], want: usize) {
+            let (ch, size) = utf8::decode_lossy(input);
+            assert_eq!('\u{FFFD}', ch);
+            assert_eq!(want, size);
+        }
+
+        check(b"", 0);
+        check(b"\xFF", 1);
+        check(b"\xCE\xF0", 1);
+        check(b"\xE2\x98\xF0", 2);
+        check(b"\xF0\x9D\x9D\xF0", 3);
+        check(b"\xF0\x82\x82\xAC", 1);
+        check(b"\xED\xA0\x80", 1);
+        check(b"\xCEa", 1);
+        check(b"\xE2\x98a", 2);
+        check(b"\xF0\x9D\x9Ca", 3);
+    }
+
+    #[test]
+    fn decode_last_valid() {
+        // Decode `s` back to front, one scalar value at a time.
+        fn d(s: &str) -> Vec<char> {
+            let mut rest = s;
+            std::iter::from_fn(|| {
+                if rest.is_empty() {
+                    return None;
+                }
+                let (ch, size) = utf8::decode_last(rest.as_bytes());
+                rest = &rest[..rest.len() - size];
+                Some(ch.unwrap())
+            })
+            .collect()
+        }
+
+        assert_eq!(vec!['☃'], d("☃"));
+        assert_eq!(vec!['☃', '☃'], d("☃☃"));
+        assert_eq!(vec!['ε', 'δ', 'γ', 'β', 'α'], d("αβγδε"));
+        assert_eq!(vec!['⛇', '⛄', '☃'], d("☃⛄⛇"));
+        assert_eq!(vec!['𝗲', '𝗱', '𝗰', '𝗯', '𝗮'], d("𝗮𝗯𝗰𝗱𝗲"));
+    }
+
+    #[test]
+    fn decode_last_invalid() {
+        // Expect decoding from the end to fail and report `want` bytes.
+        #[track_caller]
+        fn check(input: &[u8], want: usize) {
+            let (ch, size) = utf8::decode_last(input);
+            assert_eq!(None, ch);
+            assert_eq!(want, size);
+        }
+
+        check(b"", 0);
+        check(b"\xFF", 1);
+        check(b"\xCE\xF0", 1);
+        check(b"\xCE", 1);
+        check(b"\xE2\x98\xF0", 1);
+        check(b"\xE2\x98", 2);
+        check(b"\xF0\x9D\x9D\xF0", 1);
+        check(b"\xF0\x9D\x9D", 3);
+        check(b"\xF0\x82\x82\xAC", 1);
+        check(b"\xED\xA0\x80", 1);
+        check(b"\xED\xA0", 1);
+        check(b"\xED", 1);
+        check(b"a\xCE", 1);
+        check(b"a\xE2\x98", 2);
+        check(b"a\xF0\x9D\x9C", 3);
+    }
+
+    #[test]
+    fn decode_last_lossy() {
+        // Expect the replacement codepoint and `want` reported bytes.
+        #[track_caller]
+        fn check(input: &[u8], want: usize) {
+            let (ch, size) = utf8::decode_last_lossy(input);
+            assert_eq!('\u{FFFD}', ch);
+            assert_eq!(want, size);
+        }
+
+        check(b"", 0);
+        check(b"\xFF", 1);
+        check(b"\xCE\xF0", 1);
+        check(b"\xCE", 1);
+        check(b"\xE2\x98\xF0", 1);
+        check(b"\xE2\x98", 2);
+        check(b"\xF0\x9D\x9D\xF0", 1);
+        check(b"\xF0\x9D\x9D", 3);
+        check(b"\xF0\x82\x82\xAC", 1);
+        check(b"\xED\xA0\x80", 1);
+        check(b"\xED\xA0", 1);
+        check(b"\xED", 1);
+        check(b"a\xCE", 1);
+        check(b"a\xE2\x98", 2);
+        check(b"a\xF0\x9D\x9C", 3);
+    }
+
+    #[test]
+    fn chars() {
+        for (i, &(forward, input)) in LOSSY_TESTS.iter().enumerate() {
+            let bytes = B(input);
+
+            // Forward iteration, with and without indices.
+            assert_eq!(
+                forward,
+                bytes.chars().collect::<String>(),
+                "chars(ith: {:?}, given: {:?})",
+                i,
+                input,
+            );
+            assert_eq!(
+                forward,
+                bytes
+                    .char_indices()
+                    .map(|(_, _, ch)| ch)
+                    .collect::<String>(),
+                "char_indices(ith: {:?}, given: {:?})",
+                i,
+                input,
+            );
+
+            // Reverse iteration must yield the same characters backwards.
+            let backward = forward.chars().rev().collect::<String>();
+            assert_eq!(
+                backward,
+                bytes.chars().rev().collect::<String>(),
+                "chars.rev(ith: {:?}, given: {:?})",
+                i,
+                input,
+            );
+            assert_eq!(
+                backward,
+                bytes
+                    .char_indices()
+                    .rev()
+                    .map(|(_, _, ch)| ch)
+                    .collect::<String>(),
+                "char_indices.rev(ith: {:?}, given: {:?})",
+                i,
+                input,
+            );
+        }
+    }
+
+    #[test]
+    fn utf8_chunks() {
+        // Shorthand for one expected item of the chunk iterator.
+        let chunk =
+            |valid: &'static str, invalid: &'static [u8], incomplete: bool| {
+                Some(utf8::Utf8Chunk {
+                    valid,
+                    invalid: invalid.as_bstr(),
+                    incomplete,
+                })
+            };
+
+        let mut it = utf8::Utf8Chunks { bytes: b"123\xC0" };
+        let got = (it.next(), it.next());
+        assert_eq!(got, (chunk("123", b"\xC0", false), None));
+
+        let mut it = utf8::Utf8Chunks { bytes: b"123\xFF\xFF" };
+        let got = (it.next(), it.next(), it.next());
+        assert_eq!(
+            got,
+            (
+                chunk("123", b"\xFF", false),
+                chunk("", b"\xFF", false),
+                None,
+            )
+        );
+
+        let mut it = utf8::Utf8Chunks { bytes: b"123\xD0" };
+        let got = (it.next(), it.next());
+        assert_eq!(got, (chunk("123", b"\xD0", true), None));
+
+        let mut it = utf8::Utf8Chunks { bytes: b"123\xD0456" };
+        let got = (it.next(), it.next(), it.next());
+        assert_eq!(
+            got,
+            (
+                chunk("123", b"\xD0", false),
+                chunk("456", b"", false),
+                None,
+            )
+        );
+
+        let mut it = utf8::Utf8Chunks { bytes: b"123\xE2\x98" };
+        let got = (it.next(), it.next());
+        assert_eq!(got, (chunk("123", b"\xE2\x98", true), None));
+
+        let mut it = utf8::Utf8Chunks { bytes: b"123\xF4\x8F\xBF" };
+        let got = (it.next(), it.next());
+        assert_eq!(got, (chunk("123", b"\xF4\x8F\xBF", true), None));
+    }
+}