// Copyright 2015-2017 Daniel P. Clark & array_tool Developers // // Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be // copied, modified, or distributed except according to those terms. /// A grapheme iterator that produces the bytes for each grapheme. #[derive(Debug)] pub struct GraphemeBytesIter<'a> { source: &'a str, offset: usize, grapheme_count: usize, } impl<'a> GraphemeBytesIter<'a> { /// Creates a new grapheme iterator from a string source. pub fn new(source: &'a str) -> GraphemeBytesIter<'a> { GraphemeBytesIter { source: source, offset: 0, grapheme_count: 0, } } } impl<'a> Iterator for GraphemeBytesIter<'a> { type Item = &'a [u8]; fn next(&mut self) -> Option<&'a [u8]> { let mut result: Option<&[u8]> = None; let mut idx = self.offset; for _ in self.offset..self.source.len() { idx += 1; if self.offset < self.source.len() { if self.source.is_char_boundary(idx) { let slice: &[u8] = self.source[self.offset..idx].as_bytes(); self.grapheme_count += 1; self.offset = idx; result = Some(slice); break } } } result } } impl<'a> ExactSizeIterator for GraphemeBytesIter<'a> { fn len(&self) -> usize { self.source.chars().count() } } /// ToGraphemeBytesIter - create an iterator to return bytes for each grapheme in a string. pub trait ToGraphemeBytesIter<'a> { /// Returns a GraphemeBytesIter which you may iterate over. /// /// # Example /// ``` /// use array_tool::string::ToGraphemeBytesIter; /// /// let string = "a s—d féZ"; /// let mut graphemes = string.grapheme_bytes_iter(); /// graphemes.skip(3).next(); /// ``` /// /// # Output /// ```text /// [226, 128, 148] /// ``` fn grapheme_bytes_iter(&'a self) -> GraphemeBytesIter<'a>; } impl<'a> ToGraphemeBytesIter<'a> for str { fn grapheme_bytes_iter(&'a self) -> GraphemeBytesIter<'a> { GraphemeBytesIter::new(&self) } } /// Squeeze - squeezes duplicate characters down to one each pub trait Squeeze { /// # Example /// ``` /// use array_tool::string::Squeeze; /// /// "yellow moon".squeeze(""); /// ``` /// /// # Output /// ```text /// "yelow mon" /// ``` fn squeeze(&self, targets: &'static str) -> String; } impl Squeeze for str { fn squeeze(&self, targets: &'static str) -> String { let mut output = Vec::::with_capacity(self.len()); let everything: bool = targets.is_empty(); let chars = targets.grapheme_bytes_iter().collect::>(); let mut last: &[u8] = &[0]; for character in self.grapheme_bytes_iter() { if last != character { output.extend_from_slice(character); } else if !(everything || chars.contains(&character)) { output.extend_from_slice(character); } last = character; } String::from_utf8(output).expect("squeeze failed to render String!") } } /// Justify - expand line to given width. pub trait Justify { /// # Example /// ``` /// use array_tool::string::Justify; /// /// "asd asdf asd".justify_line(14); /// ``` /// /// # Output /// ```text /// "asd asdf asd" /// ``` fn justify_line(&self, width: usize) -> String; } impl Justify for str { fn justify_line(&self, width: usize) -> String { if self.is_empty() { return format!("{}", self) }; let trimmed = self.trim() ; let len = trimmed.chars().count(); if len >= width { return self.to_string(); }; let difference = width - len; let iter = trimmed.split_whitespace(); let spaces = iter.count() - 1; let mut iter = trimmed.split_whitespace().peekable(); if spaces == 0 { return self.to_string(); } let mut obj = String::with_capacity(trimmed.len() + spaces); let div = difference / spaces; let mut remainder = difference % spaces; while let Some(x) = iter.next() { obj.push_str( x ); let val = if remainder > 0 { remainder = remainder - 1; div + 1 } else { div }; for _ in 0..val+1 { if let Some(_) = iter.peek() { // Don't add spaces if last word obj.push_str( " " ); } } } obj } } /// Substitute string character for each index given. pub trait SubstMarks { /// # Example /// ``` /// use array_tool::string::SubstMarks; /// /// "asdf asdf asdf".subst_marks(vec![0,5,8], "Z"); /// ``` /// /// # Output /// ```text /// "Zsdf ZsdZ asdf" /// ``` fn subst_marks(&self, marks: Vec, chr: &'static str) -> String; } impl SubstMarks for str { fn subst_marks(&self, marks: Vec, chr: &'static str) -> String { let mut output = Vec::::with_capacity(self.len()); let mut count = 0; let mut last = 0; for i in 0..self.len() { let idx = i + 1; if self.is_char_boundary(idx) { if marks.contains(&count) { count += 1; last = idx; output.extend_from_slice(chr.as_bytes()); continue } let slice: &[u8] = self[last..idx].as_bytes(); output.extend_from_slice(slice); count += 1; last = idx } } String::from_utf8(output).expect("subst_marks failed to render String!") } } /// After whitespace pub trait AfterWhitespace { /// Given offset method will seek from there to end of string to find the first /// non white space. Resulting value is counted from offset. /// /// # Example /// ``` /// use array_tool::string::AfterWhitespace; /// /// assert_eq!( /// "asdf asdf asdf".seek_end_of_whitespace(6), /// Some(9) /// ); /// ``` fn seek_end_of_whitespace(&self, offset: usize) -> Option; } impl AfterWhitespace for str { fn seek_end_of_whitespace(&self, offset: usize) -> Option { if self.len() < offset { return None; }; let mut seeker = self[offset..self.len()].chars(); let mut val = None; let mut indx = 0; while let Some(x) = seeker.next() { if x.ne(&" ".chars().next().unwrap()) { val = Some(indx); break; } indx += 1; } val } } /// Word wrapping pub trait WordWrap { /// White space is treated as valid content and new lines will only be swapped in for /// the last white space character at the end of the given width. White space may reach beyond /// the width you've provided. You will need to trim end of lines in your own output (e.g. /// splitting string at each new line and printing the line with trim_right). Or just trust /// that lines that are beyond the width are just white space and only print the width - /// ignoring tailing white space. /// /// # Example /// ``` /// use array_tool::string::WordWrap; /// /// "asd asdf asd".word_wrap(8); /// ``` /// /// # Output /// ```text /// "asd asdf\nasd" /// ``` fn word_wrap(&self, width: usize) -> String; } // No need to worry about character encoding since we're only checking for the // space and new line characters. impl WordWrap for &'static str { fn word_wrap(&self, width: usize) -> String { let mut markers = vec![]; fn wordwrap(t: &'static str, chunk: usize, offset: usize, mrkrs: &mut Vec) -> String { match t[offset..*vec![offset+chunk,t.len()].iter().min().unwrap()].rfind("\n") { None => { match t[offset..*vec![offset+chunk,t.len()].iter().min().unwrap()].rfind(" ") { Some(x) => { let mut eows = x; // end of white space if offset+chunk < t.len() { // check if white space continues match t.seek_end_of_whitespace(offset+x) { Some(a) => { if a.ne(&0) { eows = x+a-1; } }, None => {}, } } if offset+chunk < t.len() { // safe to seek ahead by 1 or not end of string if !["\n".chars().next().unwrap(), " ".chars().next().unwrap()].contains( &t[offset+eows+1..offset+eows+2].chars().next().unwrap() ) { mrkrs.push(offset+eows) } }; wordwrap(t, chunk, offset+eows+1, mrkrs) }, None => { if offset+chunk < t.len() { // String may continue wordwrap(t, chunk, offset+1, mrkrs) // Recurse + 1 until next space } else { use string::SubstMarks; return t.subst_marks(mrkrs.to_vec(), "\n") } }, } }, Some(x) => { wordwrap(t, chunk, offset+x+1, mrkrs) }, } }; wordwrap(self, width+1, 0, &mut markers) } }