diff options
Diffstat (limited to 'vendor/bstr/src/io.rs')
-rw-r--r-- | vendor/bstr/src/io.rs | 514 |
1 files changed, 0 insertions, 514 deletions
diff --git a/vendor/bstr/src/io.rs b/vendor/bstr/src/io.rs deleted file mode 100644 index ad6f3c1bb..000000000 --- a/vendor/bstr/src/io.rs +++ /dev/null @@ -1,514 +0,0 @@ -/*! -Utilities for working with I/O using byte strings. - -This module currently only exports a single trait, `BufReadExt`, which provides -facilities for conveniently and efficiently working with lines as byte strings. - -More APIs may be added in the future. -*/ - -use std::io; - -use crate::ext_slice::ByteSlice; -use crate::ext_vec::ByteVec; - -/// An extention trait for -/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html) -/// which provides convenience APIs for dealing with byte strings. -pub trait BufReadExt: io::BufRead { - /// Returns an iterator over the lines of this reader, where each line - /// is represented as a byte string. - /// - /// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where - /// an error is yielded if there was a problem reading from the underlying - /// reader. - /// - /// On success, the next line in the iterator is returned. The line does - /// *not* contain a trailing `\n` or `\r\n`. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use std::io; - /// - /// use bstr::io::BufReadExt; - /// - /// # fn example() -> Result<(), io::Error> { - /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor"); - /// - /// let mut lines = vec![]; - /// for result in cursor.byte_lines() { - /// let line = result?; - /// lines.push(line); - /// } - /// assert_eq!(lines.len(), 3); - /// assert_eq!(lines[0], "lorem".as_bytes()); - /// assert_eq!(lines[1], "ipsum".as_bytes()); - /// assert_eq!(lines[2], "dolor".as_bytes()); - /// # Ok(()) }; example().unwrap() - /// ``` - fn byte_lines(self) -> ByteLines<Self> - where - Self: Sized, - { - ByteLines { buf: self } - } - - /// Returns an iterator over byte-terminated records of this reader, where - /// each record is represented as a byte string. - /// - /// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where - /// an error is yielded if there was a problem reading from the underlying - /// reader. - /// - /// On success, the next record in the iterator is returned. The record - /// does *not* contain its trailing terminator. - /// - /// Note that calling `byte_records(b'\n')` differs from `byte_lines()` in - /// that it has no special handling for `\r`. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use std::io; - /// - /// use bstr::io::BufReadExt; - /// - /// # fn example() -> Result<(), io::Error> { - /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor"); - /// - /// let mut records = vec![]; - /// for result in cursor.byte_records(b'\x00') { - /// let record = result?; - /// records.push(record); - /// } - /// assert_eq!(records.len(), 3); - /// assert_eq!(records[0], "lorem".as_bytes()); - /// assert_eq!(records[1], "ipsum".as_bytes()); - /// assert_eq!(records[2], "dolor".as_bytes()); - /// # Ok(()) }; example().unwrap() - /// ``` - fn byte_records(self, terminator: u8) -> ByteRecords<Self> - where - Self: Sized, - { - ByteRecords { terminator, buf: self } - } - - /// Executes the given closure on each line in the underlying reader. - /// - /// If the closure returns an error (or if the underlying reader returns an - /// error), then iteration is stopped and the error is returned. If false - /// is returned, then iteration is stopped and no error is returned. - /// - /// The closure given is called on exactly the same values as yielded by - /// the [`byte_lines`](trait.BufReadExt.html#method.byte_lines) - /// iterator. Namely, lines do _not_ contain trailing `\n` or `\r\n` bytes. - /// - /// This routine is useful for iterating over lines as quickly as - /// possible. Namely, a single allocation is reused for each line. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use std::io; - /// - /// use bstr::io::BufReadExt; - /// - /// # fn example() -> Result<(), io::Error> { - /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor"); - /// - /// let mut lines = vec![]; - /// cursor.for_byte_line(|line| { - /// lines.push(line.to_vec()); - /// Ok(true) - /// })?; - /// assert_eq!(lines.len(), 3); - /// assert_eq!(lines[0], "lorem".as_bytes()); - /// assert_eq!(lines[1], "ipsum".as_bytes()); - /// assert_eq!(lines[2], "dolor".as_bytes()); - /// # Ok(()) }; example().unwrap() - /// ``` - fn for_byte_line<F>(self, mut for_each_line: F) -> io::Result<()> - where - Self: Sized, - F: FnMut(&[u8]) -> io::Result<bool>, - { - self.for_byte_line_with_terminator(|line| { - for_each_line(&trim_line_slice(&line)) - }) - } - - /// Executes the given closure on each byte-terminated record in the - /// underlying reader. - /// - /// If the closure returns an error (or if the underlying reader returns an - /// error), then iteration is stopped and the error is returned. If false - /// is returned, then iteration is stopped and no error is returned. - /// - /// The closure given is called on exactly the same values as yielded by - /// the [`byte_records`](trait.BufReadExt.html#method.byte_records) - /// iterator. Namely, records do _not_ contain a trailing terminator byte. - /// - /// This routine is useful for iterating over records as quickly as - /// possible. Namely, a single allocation is reused for each record. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use std::io; - /// - /// use bstr::io::BufReadExt; - /// - /// # fn example() -> Result<(), io::Error> { - /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor"); - /// - /// let mut records = vec![]; - /// cursor.for_byte_record(b'\x00', |record| { - /// records.push(record.to_vec()); - /// Ok(true) - /// })?; - /// assert_eq!(records.len(), 3); - /// assert_eq!(records[0], "lorem".as_bytes()); - /// assert_eq!(records[1], "ipsum".as_bytes()); - /// assert_eq!(records[2], "dolor".as_bytes()); - /// # Ok(()) }; example().unwrap() - /// ``` - fn for_byte_record<F>( - self, - terminator: u8, - mut for_each_record: F, - ) -> io::Result<()> - where - Self: Sized, - F: FnMut(&[u8]) -> io::Result<bool>, - { - self.for_byte_record_with_terminator(terminator, |chunk| { - for_each_record(&trim_record_slice(&chunk, terminator)) - }) - } - - /// Executes the given closure on each line in the underlying reader. - /// - /// If the closure returns an error (or if the underlying reader returns an - /// error), then iteration is stopped and the error is returned. If false - /// is returned, then iteration is stopped and no error is returned. - /// - /// Unlike - /// [`for_byte_line`](trait.BufReadExt.html#method.for_byte_line), - /// the lines given to the closure *do* include the line terminator, if one - /// exists. - /// - /// This routine is useful for iterating over lines as quickly as - /// possible. Namely, a single allocation is reused for each line. - /// - /// This is identical to `for_byte_record_with_terminator` with a - /// terminator of `\n`. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use std::io; - /// - /// use bstr::io::BufReadExt; - /// - /// # fn example() -> Result<(), io::Error> { - /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor"); - /// - /// let mut lines = vec![]; - /// cursor.for_byte_line_with_terminator(|line| { - /// lines.push(line.to_vec()); - /// Ok(true) - /// })?; - /// assert_eq!(lines.len(), 3); - /// assert_eq!(lines[0], "lorem\n".as_bytes()); - /// assert_eq!(lines[1], "ipsum\r\n".as_bytes()); - /// assert_eq!(lines[2], "dolor".as_bytes()); - /// # Ok(()) }; example().unwrap() - /// ``` - fn for_byte_line_with_terminator<F>( - self, - for_each_line: F, - ) -> io::Result<()> - where - Self: Sized, - F: FnMut(&[u8]) -> io::Result<bool>, - { - self.for_byte_record_with_terminator(b'\n', for_each_line) - } - - /// Executes the given closure on each byte-terminated record in the - /// underlying reader. - /// - /// If the closure returns an error (or if the underlying reader returns an - /// error), then iteration is stopped and the error is returned. If false - /// is returned, then iteration is stopped and no error is returned. - /// - /// Unlike - /// [`for_byte_record`](trait.BufReadExt.html#method.for_byte_record), - /// the lines given to the closure *do* include the record terminator, if - /// one exists. - /// - /// This routine is useful for iterating over records as quickly as - /// possible. Namely, a single allocation is reused for each record. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use std::io; - /// - /// use bstr::B; - /// use bstr::io::BufReadExt; - /// - /// # fn example() -> Result<(), io::Error> { - /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor"); - /// - /// let mut records = vec![]; - /// cursor.for_byte_record_with_terminator(b'\x00', |record| { - /// records.push(record.to_vec()); - /// Ok(true) - /// })?; - /// assert_eq!(records.len(), 3); - /// assert_eq!(records[0], B(b"lorem\x00")); - /// assert_eq!(records[1], B("ipsum\x00")); - /// assert_eq!(records[2], B("dolor")); - /// # Ok(()) }; example().unwrap() - /// ``` - fn for_byte_record_with_terminator<F>( - mut self, - terminator: u8, - mut for_each_record: F, - ) -> io::Result<()> - where - Self: Sized, - F: FnMut(&[u8]) -> io::Result<bool>, - { - let mut bytes = vec![]; - let mut res = Ok(()); - let mut consumed = 0; - 'outer: loop { - // Lend out complete record slices from our buffer - { - let mut buf = self.fill_buf()?; - while let Some(index) = buf.find_byte(terminator) { - let (record, rest) = buf.split_at(index + 1); - buf = rest; - consumed += record.len(); - match for_each_record(&record) { - Ok(false) => break 'outer, - Err(err) => { - res = Err(err); - break 'outer; - } - _ => (), - } - } - - // Copy the final record fragment to our local buffer. This - // saves read_until() from re-scanning a buffer we know - // contains no remaining terminators. - bytes.extend_from_slice(&buf); - consumed += buf.len(); - } - - self.consume(consumed); - consumed = 0; - - // N.B. read_until uses a different version of memchr that may - // be slower than the memchr crate that bstr uses. However, this - // should only run for a fairly small number of records, assuming a - // decent buffer size. - self.read_until(terminator, &mut bytes)?; - if bytes.is_empty() || !for_each_record(&bytes)? { - break; - } - bytes.clear(); - } - self.consume(consumed); - res - } -} - -impl<B: io::BufRead> BufReadExt for B {} - -/// An iterator over lines from an instance of -/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html). -/// -/// This iterator is generally created by calling the -/// [`byte_lines`](trait.BufReadExt.html#method.byte_lines) -/// method on the -/// [`BufReadExt`](trait.BufReadExt.html) -/// trait. -#[derive(Debug)] -pub struct ByteLines<B> { - buf: B, -} - -/// An iterator over records from an instance of -/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html). -/// -/// A byte record is any sequence of bytes terminated by a particular byte -/// chosen by the caller. For example, NUL separated byte strings are said to -/// be NUL-terminated byte records. -/// -/// This iterator is generally created by calling the -/// [`byte_records`](trait.BufReadExt.html#method.byte_records) -/// method on the -/// [`BufReadExt`](trait.BufReadExt.html) -/// trait. -#[derive(Debug)] -pub struct ByteRecords<B> { - buf: B, - terminator: u8, -} - -impl<B: io::BufRead> Iterator for ByteLines<B> { - type Item = io::Result<Vec<u8>>; - - fn next(&mut self) -> Option<io::Result<Vec<u8>>> { - let mut bytes = vec![]; - match self.buf.read_until(b'\n', &mut bytes) { - Err(e) => Some(Err(e)), - Ok(0) => None, - Ok(_) => { - trim_line(&mut bytes); - Some(Ok(bytes)) - } - } - } -} - -impl<B: io::BufRead> Iterator for ByteRecords<B> { - type Item = io::Result<Vec<u8>>; - - fn next(&mut self) -> Option<io::Result<Vec<u8>>> { - let mut bytes = vec![]; - match self.buf.read_until(self.terminator, &mut bytes) { - Err(e) => Some(Err(e)), - Ok(0) => None, - Ok(_) => { - trim_record(&mut bytes, self.terminator); - Some(Ok(bytes)) - } - } - } -} - -fn trim_line(line: &mut Vec<u8>) { - if line.last_byte() == Some(b'\n') { - line.pop_byte(); - if line.last_byte() == Some(b'\r') { - line.pop_byte(); - } - } -} - -fn trim_line_slice(mut line: &[u8]) -> &[u8] { - if line.last_byte() == Some(b'\n') { - line = &line[..line.len() - 1]; - if line.last_byte() == Some(b'\r') { - line = &line[..line.len() - 1]; - } - } - line -} - -fn trim_record(record: &mut Vec<u8>, terminator: u8) { - if record.last_byte() == Some(terminator) { - record.pop_byte(); - } -} - -fn trim_record_slice(mut record: &[u8], terminator: u8) -> &[u8] { - if record.last_byte() == Some(terminator) { - record = &record[..record.len() - 1]; - } - record -} - -#[cfg(test)] -mod tests { - use super::BufReadExt; - use crate::bstring::BString; - - fn collect_lines<B: AsRef<[u8]>>(slice: B) -> Vec<BString> { - let mut lines = vec![]; - slice - .as_ref() - .for_byte_line(|line| { - lines.push(BString::from(line.to_vec())); - Ok(true) - }) - .unwrap(); - lines - } - - fn collect_lines_term<B: AsRef<[u8]>>(slice: B) -> Vec<BString> { - let mut lines = vec![]; - slice - .as_ref() - .for_byte_line_with_terminator(|line| { - lines.push(BString::from(line.to_vec())); - Ok(true) - }) - .unwrap(); - lines - } - - #[test] - fn lines_without_terminator() { - assert_eq!(collect_lines(""), Vec::<BString>::new()); - - assert_eq!(collect_lines("\n"), vec![""]); - assert_eq!(collect_lines("\n\n"), vec!["", ""]); - assert_eq!(collect_lines("a\nb\n"), vec!["a", "b"]); - assert_eq!(collect_lines("a\nb"), vec!["a", "b"]); - assert_eq!(collect_lines("abc\nxyz\n"), vec!["abc", "xyz"]); - assert_eq!(collect_lines("abc\nxyz"), vec!["abc", "xyz"]); - - assert_eq!(collect_lines("\r\n"), vec![""]); - assert_eq!(collect_lines("\r\n\r\n"), vec!["", ""]); - assert_eq!(collect_lines("a\r\nb\r\n"), vec!["a", "b"]); - assert_eq!(collect_lines("a\r\nb"), vec!["a", "b"]); - assert_eq!(collect_lines("abc\r\nxyz\r\n"), vec!["abc", "xyz"]); - assert_eq!(collect_lines("abc\r\nxyz"), vec!["abc", "xyz"]); - - assert_eq!(collect_lines("abc\rxyz"), vec!["abc\rxyz"]); - } - - #[test] - fn lines_with_terminator() { - assert_eq!(collect_lines_term(""), Vec::<BString>::new()); - - assert_eq!(collect_lines_term("\n"), vec!["\n"]); - assert_eq!(collect_lines_term("\n\n"), vec!["\n", "\n"]); - assert_eq!(collect_lines_term("a\nb\n"), vec!["a\n", "b\n"]); - assert_eq!(collect_lines_term("a\nb"), vec!["a\n", "b"]); - assert_eq!(collect_lines_term("abc\nxyz\n"), vec!["abc\n", "xyz\n"]); - assert_eq!(collect_lines_term("abc\nxyz"), vec!["abc\n", "xyz"]); - - assert_eq!(collect_lines_term("\r\n"), vec!["\r\n"]); - assert_eq!(collect_lines_term("\r\n\r\n"), vec!["\r\n", "\r\n"]); - assert_eq!(collect_lines_term("a\r\nb\r\n"), vec!["a\r\n", "b\r\n"]); - assert_eq!(collect_lines_term("a\r\nb"), vec!["a\r\n", "b"]); - assert_eq!( - collect_lines_term("abc\r\nxyz\r\n"), - vec!["abc\r\n", "xyz\r\n"] - ); - assert_eq!(collect_lines_term("abc\r\nxyz"), vec!["abc\r\n", "xyz"]); - - assert_eq!(collect_lines_term("abc\rxyz"), vec!["abc\rxyz"]); - } -} |