summary refs log tree commit diff stats
path: root/vendor/bstr/src/io.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/bstr/src/io.rs')
-rw-r--r-- vendor/bstr/src/io.rs 514
1 files changed, 0 insertions, 514 deletions
diff --git a/vendor/bstr/src/io.rs b/vendor/bstr/src/io.rs
deleted file mode 100644
index ad6f3c1bb..000000000
--- a/vendor/bstr/src/io.rs
+++ /dev/null
@@ -1,514 +0,0 @@
-/*!
-Utilities for working with I/O using byte strings.
-
-This module currently only exports a single trait, `BufReadExt`, which provides
-facilities for conveniently and efficiently working with lines as byte strings.
-
-More APIs may be added in the future.
-*/
-
-use std::io;
-
-use crate::ext_slice::ByteSlice;
-use crate::ext_vec::ByteVec;
-
-/// An extension trait for
-/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html)
-/// which provides convenience APIs for dealing with byte strings.
-pub trait BufReadExt: io::BufRead {
- /// Returns an iterator over the lines of this reader, where each line
- /// is represented as a byte string.
- ///
- /// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where
- /// an error is yielded if there was a problem reading from the underlying
- /// reader.
- ///
- /// On success, the next line in the iterator is returned. The line does
- /// *not* contain a trailing `\n` or `\r\n`.
- ///
- /// # Examples
- ///
- /// Basic usage:
- ///
- /// ```
- /// use std::io;
- ///
- /// use bstr::io::BufReadExt;
- ///
- /// # fn example() -> Result<(), io::Error> {
- /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
- ///
- /// let mut lines = vec![];
- /// for result in cursor.byte_lines() {
- /// let line = result?;
- /// lines.push(line);
- /// }
- /// assert_eq!(lines.len(), 3);
- /// assert_eq!(lines[0], "lorem".as_bytes());
- /// assert_eq!(lines[1], "ipsum".as_bytes());
- /// assert_eq!(lines[2], "dolor".as_bytes());
- /// # Ok(()) }; example().unwrap()
- /// ```
- fn byte_lines(self) -> ByteLines<Self>
- where
- Self: Sized,
- {
- ByteLines { buf: self }
- }
-
- /// Returns an iterator over byte-terminated records of this reader, where
- /// each record is represented as a byte string.
- ///
- /// Each item yielded by this iterator is a `io::Result<Vec<u8>>`, where
- /// an error is yielded if there was a problem reading from the underlying
- /// reader.
- ///
- /// On success, the next record in the iterator is returned. The record
- /// does *not* contain its trailing terminator.
- ///
- /// Note that calling `byte_records(b'\n')` differs from `byte_lines()` in
- /// that it has no special handling for `\r`.
- ///
- /// # Examples
- ///
- /// Basic usage:
- ///
- /// ```
- /// use std::io;
- ///
- /// use bstr::io::BufReadExt;
- ///
- /// # fn example() -> Result<(), io::Error> {
- /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
- ///
- /// let mut records = vec![];
- /// for result in cursor.byte_records(b'\x00') {
- /// let record = result?;
- /// records.push(record);
- /// }
- /// assert_eq!(records.len(), 3);
- /// assert_eq!(records[0], "lorem".as_bytes());
- /// assert_eq!(records[1], "ipsum".as_bytes());
- /// assert_eq!(records[2], "dolor".as_bytes());
- /// # Ok(()) }; example().unwrap()
- /// ```
- fn byte_records(self, terminator: u8) -> ByteRecords<Self>
- where
- Self: Sized,
- {
- ByteRecords { terminator, buf: self }
- }
-
- /// Executes the given closure on each line in the underlying reader.
- ///
- /// If the closure returns an error (or if the underlying reader returns an
- /// error), then iteration is stopped and the error is returned. If false
- /// is returned, then iteration is stopped and no error is returned.
- ///
- /// The closure given is called on exactly the same values as yielded by
- /// the [`byte_lines`](trait.BufReadExt.html#method.byte_lines)
- /// iterator. Namely, lines do _not_ contain trailing `\n` or `\r\n` bytes.
- ///
- /// This routine is useful for iterating over lines as quickly as
- /// possible. Namely, a single allocation is reused for each line.
- ///
- /// # Examples
- ///
- /// Basic usage:
- ///
- /// ```
- /// use std::io;
- ///
- /// use bstr::io::BufReadExt;
- ///
- /// # fn example() -> Result<(), io::Error> {
- /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
- ///
- /// let mut lines = vec![];
- /// cursor.for_byte_line(|line| {
- /// lines.push(line.to_vec());
- /// Ok(true)
- /// })?;
- /// assert_eq!(lines.len(), 3);
- /// assert_eq!(lines[0], "lorem".as_bytes());
- /// assert_eq!(lines[1], "ipsum".as_bytes());
- /// assert_eq!(lines[2], "dolor".as_bytes());
- /// # Ok(()) }; example().unwrap()
- /// ```
- fn for_byte_line<F>(self, mut for_each_line: F) -> io::Result<()>
- where
- Self: Sized,
- F: FnMut(&[u8]) -> io::Result<bool>,
- {
- self.for_byte_line_with_terminator(|line| {
- for_each_line(&trim_line_slice(&line))
- })
- }
-
- /// Executes the given closure on each byte-terminated record in the
- /// underlying reader.
- ///
- /// If the closure returns an error (or if the underlying reader returns an
- /// error), then iteration is stopped and the error is returned. If false
- /// is returned, then iteration is stopped and no error is returned.
- ///
- /// The closure given is called on exactly the same values as yielded by
- /// the [`byte_records`](trait.BufReadExt.html#method.byte_records)
- /// iterator. Namely, records do _not_ contain a trailing terminator byte.
- ///
- /// This routine is useful for iterating over records as quickly as
- /// possible. Namely, a single allocation is reused for each record.
- ///
- /// # Examples
- ///
- /// Basic usage:
- ///
- /// ```
- /// use std::io;
- ///
- /// use bstr::io::BufReadExt;
- ///
- /// # fn example() -> Result<(), io::Error> {
- /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
- ///
- /// let mut records = vec![];
- /// cursor.for_byte_record(b'\x00', |record| {
- /// records.push(record.to_vec());
- /// Ok(true)
- /// })?;
- /// assert_eq!(records.len(), 3);
- /// assert_eq!(records[0], "lorem".as_bytes());
- /// assert_eq!(records[1], "ipsum".as_bytes());
- /// assert_eq!(records[2], "dolor".as_bytes());
- /// # Ok(()) }; example().unwrap()
- /// ```
- fn for_byte_record<F>(
- self,
- terminator: u8,
- mut for_each_record: F,
- ) -> io::Result<()>
- where
- Self: Sized,
- F: FnMut(&[u8]) -> io::Result<bool>,
- {
- self.for_byte_record_with_terminator(terminator, |chunk| {
- for_each_record(&trim_record_slice(&chunk, terminator))
- })
- }
-
- /// Executes the given closure on each line in the underlying reader.
- ///
- /// If the closure returns an error (or if the underlying reader returns an
- /// error), then iteration is stopped and the error is returned. If false
- /// is returned, then iteration is stopped and no error is returned.
- ///
- /// Unlike
- /// [`for_byte_line`](trait.BufReadExt.html#method.for_byte_line),
- /// the lines given to the closure *do* include the line terminator, if one
- /// exists.
- ///
- /// This routine is useful for iterating over lines as quickly as
- /// possible. Namely, a single allocation is reused for each line.
- ///
- /// This is identical to `for_byte_record_with_terminator` with a
- /// terminator of `\n`.
- ///
- /// # Examples
- ///
- /// Basic usage:
- ///
- /// ```
- /// use std::io;
- ///
- /// use bstr::io::BufReadExt;
- ///
- /// # fn example() -> Result<(), io::Error> {
- /// let cursor = io::Cursor::new(b"lorem\nipsum\r\ndolor");
- ///
- /// let mut lines = vec![];
- /// cursor.for_byte_line_with_terminator(|line| {
- /// lines.push(line.to_vec());
- /// Ok(true)
- /// })?;
- /// assert_eq!(lines.len(), 3);
- /// assert_eq!(lines[0], "lorem\n".as_bytes());
- /// assert_eq!(lines[1], "ipsum\r\n".as_bytes());
- /// assert_eq!(lines[2], "dolor".as_bytes());
- /// # Ok(()) }; example().unwrap()
- /// ```
- fn for_byte_line_with_terminator<F>(
- self,
- for_each_line: F,
- ) -> io::Result<()>
- where
- Self: Sized,
- F: FnMut(&[u8]) -> io::Result<bool>,
- {
- self.for_byte_record_with_terminator(b'\n', for_each_line)
- }
-
- /// Executes the given closure on each byte-terminated record in the
- /// underlying reader.
- ///
- /// If the closure returns an error (or if the underlying reader returns an
- /// error), then iteration is stopped and the error is returned. If false
- /// is returned, then iteration is stopped and no error is returned.
- ///
- /// Unlike
- /// [`for_byte_record`](trait.BufReadExt.html#method.for_byte_record),
- /// the records given to the closure *do* include the record terminator, if
- /// one exists.
- ///
- /// This routine is useful for iterating over records as quickly as
- /// possible. Namely, a single allocation is reused for each record.
- ///
- /// # Examples
- ///
- /// Basic usage:
- ///
- /// ```
- /// use std::io;
- ///
- /// use bstr::B;
- /// use bstr::io::BufReadExt;
- ///
- /// # fn example() -> Result<(), io::Error> {
- /// let cursor = io::Cursor::new(b"lorem\x00ipsum\x00dolor");
- ///
- /// let mut records = vec![];
- /// cursor.for_byte_record_with_terminator(b'\x00', |record| {
- /// records.push(record.to_vec());
- /// Ok(true)
- /// })?;
- /// assert_eq!(records.len(), 3);
- /// assert_eq!(records[0], B(b"lorem\x00"));
- /// assert_eq!(records[1], B("ipsum\x00"));
- /// assert_eq!(records[2], B("dolor"));
- /// # Ok(()) }; example().unwrap()
- /// ```
- fn for_byte_record_with_terminator<F>(
- mut self,
- terminator: u8,
- mut for_each_record: F,
- ) -> io::Result<()>
- where
- Self: Sized,
- F: FnMut(&[u8]) -> io::Result<bool>,
- {
- let mut bytes = vec![]; // holds a record that is not fully inside one `fill_buf` slice; cleared and reused
- let mut res = Ok(()); // carries a closure error out of the inner loop so the final `consume` still runs
- let mut consumed = 0; // bytes of the current `fill_buf` slice already lent out or copied
- 'outer: loop {
- // Lend out complete record slices from our buffer
- {
- let mut buf = self.fill_buf()?;
- while let Some(index) = buf.find_byte(terminator) {
- let (record, rest) = buf.split_at(index + 1); // `index + 1` keeps the terminator in `record`
- buf = rest;
- consumed += record.len();
- match for_each_record(&record) {
- Ok(false) => break 'outer,
- Err(err) => {
- res = Err(err);
- break 'outer;
- }
- _ => (),
- }
- }
-
- // Copy the final record fragment to our local buffer. This
- // saves read_until() from re-scanning a buffer we know
- // contains no remaining terminators.
- bytes.extend_from_slice(&buf);
- consumed += buf.len();
- }
-
- self.consume(consumed); // the whole slice has now been either lent out or copied into `bytes`
- consumed = 0;
-
- // N.B. read_until uses a different version of memchr that may
- // be slower than the memchr crate that bstr uses. However, this
- // should only run for a fairly small number of records, assuming a
- // decent buffer size.
- self.read_until(terminator, &mut bytes)?;
- if bytes.is_empty() || !for_each_record(&bytes)? { // empty: no fragment left over and nothing more was read
- break;
- }
- bytes.clear();
- }
- self.consume(consumed); // non-zero only after `break 'outer`, where lent-out records were not yet consumed
- res
- }
-}
-
-impl<B: io::BufRead> BufReadExt for B {} // blanket impl: every `io::BufRead` type gets the `BufReadExt` methods
-
-/// An iterator over lines from an instance of
-/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html).
-///
-/// This iterator is generally created by calling the
-/// [`byte_lines`](trait.BufReadExt.html#method.byte_lines)
-/// method on the
-/// [`BufReadExt`](trait.BufReadExt.html)
-/// trait.
-#[derive(Debug)]
-pub struct ByteLines<B> {
- buf: B, // the wrapped reader; `next` pulls one line from it per call
-}
-
-/// An iterator over records from an instance of
-/// [`std::io::BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html).
-///
-/// A byte record is any sequence of bytes terminated by a particular byte
-/// chosen by the caller. For example, NUL separated byte strings are said to
-/// be NUL-terminated byte records.
-///
-/// This iterator is generally created by calling the
-/// [`byte_records`](trait.BufReadExt.html#method.byte_records)
-/// method on the
-/// [`BufReadExt`](trait.BufReadExt.html)
-/// trait.
-#[derive(Debug)]
-pub struct ByteRecords<B> {
- buf: B, // the wrapped reader; `next` pulls one record from it per call
- terminator: u8, // the byte that ends each record; stripped from yielded items
-}
-
-impl<B: io::BufRead> Iterator for ByteLines<B> {
- type Item = io::Result<Vec<u8>>;
-
- fn next(&mut self) -> Option<io::Result<Vec<u8>>> {
- let mut bytes = vec![]; // a fresh buffer per call, because each line is handed to the caller by value
- match self.buf.read_until(b'\n', &mut bytes) {
- Err(e) => Some(Err(e)), // a read error is yielded as an item; it does not end the iterator by itself
- Ok(0) => None, // nothing was read: treated as end of input
- Ok(_) => {
- trim_line(&mut bytes); // drop the trailing `\n` or `\r\n` before yielding
- Some(Ok(bytes))
- }
- }
- }
-}
-
-impl<B: io::BufRead> Iterator for ByteRecords<B> {
- type Item = io::Result<Vec<u8>>;
-
- fn next(&mut self) -> Option<io::Result<Vec<u8>>> {
- let mut bytes = vec![]; // a fresh buffer per call, because each record is handed to the caller by value
- match self.buf.read_until(self.terminator, &mut bytes) {
- Err(e) => Some(Err(e)), // a read error is yielded as an item; it does not end the iterator by itself
- Ok(0) => None, // nothing was read: treated as end of input
- Ok(_) => {
- trim_record(&mut bytes, self.terminator); // drop the trailing terminator byte, if present
- Some(Ok(bytes))
- }
- }
- }
-}
-
-fn trim_line(line: &mut Vec<u8>) { // removes one trailing `\n`, or `\r\n`, from `line` in place
- if line.last_byte() == Some(b'\n') {
- line.pop_byte();
- if line.last_byte() == Some(b'\r') { // checked only after a `\n` was removed, so a lone trailing `\r` is kept
- line.pop_byte();
- }
- }
-}
-
-fn trim_line_slice(mut line: &[u8]) -> &[u8] { // borrowed counterpart of `trim_line`: returns `line` without one trailing `\n` or `\r\n`
- if line.last_byte() == Some(b'\n') {
- line = &line[..line.len() - 1]; // cannot underflow: the check above found a last byte
- if line.last_byte() == Some(b'\r') { // checked only after a `\n` was removed, so a lone trailing `\r` is kept
- line = &line[..line.len() - 1];
- }
- }
- line
-}
-
-fn trim_record(record: &mut Vec<u8>, terminator: u8) { // removes one trailing `terminator` byte from `record` in place, if present
- if record.last_byte() == Some(terminator) {
- record.pop_byte();
- }
-}
-
-fn trim_record_slice(mut record: &[u8], terminator: u8) -> &[u8] { // borrowed counterpart of `trim_record`: returns `record` without one trailing `terminator`
- if record.last_byte() == Some(terminator) {
- record = &record[..record.len() - 1]; // cannot underflow: the check above found a last byte
- }
- record
-}
-
-#[cfg(test)]
-mod tests {
- use super::BufReadExt;
- use crate::bstring::BString;
-
- fn collect_lines<B: AsRef<[u8]>>(slice: B) -> Vec<BString> { // gathers every line that `for_byte_line` passes to its closure
- let mut lines = vec![];
- slice
- .as_ref() // the byte slice itself serves as the reader
- .for_byte_line(|line| {
- lines.push(BString::from(line.to_vec()));
- Ok(true) // never stop early
- })
- .unwrap();
- lines
- }
-
- fn collect_lines_term<B: AsRef<[u8]>>(slice: B) -> Vec<BString> { // same as `collect_lines`, but via `for_byte_line_with_terminator`
- let mut lines = vec![];
- slice
- .as_ref() // the byte slice itself serves as the reader
- .for_byte_line_with_terminator(|line| {
- lines.push(BString::from(line.to_vec()));
- Ok(true) // never stop early
- })
- .unwrap();
- lines
- }
-
- #[test]
- fn lines_without_terminator() {
- assert_eq!(collect_lines(""), Vec::<BString>::new()); // empty input: the closure is never called
-
- assert_eq!(collect_lines("\n"), vec![""]);
- assert_eq!(collect_lines("\n\n"), vec!["", ""]);
- assert_eq!(collect_lines("a\nb\n"), vec!["a", "b"]);
- assert_eq!(collect_lines("a\nb"), vec!["a", "b"]); // a final line without a terminator is still reported
- assert_eq!(collect_lines("abc\nxyz\n"), vec!["abc", "xyz"]);
- assert_eq!(collect_lines("abc\nxyz"), vec!["abc", "xyz"]);
-
- assert_eq!(collect_lines("\r\n"), vec![""]);
- assert_eq!(collect_lines("\r\n\r\n"), vec!["", ""]);
- assert_eq!(collect_lines("a\r\nb\r\n"), vec!["a", "b"]);
- assert_eq!(collect_lines("a\r\nb"), vec!["a", "b"]);
- assert_eq!(collect_lines("abc\r\nxyz\r\n"), vec!["abc", "xyz"]);
- assert_eq!(collect_lines("abc\r\nxyz"), vec!["abc", "xyz"]);
-
- assert_eq!(collect_lines("abc\rxyz"), vec!["abc\rxyz"]); // a lone `\r` does not split lines
- }
-
- #[test]
- fn lines_with_terminator() {
- assert_eq!(collect_lines_term(""), Vec::<BString>::new()); // empty input: the closure is never called
-
- assert_eq!(collect_lines_term("\n"), vec!["\n"]);
- assert_eq!(collect_lines_term("\n\n"), vec!["\n", "\n"]);
- assert_eq!(collect_lines_term("a\nb\n"), vec!["a\n", "b\n"]);
- assert_eq!(collect_lines_term("a\nb"), vec!["a\n", "b"]); // the final line has no terminator to keep
- assert_eq!(collect_lines_term("abc\nxyz\n"), vec!["abc\n", "xyz\n"]);
- assert_eq!(collect_lines_term("abc\nxyz"), vec!["abc\n", "xyz"]);
-
- assert_eq!(collect_lines_term("\r\n"), vec!["\r\n"]);
- assert_eq!(collect_lines_term("\r\n\r\n"), vec!["\r\n", "\r\n"]);
- assert_eq!(collect_lines_term("a\r\nb\r\n"), vec!["a\r\n", "b\r\n"]);
- assert_eq!(collect_lines_term("a\r\nb"), vec!["a\r\n", "b"]);
- assert_eq!(
- collect_lines_term("abc\r\nxyz\r\n"),
- vec!["abc\r\n", "xyz\r\n"]
- );
- assert_eq!(collect_lines_term("abc\r\nxyz"), vec!["abc\r\n", "xyz"]);
-
- assert_eq!(collect_lines_term("abc\rxyz"), vec!["abc\rxyz"]); // a lone `\r` does not split lines
- }
-}