diff options
Diffstat (limited to '')
-rw-r--r-- | library/std/src/io/buffered/bufwriter.rs | 674 |
1 files changed, 674 insertions, 0 deletions
diff --git a/library/std/src/io/buffered/bufwriter.rs b/library/std/src/io/buffered/bufwriter.rs new file mode 100644 index 000000000..6acb937e7 --- /dev/null +++ b/library/std/src/io/buffered/bufwriter.rs @@ -0,0 +1,674 @@ +use crate::error; +use crate::fmt; +use crate::io::{ + self, ErrorKind, IntoInnerError, IoSlice, Seek, SeekFrom, Write, DEFAULT_BUF_SIZE, +}; +use crate::mem; +use crate::ptr; + +/// Wraps a writer and buffers its output. +/// +/// It can be excessively inefficient to work directly with something that +/// implements [`Write`]. For example, every call to +/// [`write`][`TcpStream::write`] on [`TcpStream`] results in a system call. A +/// `BufWriter<W>` keeps an in-memory buffer of data and writes it to an underlying +/// writer in large, infrequent batches. +/// +/// `BufWriter<W>` can improve the speed of programs that make *small* and +/// *repeated* write calls to the same file or network socket. It does not +/// help when writing very large amounts at once, or writing just one or a few +/// times. It also provides no advantage when writing to a destination that is +/// in memory, like a <code>[Vec]\<u8></code>. +/// +/// It is critical to call [`flush`] before `BufWriter<W>` is dropped. Though +/// dropping will attempt to flush the contents of the buffer, any errors +/// that happen in the process of dropping will be ignored. Calling [`flush`] +/// ensures that the buffer is empty and thus dropping will not even attempt +/// file operations. +/// +/// # Examples +/// +/// Let's write the numbers one through ten to a [`TcpStream`]: +/// +/// ```no_run +/// use std::io::prelude::*; +/// use std::net::TcpStream; +/// +/// let mut stream = TcpStream::connect("127.0.0.1:34254").unwrap(); +/// +/// for i in 0..10 { +/// stream.write(&[i+1]).unwrap(); +/// } +/// ``` +/// +/// Because we're not buffering, we write each one in turn, incurring the +/// overhead of a system call per byte written. We can fix this with a +/// `BufWriter<W>`: +/// +/// ```no_run +/// use std::io::prelude::*; +/// use std::io::BufWriter; +/// use std::net::TcpStream; +/// +/// let mut stream = BufWriter::new(TcpStream::connect("127.0.0.1:34254").unwrap()); +/// +/// for i in 0..10 { +/// stream.write(&[i+1]).unwrap(); +/// } +/// stream.flush().unwrap(); +/// ``` +/// +/// By wrapping the stream with a `BufWriter<W>`, these ten writes are all grouped +/// together by the buffer and will all be written out in one system call when +/// the `stream` is flushed. +/// +// HACK(#78696): can't use `crate` for associated items +/// [`TcpStream::write`]: super::super::super::net::TcpStream::write +/// [`TcpStream`]: crate::net::TcpStream +/// [`flush`]: BufWriter::flush +#[stable(feature = "rust1", since = "1.0.0")] +pub struct BufWriter<W: Write> { + inner: W, + // The buffer. Avoid using this like a normal `Vec` in common code paths. + // That is, don't use `buf.push`, `buf.extend_from_slice`, or any other + // methods that require bounds checking or the like. This makes an enormous + // difference to performance (we may want to stop using a `Vec` entirely). + buf: Vec<u8>, + // #30888: If the inner writer panics in a call to write, we don't want to + // write the buffered data a second time in BufWriter's destructor. This + // flag tells the Drop impl if it should skip the flush. + panicked: bool, +} + +impl<W: Write> BufWriter<W> { + /// Creates a new `BufWriter<W>` with a default buffer capacity. The default is currently 8 KB, + /// but may change in the future. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufWriter; + /// use std::net::TcpStream; + /// + /// let mut buffer = BufWriter::new(TcpStream::connect("127.0.0.1:34254").unwrap()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn new(inner: W) -> BufWriter<W> { + BufWriter::with_capacity(DEFAULT_BUF_SIZE, inner) + } + + /// Creates a new `BufWriter<W>` with at least the specified buffer capacity. + /// + /// # Examples + /// + /// Creating a buffer with a buffer of at least a hundred bytes. + /// + /// ```no_run + /// use std::io::BufWriter; + /// use std::net::TcpStream; + /// + /// let stream = TcpStream::connect("127.0.0.1:34254").unwrap(); + /// let mut buffer = BufWriter::with_capacity(100, stream); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn with_capacity(capacity: usize, inner: W) -> BufWriter<W> { + BufWriter { inner, buf: Vec::with_capacity(capacity), panicked: false } + } + + /// Send data in our local buffer into the inner writer, looping as + /// necessary until either it's all been sent or an error occurs. + /// + /// Because all the data in the buffer has been reported to our owner as + /// "successfully written" (by returning nonzero success values from + /// `write`), any 0-length writes from `inner` must be reported as i/o + /// errors from this method. + pub(in crate::io) fn flush_buf(&mut self) -> io::Result<()> { + /// Helper struct to ensure the buffer is updated after all the writes + /// are complete. It tracks the number of written bytes and drains them + /// all from the front of the buffer when dropped. + struct BufGuard<'a> { + buffer: &'a mut Vec<u8>, + written: usize, + } + + impl<'a> BufGuard<'a> { + fn new(buffer: &'a mut Vec<u8>) -> Self { + Self { buffer, written: 0 } + } + + /// The unwritten part of the buffer + fn remaining(&self) -> &[u8] { + &self.buffer[self.written..] + } + + /// Flag some bytes as removed from the front of the buffer + fn consume(&mut self, amt: usize) { + self.written += amt; + } + + /// true if all of the bytes have been written + fn done(&self) -> bool { + self.written >= self.buffer.len() + } + } + + impl Drop for BufGuard<'_> { + fn drop(&mut self) { + if self.written > 0 { + self.buffer.drain(..self.written); + } + } + } + + let mut guard = BufGuard::new(&mut self.buf); + while !guard.done() { + self.panicked = true; + let r = self.inner.write(guard.remaining()); + self.panicked = false; + + match r { + Ok(0) => { + return Err(io::const_io_error!( + ErrorKind::WriteZero, + "failed to write the buffered data", + )); + } + Ok(n) => guard.consume(n), + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} + Err(e) => return Err(e), + } + } + Ok(()) + } + + /// Buffer some data without flushing it, regardless of the size of the + /// data. Writes as much as possible without exceeding capacity. Returns + /// the number of bytes written. + pub(super) fn write_to_buf(&mut self, buf: &[u8]) -> usize { + let available = self.spare_capacity(); + let amt_to_buffer = available.min(buf.len()); + + // SAFETY: `amt_to_buffer` is <= buffer's spare capacity by construction. + unsafe { + self.write_to_buffer_unchecked(&buf[..amt_to_buffer]); + } + + amt_to_buffer + } + + /// Gets a reference to the underlying writer. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufWriter; + /// use std::net::TcpStream; + /// + /// let mut buffer = BufWriter::new(TcpStream::connect("127.0.0.1:34254").unwrap()); + /// + /// // we can use reference just like buffer + /// let reference = buffer.get_ref(); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn get_ref(&self) -> &W { + &self.inner + } + + /// Gets a mutable reference to the underlying writer. + /// + /// It is inadvisable to directly write to the underlying writer. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufWriter; + /// use std::net::TcpStream; + /// + /// let mut buffer = BufWriter::new(TcpStream::connect("127.0.0.1:34254").unwrap()); + /// + /// // we can use reference just like buffer + /// let reference = buffer.get_mut(); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn get_mut(&mut self) -> &mut W { + &mut self.inner + } + + /// Returns a reference to the internally buffered data. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufWriter; + /// use std::net::TcpStream; + /// + /// let buf_writer = BufWriter::new(TcpStream::connect("127.0.0.1:34254").unwrap()); + /// + /// // See how many bytes are currently buffered + /// let bytes_buffered = buf_writer.buffer().len(); + /// ``` + #[stable(feature = "bufreader_buffer", since = "1.37.0")] + pub fn buffer(&self) -> &[u8] { + &self.buf + } + + /// Returns a mutable reference to the internal buffer. + /// + /// This can be used to write data directly into the buffer without triggering writers + /// to the underlying writer. + /// + /// That the buffer is a `Vec` is an implementation detail. + /// Callers should not modify the capacity as there currently is no public API to do so + /// and thus any capacity changes would be unexpected by the user. + pub(in crate::io) fn buffer_mut(&mut self) -> &mut Vec<u8> { + &mut self.buf + } + + /// Returns the number of bytes the internal buffer can hold without flushing. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufWriter; + /// use std::net::TcpStream; + /// + /// let buf_writer = BufWriter::new(TcpStream::connect("127.0.0.1:34254").unwrap()); + /// + /// // Check the capacity of the inner buffer + /// let capacity = buf_writer.capacity(); + /// // Calculate how many bytes can be written without flushing + /// let without_flush = capacity - buf_writer.buffer().len(); + /// ``` + #[stable(feature = "buffered_io_capacity", since = "1.46.0")] + pub fn capacity(&self) -> usize { + self.buf.capacity() + } + + /// Unwraps this `BufWriter<W>`, returning the underlying writer. + /// + /// The buffer is written out before returning the writer. + /// + /// # Errors + /// + /// An [`Err`] will be returned if an error occurs while flushing the buffer. + /// + /// # Examples + /// + /// ```no_run + /// use std::io::BufWriter; + /// use std::net::TcpStream; + /// + /// let mut buffer = BufWriter::new(TcpStream::connect("127.0.0.1:34254").unwrap()); + /// + /// // unwrap the TcpStream and flush the buffer + /// let stream = buffer.into_inner().unwrap(); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_inner(mut self) -> Result<W, IntoInnerError<BufWriter<W>>> { + match self.flush_buf() { + Err(e) => Err(IntoInnerError::new(self, e)), + Ok(()) => Ok(self.into_parts().0), + } + } + + /// Disassembles this `BufWriter<W>`, returning the underlying writer, and any buffered but + /// unwritten data. + /// + /// If the underlying writer panicked, it is not known what portion of the data was written. + /// In this case, we return `WriterPanicked` for the buffered data (from which the buffer + /// contents can still be recovered). + /// + /// `into_parts` makes no attempt to flush data and cannot fail. + /// + /// # Examples + /// + /// ``` + /// use std::io::{BufWriter, Write}; + /// + /// let mut buffer = [0u8; 10]; + /// let mut stream = BufWriter::new(buffer.as_mut()); + /// write!(stream, "too much data").unwrap(); + /// stream.flush().expect_err("it doesn't fit"); + /// let (recovered_writer, buffered_data) = stream.into_parts(); + /// assert_eq!(recovered_writer.len(), 0); + /// assert_eq!(&buffered_data.unwrap(), b"ata"); + /// ``` + #[stable(feature = "bufwriter_into_parts", since = "1.56.0")] + pub fn into_parts(mut self) -> (W, Result<Vec<u8>, WriterPanicked>) { + let buf = mem::take(&mut self.buf); + let buf = if !self.panicked { Ok(buf) } else { Err(WriterPanicked { buf }) }; + + // SAFETY: forget(self) prevents double dropping inner + let inner = unsafe { ptr::read(&mut self.inner) }; + mem::forget(self); + + (inner, buf) + } + + // Ensure this function does not get inlined into `write`, so that it + // remains inlineable and its common path remains as short as possible. + // If this function ends up being called frequently relative to `write`, + // it's likely a sign that the client is using an improperly sized buffer + // or their write patterns are somewhat pathological. + #[cold] + #[inline(never)] + fn write_cold(&mut self, buf: &[u8]) -> io::Result<usize> { + if buf.len() > self.spare_capacity() { + self.flush_buf()?; + } + + // Why not len > capacity? To avoid a needless trip through the buffer when the input + // exactly fills it. We'd just need to flush it to the underlying writer anyway. + if buf.len() >= self.buf.capacity() { + self.panicked = true; + let r = self.get_mut().write(buf); + self.panicked = false; + r + } else { + // Write to the buffer. In this case, we write to the buffer even if it fills it + // exactly. Doing otherwise would mean flushing the buffer, then writing this + // input to the inner writer, which in many cases would be a worse strategy. + + // SAFETY: There was either enough spare capacity already, or there wasn't and we + // flushed the buffer to ensure that there is. In the latter case, we know that there + // is because flushing ensured that our entire buffer is spare capacity, and we entered + // this block because the input buffer length is less than that capacity. In either + // case, it's safe to write the input buffer to our buffer. + unsafe { + self.write_to_buffer_unchecked(buf); + } + + Ok(buf.len()) + } + } + + // Ensure this function does not get inlined into `write_all`, so that it + // remains inlineable and its common path remains as short as possible. + // If this function ends up being called frequently relative to `write_all`, + // it's likely a sign that the client is using an improperly sized buffer + // or their write patterns are somewhat pathological. + #[cold] + #[inline(never)] + fn write_all_cold(&mut self, buf: &[u8]) -> io::Result<()> { + // Normally, `write_all` just calls `write` in a loop. We can do better + // by calling `self.get_mut().write_all()` directly, which avoids + // round trips through the buffer in the event of a series of partial + // writes in some circumstances. + + if buf.len() > self.spare_capacity() { + self.flush_buf()?; + } + + // Why not len > capacity? To avoid a needless trip through the buffer when the input + // exactly fills it. We'd just need to flush it to the underlying writer anyway. + if buf.len() >= self.buf.capacity() { + self.panicked = true; + let r = self.get_mut().write_all(buf); + self.panicked = false; + r + } else { + // Write to the buffer. In this case, we write to the buffer even if it fills it + // exactly. Doing otherwise would mean flushing the buffer, then writing this + // input to the inner writer, which in many cases would be a worse strategy. + + // SAFETY: There was either enough spare capacity already, or there wasn't and we + // flushed the buffer to ensure that there is. In the latter case, we know that there + // is because flushing ensured that our entire buffer is spare capacity, and we entered + // this block because the input buffer length is less than that capacity. In either + // case, it's safe to write the input buffer to our buffer. + unsafe { + self.write_to_buffer_unchecked(buf); + } + + Ok(()) + } + } + + // SAFETY: Requires `buf.len() <= self.buf.capacity() - self.buf.len()`, + // i.e., that input buffer length is less than or equal to spare capacity. + #[inline] + unsafe fn write_to_buffer_unchecked(&mut self, buf: &[u8]) { + debug_assert!(buf.len() <= self.spare_capacity()); + let old_len = self.buf.len(); + let buf_len = buf.len(); + let src = buf.as_ptr(); + let dst = self.buf.as_mut_ptr().add(old_len); + ptr::copy_nonoverlapping(src, dst, buf_len); + self.buf.set_len(old_len + buf_len); + } + + #[inline] + fn spare_capacity(&self) -> usize { + self.buf.capacity() - self.buf.len() + } +} + +#[stable(feature = "bufwriter_into_parts", since = "1.56.0")] +/// Error returned for the buffered data from `BufWriter::into_parts`, when the underlying +/// writer has previously panicked. Contains the (possibly partly written) buffered data. +/// +/// # Example +/// +/// ``` +/// use std::io::{self, BufWriter, Write}; +/// use std::panic::{catch_unwind, AssertUnwindSafe}; +/// +/// struct PanickingWriter; +/// impl Write for PanickingWriter { +/// fn write(&mut self, buf: &[u8]) -> io::Result<usize> { panic!() } +/// fn flush(&mut self) -> io::Result<()> { panic!() } +/// } +/// +/// let mut stream = BufWriter::new(PanickingWriter); +/// write!(stream, "some data").unwrap(); +/// let result = catch_unwind(AssertUnwindSafe(|| { +/// stream.flush().unwrap() +/// })); +/// assert!(result.is_err()); +/// let (recovered_writer, buffered_data) = stream.into_parts(); +/// assert!(matches!(recovered_writer, PanickingWriter)); +/// assert_eq!(buffered_data.unwrap_err().into_inner(), b"some data"); +/// ``` +pub struct WriterPanicked { + buf: Vec<u8>, +} + +impl WriterPanicked { + /// Returns the perhaps-unwritten data. Some of this data may have been written by the + /// panicking call(s) to the underlying writer, so simply writing it again is not a good idea. + #[must_use = "`self` will be dropped if the result is not used"] + #[stable(feature = "bufwriter_into_parts", since = "1.56.0")] + pub fn into_inner(self) -> Vec<u8> { + self.buf + } + + const DESCRIPTION: &'static str = + "BufWriter inner writer panicked, what data remains unwritten is not known"; +} + +#[stable(feature = "bufwriter_into_parts", since = "1.56.0")] +impl error::Error for WriterPanicked { + #[allow(deprecated, deprecated_in_future)] + fn description(&self) -> &str { + Self::DESCRIPTION + } +} + +#[stable(feature = "bufwriter_into_parts", since = "1.56.0")] +impl fmt::Display for WriterPanicked { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", Self::DESCRIPTION) + } +} + +#[stable(feature = "bufwriter_into_parts", since = "1.56.0")] +impl fmt::Debug for WriterPanicked { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("WriterPanicked") + .field("buffer", &format_args!("{}/{}", self.buf.len(), self.buf.capacity())) + .finish() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<W: Write> Write for BufWriter<W> { + #[inline] + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + // Use < instead of <= to avoid a needless trip through the buffer in some cases. + // See `write_cold` for details. + if buf.len() < self.spare_capacity() { + // SAFETY: safe by above conditional. + unsafe { + self.write_to_buffer_unchecked(buf); + } + + Ok(buf.len()) + } else { + self.write_cold(buf) + } + } + + #[inline] + fn write_all(&mut self, buf: &[u8]) -> io::Result<()> { + // Use < instead of <= to avoid a needless trip through the buffer in some cases. + // See `write_all_cold` for details. + if buf.len() < self.spare_capacity() { + // SAFETY: safe by above conditional. + unsafe { + self.write_to_buffer_unchecked(buf); + } + + Ok(()) + } else { + self.write_all_cold(buf) + } + } + + fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result<usize> { + // FIXME: Consider applying `#[inline]` / `#[inline(never)]` optimizations already applied + // to `write` and `write_all`. The performance benefits can be significant. See #79930. + if self.get_ref().is_write_vectored() { + // We have to handle the possibility that the total length of the buffers overflows + // `usize` (even though this can only happen if multiple `IoSlice`s reference the + // same underlying buffer, as otherwise the buffers wouldn't fit in memory). If the + // computation overflows, then surely the input cannot fit in our buffer, so we forward + // to the inner writer's `write_vectored` method to let it handle it appropriately. + let saturated_total_len = + bufs.iter().fold(0usize, |acc, b| acc.saturating_add(b.len())); + + if saturated_total_len > self.spare_capacity() { + // Flush if the total length of the input exceeds our buffer's spare capacity. + // If we would have overflowed, this condition also holds, and we need to flush. + self.flush_buf()?; + } + + if saturated_total_len >= self.buf.capacity() { + // Forward to our inner writer if the total length of the input is greater than or + // equal to our buffer capacity. If we would have overflowed, this condition also + // holds, and we punt to the inner writer. + self.panicked = true; + let r = self.get_mut().write_vectored(bufs); + self.panicked = false; + r + } else { + // `saturated_total_len < self.buf.capacity()` implies that we did not saturate. + + // SAFETY: We checked whether or not the spare capacity was large enough above. If + // it was, then we're safe already. If it wasn't, we flushed, making sufficient + // room for any input <= the buffer size, which includes this input. + unsafe { + bufs.iter().for_each(|b| self.write_to_buffer_unchecked(b)); + }; + + Ok(saturated_total_len) + } + } else { + let mut iter = bufs.iter(); + let mut total_written = if let Some(buf) = iter.by_ref().find(|&buf| !buf.is_empty()) { + // This is the first non-empty slice to write, so if it does + // not fit in the buffer, we still get to flush and proceed. + if buf.len() > self.spare_capacity() { + self.flush_buf()?; + } + if buf.len() >= self.buf.capacity() { + // The slice is at least as large as the buffering capacity, + // so it's better to write it directly, bypassing the buffer. + self.panicked = true; + let r = self.get_mut().write(buf); + self.panicked = false; + return r; + } else { + // SAFETY: We checked whether or not the spare capacity was large enough above. + // If it was, then we're safe already. If it wasn't, we flushed, making + // sufficient room for any input <= the buffer size, which includes this input. + unsafe { + self.write_to_buffer_unchecked(buf); + } + + buf.len() + } + } else { + return Ok(0); + }; + debug_assert!(total_written != 0); + for buf in iter { + if buf.len() <= self.spare_capacity() { + // SAFETY: safe by above conditional. + unsafe { + self.write_to_buffer_unchecked(buf); + } + + // This cannot overflow `usize`. If we are here, we've written all of the bytes + // so far to our buffer, and we've ensured that we never exceed the buffer's + // capacity. Therefore, `total_written` <= `self.buf.capacity()` <= `usize::MAX`. + total_written += buf.len(); + } else { + break; + } + } + Ok(total_written) + } + } + + fn is_write_vectored(&self) -> bool { + true + } + + fn flush(&mut self) -> io::Result<()> { + self.flush_buf().and_then(|()| self.get_mut().flush()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<W: Write> fmt::Debug for BufWriter<W> +where + W: fmt::Debug, +{ + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt.debug_struct("BufWriter") + .field("writer", &self.inner) + .field("buffer", &format_args!("{}/{}", self.buf.len(), self.buf.capacity())) + .finish() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<W: Write + Seek> Seek for BufWriter<W> { + /// Seek to the offset, in bytes, in the underlying writer. + /// + /// Seeking always writes out the internal buffer before seeking. + fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> { + self.flush_buf()?; + self.get_mut().seek(pos) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<W: Write> Drop for BufWriter<W> { + fn drop(&mut self) { + if !self.panicked { + // dtors should not panic, so we ignore a failed flush + let _r = self.flush_buf(); + } + } +} |