diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /third_party/rust/line-wrap/src/lib.rs | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/line-wrap/src/lib.rs')
-rw-r--r-- | third_party/rust/line-wrap/src/lib.rs | 351 |
1 files changed, 351 insertions, 0 deletions
diff --git a/third_party/rust/line-wrap/src/lib.rs b/third_party/rust/line-wrap/src/lib.rs new file mode 100644 index 0000000000..3760d10175 --- /dev/null +++ b/third_party/rust/line-wrap/src/lib.rs @@ -0,0 +1,351 @@ +//! Efficiently insert line endings. +//! +//! If you have a buffer full of data and want to insert any sort of regularly-spaced separator, +//! this will do it with a minimum of data copying. Commonly, this is to insert `\n` (see `lf()`) or `\r\n` (`crlf()`), but +//! any byte sequence can be used. +//! +//! 1. Pick a line ending. For single byte separators, see `ByteLineEnding`, or for two bytes, `TwoByteLineEnding`. For +//! arbitrary byte slices, use `SliceLineEnding`. +//! 2. Call `line_wrap`. +//! 3. Your data has been rearranged in place with the specified line ending inserted. +//! +//! # Examples +//! +//! ``` +//! use line_wrap::*; +//! // suppose we have 80 bytes of data in a buffer and we want to wrap as per MIME. +//! // Buffer is large enough to hold line endings. +//! let mut data = vec![0; 82]; +//! +//! assert_eq!(2, line_wrap(&mut data, 80, 76, &crlf())); +//! +//! // first line of zeroes +//! let mut expected_data = vec![0; 76]; +//! // line ending +//! expected_data.extend_from_slice(b"\r\n"); +//! // next line +//! expected_data.extend_from_slice(&[0, 0, 0, 0]); +//! assert_eq!(expected_data, data); +//! ``` +//! +//! # Performance +//! +//! On an i7 6850k: +//! +//! - 10 byte input, 1 byte line length takes ~60ns (~160MiB/s) +//! - 100 byte input, 10 byte lines takes ~60ns (~1.6GiB/s) +//! - Startup costs dominate at these small lengths +//! - 1,000 byte input, 100 byte lines takes ~65ns (~15GiB/s) +//! - 10,000 byte input, 100 byte lines takes ~550ns (~17GiB/s) +//! - In general, `SliceLineEncoding` is about 75% the speed of the fixed-length impls. +//! +//! Naturally, try `cargo +nightly bench` on your hardware to get more representative data. +extern crate safemem; + +/// Unix-style line ending. +pub fn lf() -> ByteLineEnding { ByteLineEnding::new(b'\n') } + +/// Windows-style line ending. +pub fn crlf() -> TwoByteLineEnding { TwoByteLineEnding::new(b'\r', b'\n') } + +/// Writes line endings. +/// +/// The trait allows specialization for the common single and double byte cases, netting nice +/// throughput improvements over simply using a slice for everything. +pub trait LineEnding { + /// Write the line ending into the slice, which starts at the point where the ending should be written and is len() in length + fn write_ending(&self, slice: &mut [u8]); + /// The length of this particular line ending (must be constant and > 0) + fn len(&self) -> usize; +} + +/// A single byte line ending. +/// +/// See `lf()`. +/// +/// # Examples +/// +/// ``` +/// use line_wrap::*; +/// +/// let ending = ByteLineEnding::new(b'\n'); +/// +/// let mut data = vec![1, 2, 3, 4, 5, 6, 255, 255]; +/// +/// assert_eq!(2, line_wrap(&mut data[..], 6, 2, &ending)); +/// +/// assert_eq!(vec![1, 2, b'\n', 3, 4, b'\n', 5, 6], data); +/// ``` +pub struct ByteLineEnding { + byte: u8 +} + +impl ByteLineEnding { + pub fn new(byte: u8) -> ByteLineEnding { + ByteLineEnding { + byte + } + } +} + +impl LineEnding for ByteLineEnding { + #[inline] + fn write_ending(&self, slice: &mut [u8]) { + slice[0] = self.byte; + } + + #[inline] + fn len(&self) -> usize { + 1 + } +} + +/// A double byte line ending. +/// +/// See `crlf()`. +/// +/// # Examples +/// +/// ``` +/// use line_wrap::*; +/// +/// let ending = TwoByteLineEnding::new(b'\r', b'\n'); +/// +/// let mut data = vec![1, 2, 3, 4, 5, 6, 255, 255, 255, 255]; +/// +/// assert_eq!(4, line_wrap(&mut data[..], 6, 2, &ending)); +/// +/// assert_eq!(vec![1, 2, b'\r', b'\n', 3, 4, b'\r', b'\n', 5, 6], data); +/// ``` +pub struct TwoByteLineEnding { + byte0: u8, + byte1: u8, +} + +impl TwoByteLineEnding { + pub fn new(byte0: u8, byte1: u8) -> TwoByteLineEnding { + TwoByteLineEnding { + byte0, + byte1, + } + } +} + +impl LineEnding for TwoByteLineEnding { + #[inline] + fn write_ending(&self, slice: &mut [u8]) { + slice[0] = self.byte0; + slice[1] = self.byte1; + } + + #[inline] + fn len(&self) -> usize { + 2 + } +} + +/// A byte slice line ending. +/// +/// Gives up some throughput compared to the specialized single/double byte impls, but works with +/// any length. +/// +/// # Examples +/// +/// ``` +/// use line_wrap::*; +/// +/// let ending = SliceLineEnding::new(b"xyz"); +/// +/// let mut data = vec![1, 2, 3, 4, 5, 6, 255, 255, 255, 255, 255, 255]; +/// +/// assert_eq!(6, line_wrap(&mut data[..], 6, 2, &ending)); +/// +/// assert_eq!(vec![1, 2, b'x', b'y', b'z', 3, 4, b'x', b'y', b'z', 5, 6], data); +/// ``` +pub struct SliceLineEnding<'a> { + slice: &'a [u8] +} + +impl<'a> SliceLineEnding<'a> { + pub fn new(slice: &[u8]) -> SliceLineEnding { + SliceLineEnding { + slice + } + } +} + +impl<'a> LineEnding for SliceLineEnding<'a> { + #[inline] + fn write_ending(&self, slice: &mut [u8]) { + slice.copy_from_slice(self.slice); + } + + #[inline] + fn len(&self) -> usize { + self.slice.len() + } +} + +/// Insert line endings into the input. +/// +/// Endings are inserted after each complete line, except the last line, even if the last line takes +/// up the full line width. +/// +/// - `buf` must be large enough to handle the increased size after endings are inserted. In other +/// words, `buf.len() >= input_len / line_len * line_ending.len()`. +/// - `input_len` is the length of the unwrapped in `buf`. +/// - `line_len` is the desired line width without line ending characters. +/// +/// Returns the number of line ending bytes added. +/// +/// # Panics +/// +/// - When `line_ending.len() == 0` +/// - When `buf` is too small to contain the original input and its new line endings +pub fn line_wrap<L: LineEnding>( + buf: &mut [u8], + input_len: usize, + line_len: usize, + line_ending: &L, +) -> usize { + assert!(line_ending.len() > 0); + + if input_len <= line_len { + return 0; + } + + let line_ending_len = line_ending.len(); + let line_wrap_params = line_wrap_parameters(input_len, line_len, line_ending_len); + + // ptr.offset() is undefined if it wraps, and there is no checked_offset(). However, because + // we perform this check up front to make sure we have enough capacity, we know that none of + // the subsequent pointer operations (assuming they implement the desired behavior of course!) + // will overflow. + assert!( + buf.len() >= line_wrap_params.total_len, + "Buffer must be able to hold encoded data after line wrapping" + ); + + // Move the last line, either partial or full, by itself as it does not have a line ending + // afterwards + let last_line_start = input_len.checked_sub(line_wrap_params.last_line_len) + .expect("Last line start index underflow"); + // last line starts immediately after all the wrapped full lines + let new_line_start = line_wrap_params.total_full_wrapped_lines_len; + + safemem::copy_over( + buf, + last_line_start, + new_line_start, + line_wrap_params.last_line_len, + ); + + let mut total_line_ending_bytes = 0; + + // initialize so that the initial decrement will set them correctly + let mut old_line_start = last_line_start; + let mut new_line_start = line_wrap_params.total_full_wrapped_lines_len; + + // handle the full lines + for _ in 0..line_wrap_params.lines_with_endings { + // the index after the end of the line ending we're about to write is the start of the next + // line + let end_of_line_ending = new_line_start; + let start_of_line_ending = end_of_line_ending + .checked_sub(line_ending_len) + .expect("Line ending start index underflow"); + + // doesn't underflow because it's decremented `line_wrap_params.lines_with_endings` times + old_line_start = old_line_start.checked_sub(line_len) + .expect("Old line start index underflow"); + new_line_start = new_line_start.checked_sub(line_wrap_params.line_with_ending_len) + .expect("New line start index underflow"); + + safemem::copy_over(buf, old_line_start, new_line_start, line_len); + + line_ending.write_ending(&mut buf[start_of_line_ending..(end_of_line_ending)]); + total_line_ending_bytes += line_ending_len; + } + + assert_eq!(line_wrap_params.total_line_endings_len, total_line_ending_bytes); + + total_line_ending_bytes +} + +#[derive(Debug, PartialEq)] +struct LineWrapParameters { + line_with_ending_len: usize, + // number of lines that need an ending + lines_with_endings: usize, + // length of last line (which never needs an ending) + last_line_len: usize, + // length of lines that need an ending (which are always full lines), with their endings + total_full_wrapped_lines_len: usize, + // length of all lines, including endings for the ones that need them + total_len: usize, + // length of the line endings only + total_line_endings_len: usize, +} + +/// Calculations about how many lines we'll get for a given line length, line ending, etc. +/// This assumes that the last line will not get an ending, even if it is the full line length. +// Inlining improves short input single-byte by 25%. +#[inline] +fn line_wrap_parameters( + input_len: usize, + line_len: usize, + line_ending_len: usize, +) -> LineWrapParameters { + let line_with_ending_len = line_len + .checked_add(line_ending_len) + .expect("Line length with ending exceeds usize"); + + if input_len <= line_len { + // no wrapping needed + return LineWrapParameters { + line_with_ending_len, + lines_with_endings: 0, + last_line_len: input_len, + total_full_wrapped_lines_len: 0, + total_len: input_len, + total_line_endings_len: 0, + }; + }; + + // lines_with_endings > 0, last_line_len > 0 + let (lines_with_endings, last_line_len) = if input_len % line_len > 0 { + // Every full line has an ending since there is a partial line at the end + (input_len / line_len, input_len % line_len) + } else { + // Every line is a full line, but no trailing ending. + // Subtraction will not underflow since we know input_len > line_len. + (input_len / line_len - 1, line_len) + }; + + // Should we expose exceeding usize via Result to be kind to 16-bit users? Or is that + // always going to be a panic anyway in practice? + + // length of just the full lines with line endings + let total_full_wrapped_lines_len = lines_with_endings + .checked_mul(line_with_ending_len) + .expect("Full lines with endings length exceeds usize"); + // all lines with appropriate endings, including the last line + let total_len = total_full_wrapped_lines_len + .checked_add(last_line_len) + .expect("All lines with endings length exceeds usize"); + let total_line_endings_len = lines_with_endings + .checked_mul(line_ending_len) + .expect("Total line endings length exceeds usize"); + + LineWrapParameters { + line_with_ending_len, + lines_with_endings, + last_line_len, + total_full_wrapped_lines_len, + total_len, + total_line_endings_len, + } +} + +#[cfg(test)] +mod tests;
\ No newline at end of file |