//! A small module giving you a simple container that allows easy and cheap //! replacement of parts of its content, with the ability to prevent changing //! the same parts multiple times. use anyhow::{anyhow, ensure, Error}; use std::rc::Rc; #[derive(Debug, Clone, PartialEq, Eq)] enum State { Initial, Replaced(Rc<[u8]>), Inserted(Rc<[u8]>), } impl State { fn is_inserted(&self) -> bool { matches!(*self, State::Inserted(..)) } } #[derive(Debug, Clone, PartialEq, Eq)] struct Span { /// Start of this span in parent data start: usize, /// up to end including end: usize, data: State, } /// A container that allows easily replacing chunks of its data #[derive(Debug, Clone, Default)] pub struct Data { original: Vec, parts: Vec, } impl Data { /// Create a new data container from a slice of bytes pub fn new(data: &[u8]) -> Self { Data { original: data.into(), parts: vec![Span { data: State::Initial, start: 0, end: data.len().saturating_sub(1), }], } } /// Render this data as a vector of bytes pub fn to_vec(&self) -> Vec { if self.original.is_empty() { return Vec::new(); } self.parts.iter().fold(Vec::new(), |mut acc, d| { match d.data { State::Initial => acc.extend_from_slice(&self.original[d.start..=d.end]), State::Replaced(ref d) | State::Inserted(ref d) => acc.extend_from_slice(d), }; acc }) } /// Replace a chunk of data with the given slice, erroring when this part /// was already changed previously. pub fn replace_range( &mut self, from: usize, up_to_and_including: usize, data: &[u8], ) -> Result<(), Error> { let exclusive_end = up_to_and_including + 1; ensure!( from <= exclusive_end, "Invalid range {}...{}, start is larger than end", from, up_to_and_including ); ensure!( up_to_and_including <= self.original.len(), "Invalid range {}...{} given, original data is only {} byte long", from, up_to_and_including, self.original.len() ); let insert_only = from == exclusive_end; // Since we error out when replacing an already replaced chunk of data, // we can take some shortcuts here. For example, there can be no // overlapping replacements -- we _always_ split a chunk of 'initial' // data into three[^empty] parts, and there can't ever be two 'initial' // parts touching. // // [^empty]: Leading and trailing ones might be empty if we replace // the whole chunk. As an optimization and without loss of generality we // don't add empty parts. let new_parts = { let index_of_part_to_split = self .parts .iter() .position(|p| { !p.data.is_inserted() && p.start <= from && p.end >= up_to_and_including }) .ok_or_else(|| { use log::Level::Debug; if log_enabled!(Debug) { let slices = self .parts .iter() .map(|p| { ( p.start, p.end, match p.data { State::Initial => "initial", State::Replaced(..) => "replaced", State::Inserted(..) => "inserted", }, ) }) .collect::>(); debug!( "no single slice covering {}...{}, current slices: {:?}", from, up_to_and_including, slices, ); } anyhow!( "Could not replace range {}...{} in file \ -- maybe parts of it were already replaced?", from, up_to_and_including ) })?; let part_to_split = &self.parts[index_of_part_to_split]; // If this replacement matches exactly the part that we would // otherwise split then we ignore this for now. This means that you // can replace the exact same range with the exact same content // multiple times and we'll process and allow it. // // This is currently done to alleviate issues like // rust-lang/rust#51211 although this clause likely wants to be // removed if that's fixed deeper in the compiler. if part_to_split.start == from && part_to_split.end == up_to_and_including { if let State::Replaced(ref replacement) = part_to_split.data { if &**replacement == data { return Ok(()); } } } ensure!( part_to_split.data == State::Initial, "Cannot replace slice of data that was already replaced" ); let mut new_parts = Vec::with_capacity(self.parts.len() + 2); // Previous parts if let Some(ps) = self.parts.get(..index_of_part_to_split) { new_parts.extend_from_slice(ps); } // Keep initial data on left side of part if from > part_to_split.start { new_parts.push(Span { start: part_to_split.start, end: from.saturating_sub(1), data: State::Initial, }); } // New part new_parts.push(Span { start: from, end: up_to_and_including, data: if insert_only { State::Inserted(data.into()) } else { State::Replaced(data.into()) }, }); // Keep initial data on right side of part if up_to_and_including < part_to_split.end { new_parts.push(Span { start: up_to_and_including + 1, end: part_to_split.end, data: State::Initial, }); } // Following parts if let Some(ps) = self.parts.get(index_of_part_to_split + 1..) { new_parts.extend_from_slice(ps); } new_parts }; self.parts = new_parts; Ok(()) } } #[cfg(test)] mod tests { use super::*; use proptest::prelude::*; fn str(i: &[u8]) -> &str { ::std::str::from_utf8(i).unwrap() } #[test] fn replace_some_stuff() { let mut d = Data::new(b"foo bar baz"); d.replace_range(4, 6, b"lol").unwrap(); assert_eq!("foo lol baz", str(&d.to_vec())); } #[test] fn replace_a_single_char() { let mut d = Data::new(b"let y = true;"); d.replace_range(4, 4, b"mut y").unwrap(); assert_eq!("let mut y = true;", str(&d.to_vec())); } #[test] fn replace_multiple_lines() { let mut d = Data::new(b"lorem\nipsum\ndolor"); d.replace_range(6, 10, b"lol").unwrap(); assert_eq!("lorem\nlol\ndolor", str(&d.to_vec())); d.replace_range(12, 16, b"lol").unwrap(); assert_eq!("lorem\nlol\nlol", str(&d.to_vec())); } #[test] fn replace_multiple_lines_with_insert_only() { let mut d = Data::new(b"foo!"); d.replace_range(3, 2, b"bar").unwrap(); assert_eq!("foobar!", str(&d.to_vec())); d.replace_range(0, 2, b"baz").unwrap(); assert_eq!("bazbar!", str(&d.to_vec())); d.replace_range(3, 3, b"?").unwrap(); assert_eq!("bazbar?", str(&d.to_vec())); } #[test] fn replace_invalid_range() { let mut d = Data::new(b"foo!"); assert!(d.replace_range(2, 0, b"bar").is_err()); assert!(d.replace_range(0, 2, b"bar").is_ok()); } #[test] fn empty_to_vec_roundtrip() { let s = ""; assert_eq!(s.as_bytes(), Data::new(s.as_bytes()).to_vec().as_slice()); } #[test] #[should_panic(expected = "Cannot replace slice of data that was already replaced")] fn replace_overlapping_stuff_errs() { let mut d = Data::new(b"foo bar baz"); d.replace_range(4, 6, b"lol").unwrap(); assert_eq!("foo lol baz", str(&d.to_vec())); d.replace_range(4, 6, b"lol2").unwrap(); } #[test] #[should_panic(expected = "original data is only 3 byte long")] fn broken_replacements() { let mut d = Data::new(b"foo"); d.replace_range(4, 7, b"lol").unwrap(); } #[test] fn replace_same_twice() { let mut d = Data::new(b"foo"); d.replace_range(0, 0, b"b").unwrap(); d.replace_range(0, 0, b"b").unwrap(); assert_eq!("boo", str(&d.to_vec())); } proptest! { #[test] #[ignore] fn new_to_vec_roundtrip(ref s in "\\PC*") { assert_eq!(s.as_bytes(), Data::new(s.as_bytes()).to_vec().as_slice()); } #[test] #[ignore] fn replace_random_chunks( ref data in "\\PC*", ref replacements in prop::collection::vec( (any::<::std::ops::Range>(), any::>()), 1..1337, ) ) { let mut d = Data::new(data.as_bytes()); for &(ref range, ref bytes) in replacements { let _ = d.replace_range(range.start, range.end, bytes); } } } }