// TODO: Revisit the design of `Event` once the `HashMap` raw interface is stabilised. // Ideally `Value`s would be stored inline in `Event`. use indexmap::IndexMap; use std::{ borrow::Cow, io::{self, Write}, mem, num::NonZeroUsize, }; use crate::{ error::{self, Error, ErrorKind, EventKind}, stream::Writer, Date, Integer, Uid, }; pub struct BinaryWriter { writer: PosWriter, events: Vec, dictionary_key_events: Vec, values: IndexMap, ValueState>, /// Pointers into `events` for each of the currently unclosed `Collection` events. collection_stack: Vec, /// The number of `Collection` and unique `Value` events in `events`. num_objects: usize, } struct PosWriter { writer: W, pos: usize, } #[derive(Clone)] struct ObjectRef(NonZeroUsize); /// An array of `len` elements is stored as a `Collection` event followed by `skip_len` events /// containing the contents of the array. e.g. /// /// Collection(ty: Array, len: 2, skip_len: 2) /// Value /// Value /// /// If the array contains another array or dictionary `len` and `skip_len` will differ. e.g. /// /// Collection(ty: Array, len: 2, skip_len: 3) /// Value /// Collection(ty: Array, len: 1, skip_len: 1) /// Value /// /// A dictionary of `len` (key, value) pairs is stored as a `Collection` event followed by /// `skip_len` events containing the contents of the dictionary. The dictionary values are stored /// first. These are followed by a `DictionaryKeys` event and then the keys themselves. e.g. /// /// Collection(ty: Dictionary, len: 2, skip_len: 6) /// Value /// Collection(ty: Array, len: 1, skip_len: 1) /// Value /// DictionaryKeys(2) /// Value (Key) /// Value (Key) /// /// This arrangement simplifies writing dictionaries as they must be written in the order /// (key, key, value, value) instead of (key, value, key, value) as they are passed to the writer. /// Unclosed dictionaries have their keys stored in `dictionary_key_events` and these are only /// moved to the end of the `BinaryWriter::events` array once the dictionary is closed in /// `write_end_collection`. enum Event { Collection(Collection), /// Index of the value in the `values` map. Value(usize), /// The number of dictionary keys following this event. DictionaryKeys(usize), } struct Collection { ty: CollectionType, /// The number of elements in an array or (key, value) pairs in a dictionary. /// Unclosed dictionaries have a `len` equal to the number of keys plus the number of values /// written so far. This is fixed up in `write_end_collection`. len: usize, /// The number of events to skip to get to the next element after the collection. skip: usize, object_ref: Option, } #[derive(Eq, PartialEq)] enum CollectionType { Array, Dictionary, } #[derive(Eq, Hash, PartialEq)] enum Value<'a> { Boolean(bool), Data(Cow<'a, [u8]>), Date(Date), Integer(Integer), /// Floats are deduplicated based on their bitwise value. Real(u64), String(Cow<'a, str>), Uid(Uid), } enum ValueState { /// The value has not been assigned an object reference. Unassigned, /// The value has been assigned an object reference but has not yet been written. Unwritten(ObjectRef), /// The value has been written with the given object reference. Written(ObjectRef), } impl BinaryWriter { pub fn new(writer: W) -> BinaryWriter { BinaryWriter { writer: PosWriter { writer, pos: 0 }, events: Vec::new(), dictionary_key_events: Vec::new(), values: IndexMap::new(), collection_stack: Vec::new(), num_objects: 0, } } fn write_start_collection(&mut self, ty: CollectionType) -> Result<(), Error> { if self.expecting_dictionary_key() { let ty_event_kind = match ty { CollectionType::Array => EventKind::StartArray, CollectionType::Dictionary => EventKind::StartDictionary, }; return Err(ErrorKind::UnexpectedEventType { expected: EventKind::DictionaryKeyOrEndCollection, found: ty_event_kind, } .without_position()); } self.increment_current_collection_len(); self.collection_stack.push(self.events.len()); self.events.push(Event::Collection(Collection { ty, len: 0, skip: 0, object_ref: None, })); self.num_objects += 1; Ok(()) } fn write_end_collection(&mut self) -> Result<(), Error> { let collection_event_index = self.collection_stack.pop().ok_or_else(|| { ErrorKind::UnexpectedEventType { expected: EventKind::ValueOrStartCollection, found: EventKind::EndCollection, } .without_position() })?; let current_event_index = self.events.len() - 1; let c = if let Event::Collection(c) = &mut self.events[collection_event_index] { c } else { unreachable!("items in `collection_stack` always point to a collection event"); }; c.skip = current_event_index - collection_event_index; if let CollectionType::Dictionary = c.ty { // Ensure that every dictionary key is paired with a value. if !is_even(c.len) { return Err(ErrorKind::UnexpectedEventType { expected: EventKind::DictionaryKeyOrEndCollection, found: EventKind::EndCollection, } .without_position()); } // Fix up the dictionary length. It should contain the number of key-value pairs, // not the number of keys and values. c.len /= 2; // To skip past a dictionary we also need to skip the `DictionaryKeys` event and the // keys that follow it. c.skip += 1 + c.len; let len = c.len; self.events.push(Event::DictionaryKeys(len)); // Move the cached dictionary keys to the end of the events array. let keys_start_index = self.dictionary_key_events.len() - len; self.events.extend( self.dictionary_key_events .drain(keys_start_index..) .map(Event::Value), ); } if self.collection_stack.is_empty() { self.write_plist()?; } Ok(()) } fn write_value(&mut self, value: Value) -> Result<(), Error> { let expecting_dictionary_key = self.expecting_dictionary_key(); // Ensure that all dictionary keys are strings. match (&value, expecting_dictionary_key) { (Value::String(_), true) | (_, false) => (), (_, true) => { return Err(ErrorKind::UnexpectedEventType { expected: EventKind::DictionaryKeyOrEndCollection, found: value.event_kind(), } .without_position()) } } // Deduplicate `value`. There is one entry in `values` for each unqiue `Value` in the // plist. let value_index = if let Some((value_index, _, _)) = self.values.get_full(&value) { value_index } else { self.num_objects += 1; let value = value.into_owned(); let (value_index, _) = self.values.insert_full(value, ValueState::Unassigned); value_index }; // Dictionary keys are buffered in `dictionary_key_events` until the dictionary is closed // in `write_end_collection` when they are moved to the end of the `events` array. if expecting_dictionary_key { self.dictionary_key_events.push(value_index); } else { self.events.push(Event::Value(value_index)); } self.increment_current_collection_len(); if self.collection_stack.is_empty() { self.write_plist()?; } Ok(()) } fn expecting_dictionary_key(&self) -> bool { if let Some(&event_index) = self.collection_stack.last() { if let Event::Collection(c) = &self.events[event_index] { c.ty == CollectionType::Dictionary && is_even(c.len) } else { unreachable!("items in `collection_stack` always point to a collection event"); } } else { false } } fn increment_current_collection_len(&mut self) { if let Some(&event_index) = self.collection_stack.last() { if let Event::Collection(c) = &mut self.events[event_index] { c.len += 1; } else { unreachable!("items in `collection_stack` always point to a collection event"); } } } fn write_plist(&mut self) -> Result<(), Error> { assert!(self.collection_stack.is_empty()); // Write header self.writer.write_exact(b"bplist00")?; // Write objects let mut events_vec = mem::replace(&mut self.events, Vec::new()); let mut events = &mut events_vec[..]; let ref_size = plist_ref_size(self.num_objects - 1); let mut offset_table = vec![0; self.num_objects]; // Assign the first (root) event an object reference of zero. let mut next_object_ref = ObjectRef::zero(); match &mut events[0] { Event::Value(value_index) => { let (_, value_state) = value_mut(&mut self.values, *value_index); *value_state = ValueState::Unwritten(next_object_ref.clone_and_increment_self()); } Event::Collection(c) => { c.object_ref = Some(next_object_ref.clone_and_increment_self()); } Event::DictionaryKeys(_) => { unreachable!("`events` starts with a value or collection event") } } while let Some((event, rest)) = events.split_first_mut() { events = rest; match event { Event::Collection(c) => { let collection_events = &mut events[..c.skip]; self.write_plist_collection( c, collection_events, ref_size, &mut next_object_ref, &mut offset_table, )?; } Event::Value(value_index) => { self.write_plist_value(*value_index, &mut offset_table)?; } // Dictionary keys will have already been written in `write_plist_collection` so we // skip over them here. Event::DictionaryKeys(len) => { events = &mut events[*len..]; } } } // Write object offset table let offset_table_offset = self.writer.pos; let offset_size = plist_ref_size(offset_table_offset); for &offset in &offset_table { write_plist_ref(&mut self.writer, offset_size, offset)?; } // Write trailer // 6 zero bytes padding // 1 byte offset size // 1 byte object ref size // 8 bytes number of objects // 8 bytes root object ref (always zero) // 8 bytes file offset of the object offset table let mut trailer = [0; 32]; trailer[6] = offset_size; trailer[7] = ref_size; trailer[8..16].copy_from_slice(&(self.num_objects as u64).to_be_bytes()); trailer[24..32].copy_from_slice(&(offset_table_offset as u64).to_be_bytes()); self.writer.write_exact(&trailer)?; self.writer .flush() .map_err(error::from_io_without_position)?; // Reset plist writer self.writer.pos = 0; events_vec.clear(); self.events = events_vec; self.values.clear(); self.num_objects = 0; Ok(()) } fn write_plist_collection( &mut self, collection: &Collection, events: &mut [Event], ref_size: u8, next_object_ref: &mut ObjectRef, offset_table: &mut Vec, ) -> Result<(), Error> { if let Some(object_ref) = &collection.object_ref { offset_table[object_ref.value()] = self.writer.pos; } else { unreachable!("collection object refs are assigned before this function is called"); } // Split the events in the current collection into keys and values (arrays contain only // values). This is required as dictionary keys appear after values in the `events array // but all keys must be written before any values. let (keys, values, ty) = match collection.ty { CollectionType::Array => (&mut [][..], events, 0xa0), CollectionType::Dictionary => { let keys_start_offset = events.len() - collection.len - 1; let (values, keys) = events.split_at_mut(keys_start_offset); (&mut keys[1..], values, 0xd0) } }; let mut collection_events = keys.iter_mut().chain(values); // Collections are written as a length prefixed array of object references. For an array // the length is the number of elements. For a dictionary it is the number of (key, value) // pairs. write_plist_value_ty_and_size(&mut self.writer, ty, collection.len)?; while let Some(event) = collection_events.next() { let object_ref = match event { Event::Collection(c) => { // We only want to write references to top level elements in the collection so // we skip over the contents of any sub-collections. if c.skip > 0 { let _ = collection_events.nth(c.skip - 1); } // Collections are not deduplicated so they must be assigned an object // reference here. assert!(c.object_ref.is_none()); let object_ref = next_object_ref.clone_and_increment_self(); c.object_ref = Some(object_ref.clone()); object_ref } Event::Value(value_index) => { // Values are deduplicated so we only assign an object reference if we have not // already done so previously. let (_, value_state) = value_mut(&mut self.values, *value_index); match value_state { ValueState::Unassigned => { let object_ref = next_object_ref.clone_and_increment_self(); *value_state = ValueState::Unwritten(object_ref.clone()); object_ref } ValueState::Unwritten(object_ref) | ValueState::Written(object_ref) => { object_ref.clone() } } } Event::DictionaryKeys(_) => unreachable!( "`DictionaryKeys` events are specifically excluded from the iterator" ), }; write_plist_ref(&mut self.writer, ref_size, object_ref.value())?; } // We write dictionary keys here as they appear after values in the `events` array but // should come before values in the plist stream to reduce seeking on read. for key in keys { if let Event::Value(value_index) = key { self.write_plist_value(*value_index, offset_table)?; } else { unreachable!("dictionary keys are assigned as values in `write_end_collection`"); } } Ok(()) } fn write_plist_value( &mut self, value_index: usize, offset_table: &mut Vec, ) -> Result<(), Error> { let (value, value_state) = value_mut(&mut self.values, value_index); let object_ref = match value_state { ValueState::Unassigned => { unreachable!("value object refs are assigned before this function is called"); } ValueState::Unwritten(object_ref) => object_ref.clone(), ValueState::Written(_) => return Ok(()), }; offset_table[object_ref.value()] = self.writer.pos; *value_state = ValueState::Written(object_ref); match value { Value::Boolean(true) => { self.writer.write_exact(&[0x09])?; } Value::Boolean(false) => { self.writer.write_exact(&[0x08])?; } Value::Data(v) => { write_plist_value_ty_and_size(&mut self.writer, 0x40, v.len())?; self.writer.write_exact(&v[..])?; } Value::Date(v) => { let secs = v.to_seconds_since_plist_epoch(); let mut buf: [_; 9] = [0x33, 0, 0, 0, 0, 0, 0, 0, 0]; buf[1..].copy_from_slice(&secs.to_bits().to_be_bytes()); self.writer.write_exact(&buf)?; } Value::Integer(v) => { if let Some(v) = v.as_signed() { if v >= 0 && v <= i64::from(u8::max_value()) { self.writer.write_exact(&[0x10, v as u8])?; } else if v >= 0 && v <= i64::from(u16::max_value()) { let mut buf: [_; 3] = [0x11, 0, 0]; buf[1..].copy_from_slice(&(v as u16).to_be_bytes()); self.writer.write_exact(&buf)?; } else if v >= 0 && v <= i64::from(u32::max_value()) { let mut buf: [_; 5] = [0x12, 0, 0, 0, 0]; buf[1..].copy_from_slice(&(v as u32).to_be_bytes()); self.writer.write_exact(&buf)?; } else { let mut buf: [_; 9] = [0x13, 0, 0, 0, 0, 0, 0, 0, 0]; buf[1..].copy_from_slice(&v.to_be_bytes()); self.writer.write_exact(&buf)?; } } else if let Some(v) = v.as_unsigned() { // `u64`s larger than `i64::max_value()` are stored as signed 128 bit // integers. let mut buf: [_; 17] = [0x14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; buf[1..].copy_from_slice(&i128::from(v).to_be_bytes()); self.writer.write_exact(&buf)?; } else { unreachable!("an integer can be represented as either an i64 or u64"); } } Value::Real(v) => { let mut buf: [_; 9] = [0x23, 0, 0, 0, 0, 0, 0, 0, 0]; buf[1..].copy_from_slice(&v.to_be_bytes()); self.writer.write_exact(&buf)?; } Value::String(v) if v.is_ascii() => { let ascii = v.as_bytes(); write_plist_value_ty_and_size(&mut self.writer, 0x50, ascii.len())?; self.writer.write_exact(ascii)?; } Value::String(v) => { let utf16_len = v.encode_utf16().count(); write_plist_value_ty_and_size(&mut self.writer, 0x60, utf16_len)?; for c in v.encode_utf16() { self.writer.write_exact(&c.to_be_bytes())?; } } Value::Uid(v) => { let v = v.get(); if v <= u64::from(u8::max_value()) { self.writer.write_exact(&[0x80, v as u8])?; } else if v <= u64::from(u16::max_value()) { let mut buf: [_; 3] = [0x81, 0, 0]; buf[1..].copy_from_slice(&(v as u16).to_be_bytes()); self.writer.write_exact(&buf)?; } else if v <= u64::from(u32::max_value()) { let mut buf: [_; 5] = [0x83, 0, 0, 0, 0]; buf[1..].copy_from_slice(&(v as u32).to_be_bytes()); self.writer.write_exact(&buf)?; } else { let mut buf: [_; 9] = [0x87, 0, 0, 0, 0, 0, 0, 0, 0]; buf[1..].copy_from_slice(&(v as u64).to_be_bytes()); self.writer.write_exact(&buf)?; } } } Ok(()) } } impl Writer for BinaryWriter { fn write_start_array(&mut self, _len: Option) -> Result<(), Error> { self.write_start_collection(CollectionType::Array) } fn write_start_dictionary(&mut self, _len: Option) -> Result<(), Error> { self.write_start_collection(CollectionType::Dictionary) } fn write_end_collection(&mut self) -> Result<(), Error> { self.write_end_collection() } fn write_boolean(&mut self, value: bool) -> Result<(), Error> { self.write_value(Value::Boolean(value)) } fn write_data(&mut self, value: &[u8]) -> Result<(), Error> { self.write_value(Value::Data(Cow::Borrowed(value))) } fn write_date(&mut self, value: Date) -> Result<(), Error> { self.write_value(Value::Date(value)) } fn write_integer(&mut self, value: Integer) -> Result<(), Error> { self.write_value(Value::Integer(value)) } fn write_real(&mut self, value: f64) -> Result<(), Error> { self.write_value(Value::Real(value.to_bits())) } fn write_string(&mut self, value: &str) -> Result<(), Error> { self.write_value(Value::String(Cow::Borrowed(value))) } fn write_uid(&mut self, value: Uid) -> Result<(), Error> { self.write_value(Value::Uid(value)) } } fn is_even(value: usize) -> bool { value & 1 == 0 } fn value_mut<'a>( values: &'a mut IndexMap, ValueState>, value_index: usize, ) -> (&'a Value<'static>, &'a mut ValueState) { values .get_index_mut(value_index) .expect("internal consistency error") } fn write_plist_value_ty_and_size( writer: &mut PosWriter, token: u8, size: usize, ) -> Result<(), Error> { if size < 0x0f { writer.write_exact(&[token | (size as u8)])?; } else if size <= u8::max_value() as usize { writer.write_exact(&[token | 0x0f, 0x10, size as u8])?; } else if size <= u16::max_value() as usize { let mut buf: [_; 4] = [token | 0x0f, 0x11, 0, 0]; buf[2..].copy_from_slice(&(size as u16).to_be_bytes()); writer.write_exact(&buf)?; } else if size <= u32::max_value() as usize { let mut buf: [_; 6] = [token | 0x0f, 0x12, 0, 0, 0, 0]; buf[2..].copy_from_slice(&(size as u32).to_be_bytes()); writer.write_exact(&buf)?; } else { let mut buf: [_; 10] = [token | 0x0f, 0x13, 0, 0, 0, 0, 0, 0, 0, 0]; buf[2..].copy_from_slice(&(size as u64).to_be_bytes()); writer.write_exact(&buf)?; } Ok(()) } fn plist_ref_size(max_value: usize) -> u8 { let significant_bits = 64 - (max_value as u64).leading_zeros() as u8; // Convert to number of bytes let significant_bytes = (significant_bits + 7) / 8; // Round up to the next integer byte size which must be power of two. significant_bytes.next_power_of_two() } fn write_plist_ref( writer: &mut PosWriter, ref_size: u8, value: usize, ) -> Result<(), Error> { match ref_size { 1 => writer.write_exact(&[value as u8]), 2 => writer.write_exact(&(value as u16).to_be_bytes()), 4 => writer.write_exact(&(value as u32).to_be_bytes()), 8 => writer.write_exact(&(value as u64).to_be_bytes()), _ => unreachable!("`ref_size` is a power of two less than or equal to 8"), } } impl PosWriter { fn write_exact(&mut self, buf: &[u8]) -> Result<(), Error> { self.write_all(buf) .map_err(error::from_io_without_position)?; Ok(()) } } impl Write for PosWriter { fn write(&mut self, buf: &[u8]) -> io::Result { let count = self.writer.write(buf)?; self.pos = self .pos .checked_add(count) .expect("binary plist cannot be larger than `usize::max_value()` bytes"); Ok(count) } fn flush(&mut self) -> io::Result<()> { self.writer.flush() } } impl ObjectRef { fn zero() -> ObjectRef { ObjectRef(NonZeroUsize::new(1).unwrap()) } fn clone_and_increment_self(&mut self) -> ObjectRef { let current = self.0; self.0 = NonZeroUsize::new(current.get() + 1).unwrap(); ObjectRef(current) } fn value(&self) -> usize { self.0.get() - 1 } } impl<'a> Value<'a> { fn into_owned(self) -> Value<'static> { match self { Value::Boolean(v) => Value::Boolean(v), Value::Data(v) => Value::Data(Cow::Owned(v.into_owned())), Value::Date(v) => Value::Date(v), Value::Integer(v) => Value::Integer(v), Value::Real(v) => Value::Real(v), Value::String(v) => Value::String(Cow::Owned(v.into_owned())), Value::Uid(v) => Value::Uid(v), } } fn event_kind(&self) -> EventKind { match self { Value::Boolean(_) => EventKind::Boolean, Value::Data(_) => EventKind::Data, Value::Date(_) => EventKind::Date, Value::Integer(_) => EventKind::Integer, Value::Real(_) => EventKind::Real, Value::String(_) => EventKind::String, Value::Uid(_) => EventKind::Uid, } } } #[cfg(test)] mod tests { use std::{fs::File, io::Cursor, path::Path}; use crate::{stream::BinaryReader, Value}; fn test_roundtrip(path: &Path) { let reader = File::open(path).unwrap(); let streaming_parser = BinaryReader::new(reader); let value_to_encode = Value::from_events(streaming_parser).unwrap(); let mut buf = Cursor::new(Vec::new()); value_to_encode.to_writer_binary(&mut buf).unwrap(); let buf_inner = buf.into_inner(); let streaming_parser = BinaryReader::new(Cursor::new(buf_inner)); let events: Vec> = streaming_parser.collect(); let value_decoded_from_encode = Value::from_events(events.into_iter()).unwrap(); assert_eq!(value_to_encode, value_decoded_from_encode); } #[test] fn bplist_roundtrip() { test_roundtrip(&Path::new("./tests/data/binary.plist")) } #[test] fn utf16_roundtrip() { test_roundtrip(&Path::new("./tests/data/utf16_bplist.plist")) } #[test] fn nskeyedarchiver_roundtrip() { test_roundtrip(&Path::new("./tests/data/binary_NSKeyedArchiver.plist")) } }