diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/plist/src/stream | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr. (upstream/115.7.0esr)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/plist/src/stream')
-rw-r--r-- | third_party/rust/plist/src/stream/binary_reader.rs | 492 | ||||
-rw-r--r-- | third_party/rust/plist/src/stream/binary_writer.rs | 736 | ||||
-rw-r--r-- | third_party/rust/plist/src/stream/mod.rs | 266 | ||||
-rw-r--r-- | third_party/rust/plist/src/stream/xml_reader.rs | 275 | ||||
-rw-r--r-- | third_party/rust/plist/src/stream/xml_writer.rs | 391 |
5 files changed, 2160 insertions, 0 deletions
diff --git a/third_party/rust/plist/src/stream/binary_reader.rs b/third_party/rust/plist/src/stream/binary_reader.rs new file mode 100644 index 0000000000..a5d2d47ede --- /dev/null +++ b/third_party/rust/plist/src/stream/binary_reader.rs @@ -0,0 +1,492 @@ +use std::{ + io::{self, Read, Seek, SeekFrom}, + mem::size_of, +}; + +use crate::{ + date::{Date, InfiniteOrNanDate}, + error::{Error, ErrorKind}, + stream::{Event, OwnedEvent}, + u64_to_usize, Uid, +}; + +struct StackItem { + object_ref: u64, + child_object_refs: Vec<u64>, + ty: StackType, +} + +enum StackType { + Array, + Dict, +} + +// https://opensource.apple.com/source/CF/CF-550/CFBinaryPList.c +// https://hg.python.org/cpython/file/3.4/Lib/plistlib.py +pub struct BinaryReader<R> { + stack: Vec<StackItem>, + object_offsets: Vec<u64>, + object_on_stack: Vec<bool>, + reader: PosReader<R>, + ref_size: u8, + root_object: u64, + trailer_start_offset: u64, +} + +struct PosReader<R> { + reader: R, + pos: u64, +} + +impl<R: Read + Seek> PosReader<R> { + fn read_all(&mut self, buf: &mut [u8]) -> Result<(), Error> { + self.read_exact(buf) + .map_err(|err| ErrorKind::Io(err).with_byte_offset(self.pos))?; + Ok(()) + } + + fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> { + self.pos = self + .reader + .seek(pos) + .map_err(|err| ErrorKind::Io(err).with_byte_offset(self.pos))?; + Ok(self.pos) + } +} + +impl<R: Read> Read for PosReader<R> { + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + let count = self.reader.read(buf)?; + self.pos + .checked_add(count as u64) + .expect("file cannot be larger than `u64::max_value()` bytes"); + Ok(count) + } +} + +impl<R: Read + Seek> BinaryReader<R> { + pub fn new(reader: R) -> BinaryReader<R> { + BinaryReader { + stack: Vec::new(), + object_offsets: Vec::new(), + object_on_stack: Vec::new(), + reader: PosReader { reader, pos: 0 }, + ref_size: 0, + root_object: 0, + trailer_start_offset: 0, + } + } + + fn allocate_vec<T>(&self, len: u64, size: usize) -> 
Result<Vec<T>, Error> { + // Check we are not reading past the start of the plist trailer + let inner = |len: u64, size: usize| { + let byte_len = len.checked_mul(size as u64)?; + let end_offset = self.reader.pos.checked_add(byte_len)?; + if end_offset <= self.trailer_start_offset { + Some(()) + } else { + None + } + }; + inner(len, size).ok_or_else(|| self.with_pos(ErrorKind::ObjectOffsetTooLarge))?; + + Ok(Vec::with_capacity(len as usize)) + } + + fn read_trailer(&mut self) -> Result<(), Error> { + self.reader.seek(SeekFrom::Start(0))?; + let mut magic = [0; 8]; + self.reader.read_all(&mut magic)?; + if &magic != b"bplist00" { + return Err(self.with_pos(ErrorKind::InvalidMagic)); + } + + self.trailer_start_offset = self.reader.seek(SeekFrom::End(-32))?; + + // Trailer starts with 6 bytes of padding + let mut zeros = [0; 6]; + self.reader.read_all(&mut zeros)?; + + let offset_size = self.read_u8()?; + match offset_size { + 1 | 2 | 4 | 8 => (), + _ => return Err(self.with_pos(ErrorKind::InvalidTrailerObjectOffsetSize)), + } + + self.ref_size = self.read_u8()?; + match self.ref_size { + 1 | 2 | 4 | 8 => (), + _ => return Err(self.with_pos(ErrorKind::InvalidTrailerObjectReferenceSize)), + } + + let num_objects = self.read_be_u64()?; + self.root_object = self.read_be_u64()?; + let offset_table_offset = self.read_be_u64()?; + + // Read offset table + self.reader.seek(SeekFrom::Start(offset_table_offset))?; + self.object_offsets = self.read_ints(num_objects, offset_size)?; + self.object_on_stack = vec![false; self.object_offsets.len()]; + + Ok(()) + } + + /// Reads a list of `len` big-endian integers of `size` bytes from the reader. 
+ fn read_ints(&mut self, len: u64, size: u8) -> Result<Vec<u64>, Error> { + let mut ints = self.allocate_vec(len, size as usize)?; + for _ in 0..len { + match size { + 1 => ints.push(self.read_u8()?.into()), + 2 => ints.push(self.read_be_u16()?.into()), + 4 => ints.push(self.read_be_u32()?.into()), + 8 => ints.push(self.read_be_u64()?), + _ => unreachable!("size is either self.ref_size or offset_size both of which are already validated") + } + } + Ok(ints) + } + + /// Reads a list of `len` offsets into the object table from the reader. + fn read_refs(&mut self, len: u64) -> Result<Vec<u64>, Error> { + let ref_size = self.ref_size; + self.read_ints(len, ref_size) + } + + /// Reads a compressed value length from the reader. `len` must contain the low 4 bits of the + /// object token. + fn read_object_len(&mut self, len: u8) -> Result<u64, Error> { + if (len & 0x0f) == 0x0f { + let len_power_of_two = self.read_u8()? & 0x03; + Ok(match len_power_of_two { + 0 => self.read_u8()?.into(), + 1 => self.read_be_u16()?.into(), + 2 => self.read_be_u32()?.into(), + 3 => self.read_be_u64()?, + _ => return Err(self.with_pos(ErrorKind::InvalidObjectLength)), + }) + } else { + Ok(len.into()) + } + } + + /// Reads `len` bytes from the reader. + fn read_data(&mut self, len: u64) -> Result<Vec<u8>, Error> { + let mut data = self.allocate_vec(len, size_of::<u8>())?; + data.resize(len as usize, 0); + self.reader.read_all(&mut data)?; + Ok(data) + } + + fn seek_to_object(&mut self, object_ref: u64) -> Result<u64, Error> { + let object_ref = u64_to_usize(object_ref) + .ok_or_else(|| self.with_pos(ErrorKind::ObjectReferenceTooLarge))?; + let offset = *self + .object_offsets + .get(object_ref) + .ok_or_else(|| self.with_pos(ErrorKind::ObjectReferenceTooLarge))?; + if offset >= self.trailer_start_offset { + return Err(self.with_pos(ErrorKind::ObjectOffsetTooLarge)); + } + Ok(self.reader.seek(SeekFrom::Start(offset))?) 
+ } + + fn push_stack_item_and_check_for_recursion(&mut self, item: StackItem) -> Result<(), Error> { + let object_ref = u64_to_usize(item.object_ref).expect("internal consistency error"); + let is_on_stack = &mut self.object_on_stack[object_ref]; + if *is_on_stack { + return Err(self.with_pos(ErrorKind::RecursiveObject)); + } + *is_on_stack = true; + self.stack.push(item); + Ok(()) + } + + fn pop_stack_item(&mut self) -> StackItem { + let item = self.stack.pop().expect("internal consistency error"); + let object_ref = u64_to_usize(item.object_ref).expect("internal consistency error"); + self.object_on_stack[object_ref] = false; + item + } + + fn read_next(&mut self) -> Result<Option<OwnedEvent>, Error> { + let object_ref = if self.ref_size == 0 { + // Initialise here rather than in new + self.read_trailer()?; + self.root_object + } else { + let maybe_object_ref = if let Some(stack_item) = self.stack.last_mut() { + stack_item.child_object_refs.pop() + } else { + // Finished reading the plist + return Ok(None); + }; + + if let Some(object_ref) = maybe_object_ref { + object_ref + } else { + // We're at the end of an array or dict. Pop the top stack item and return. 
+ let stack_item = self.pop_stack_item(); + match stack_item.ty { + StackType::Array | StackType::Dict => return Ok(Some(Event::EndCollection)), + } + } + }; + + self.seek_to_object(object_ref)?; + + let token = self.read_u8()?; + let ty = (token & 0xf0) >> 4; + let size = token & 0x0f; + + let result = match (ty, size) { + (0x0, 0x00) => return Err(self.with_pos(ErrorKind::NullObjectUnimplemented)), + (0x0, 0x08) => Some(Event::Boolean(false)), + (0x0, 0x09) => Some(Event::Boolean(true)), + (0x0, 0x0f) => return Err(self.with_pos(ErrorKind::FillObjectUnimplemented)), + (0x1, 0) => Some(Event::Integer(self.read_u8()?.into())), + (0x1, 1) => Some(Event::Integer(self.read_be_u16()?.into())), + (0x1, 2) => Some(Event::Integer(self.read_be_u32()?.into())), + (0x1, 3) => Some(Event::Integer(self.read_be_i64()?.into())), + (0x1, 4) => { + let value = self.read_be_i128()?; + if value < 0 || value > u64::max_value().into() { + return Err(self.with_pos(ErrorKind::IntegerOutOfRange)); + } + Some(Event::Integer((value as u64).into())) + } + (0x1, _) => return Err(self.with_pos(ErrorKind::UnknownObjectType(token))), // variable length int + (0x2, 2) => Some(Event::Real(f32::from_bits(self.read_be_u32()?).into())), + (0x2, 3) => Some(Event::Real(f64::from_bits(self.read_be_u64()?))), + (0x2, _) => return Err(self.with_pos(ErrorKind::UnknownObjectType(token))), // odd length float + (0x3, 3) => { + // Date. Seconds since 1/1/2001 00:00:00. 
+ let secs = f64::from_bits(self.read_be_u64()?); + let date = Date::from_seconds_since_plist_epoch(secs) + .map_err(|InfiniteOrNanDate| self.with_pos(ErrorKind::InfiniteOrNanDate))?; + Some(Event::Date(date)) + } + (0x4, n) => { + // Data + let len = self.read_object_len(n)?; + Some(Event::Data(self.read_data(len)?.into())) + } + (0x5, n) => { + // ASCII string + let len = self.read_object_len(n)?; + let raw = self.read_data(len)?; + let string = String::from_utf8(raw) + .map_err(|_| self.with_pos(ErrorKind::InvalidUtf8String))?; + Some(Event::String(string.into())) + } + (0x6, n) => { + // UTF-16 string + let len_utf16_codepoints = self.read_object_len(n)?; + let mut raw_utf16 = self.allocate_vec(len_utf16_codepoints, size_of::<u16>())?; + + for _ in 0..len_utf16_codepoints { + raw_utf16.push(self.read_be_u16()?); + } + + let string = String::from_utf16(&raw_utf16) + .map_err(|_| self.with_pos(ErrorKind::InvalidUtf16String))?; + Some(Event::String(string.into())) + } + (0x8, n) if n < 8 => { + // Uid + let mut buf = [0; 8]; + // `len_bytes` is at most 8. + let len_bytes = n as usize + 1; + // Values are stored in big-endian so we must put the least significant bytes at + // the end of the buffer. 
+ self.reader.read_all(&mut buf[8 - len_bytes..])?; + let value = u64::from_be_bytes(buf); + + Some(Event::Uid(Uid::new(value))) + } + (0xa, n) => { + // Array + let len = self.read_object_len(n)?; + let mut child_object_refs = self.read_refs(len)?; + // Reverse so we can pop off the end of the stack in order + child_object_refs.reverse(); + + self.push_stack_item_and_check_for_recursion(StackItem { + object_ref, + ty: StackType::Array, + child_object_refs, + })?; + + Some(Event::StartArray(Some(len))) + } + (0xd, n) => { + // Dict + let len = self.read_object_len(n)?; + let key_refs = self.read_refs(len)?; + let value_refs = self.read_refs(len)?; + + let keys_and_values_len = len + .checked_mul(2) + .ok_or_else(|| self.with_pos(ErrorKind::ObjectTooLarge))?; + let mut child_object_refs = + self.allocate_vec(keys_and_values_len, self.ref_size as usize)?; + let len = key_refs.len(); + for i in 1..=len { + // Reverse so we can pop off the end of the stack in order + child_object_refs.push(value_refs[len - i]); + child_object_refs.push(key_refs[len - i]); + } + + self.push_stack_item_and_check_for_recursion(StackItem { + object_ref, + ty: StackType::Dict, + child_object_refs, + })?; + + Some(Event::StartDictionary(Some(len as u64))) + } + (_, _) => return Err(self.with_pos(ErrorKind::UnknownObjectType(token))), + }; + + Ok(result) + } + + fn read_u8(&mut self) -> Result<u8, Error> { + let mut buf = [0; 1]; + self.reader.read_all(&mut buf)?; + Ok(buf[0]) + } + + fn read_be_u16(&mut self) -> Result<u16, Error> { + let mut buf = [0; 2]; + self.reader.read_all(&mut buf)?; + Ok(u16::from_be_bytes(buf)) + } + + fn read_be_u32(&mut self) -> Result<u32, Error> { + let mut buf = [0; 4]; + self.reader.read_all(&mut buf)?; + Ok(u32::from_be_bytes(buf)) + } + + fn read_be_u64(&mut self) -> Result<u64, Error> { + let mut buf = [0; 8]; + self.reader.read_all(&mut buf)?; + Ok(u64::from_be_bytes(buf)) + } + + fn read_be_i64(&mut self) -> Result<i64, Error> { + let mut buf = [0; 8]; + 
self.reader.read_all(&mut buf)?; + Ok(i64::from_be_bytes(buf)) + } + + fn read_be_i128(&mut self) -> Result<i128, Error> { + let mut buf = [0; 16]; + self.reader.read_all(&mut buf)?; + Ok(i128::from_be_bytes(buf)) + } + + fn with_pos(&self, kind: ErrorKind) -> Error { + kind.with_byte_offset(self.reader.pos) + } +} + +impl<R: Read + Seek> Iterator for BinaryReader<R> { + type Item = Result<OwnedEvent, Error>; + + fn next(&mut self) -> Option<Result<OwnedEvent, Error>> { + match self.read_next() { + Ok(Some(event)) => Some(Ok(event)), + Err(err) => { + // Mark the plist as finished + self.stack.clear(); + Some(Err(err)) + } + Ok(None) => None, + } + } +} + +#[cfg(test)] +mod tests { + use std::{fs::File, path::Path}; + + use super::*; + use crate::{stream::Event, Uid}; + + #[test] + fn streaming_parser() { + use crate::stream::Event::*; + + let reader = File::open(&Path::new("./tests/data/binary.plist")).unwrap(); + let streaming_parser = BinaryReader::new(reader); + let events: Vec<Event> = streaming_parser.map(|e| e.unwrap()).collect(); + + let comparison = &[ + StartDictionary(Some(13)), + String("Author".into()), + String("William Shakespeare".into()), + String("Birthdate".into()), + Date(super::Date::from_rfc3339("1981-05-16T11:32:06Z").unwrap()), + String("EmptyArray".into()), + StartArray(Some(0)), + EndCollection, + String("IsNotFalse".into()), + Boolean(false), + String("SmallestNumber".into()), + Integer((-9223372036854775808i64).into()), + String("EmptyDictionary".into()), + StartDictionary(Some(0)), + EndCollection, + String("Height".into()), + Real(1.6), + String("Lines".into()), + StartArray(Some(2)), + String("It is a tale told by an idiot,".into()), + String("Full of sound and fury, signifying nothing.".into()), + EndCollection, + String("Death".into()), + Integer(1564.into()), + String("Blank".into()), + String("".into()), + String("BiggestNumber".into()), + Integer(18446744073709551615u64.into()), + String("IsTrue".into()), + Boolean(true), + 
String("Data".into()), + Data(vec![0, 0, 0, 190, 0, 0, 0, 3, 0, 0, 0, 30, 0, 0, 0].into()), + EndCollection, + ]; + + assert_eq!(events, &comparison[..]); + } + + #[test] + fn utf16_plist() { + let reader = File::open(&Path::new("./tests/data/utf16_bplist.plist")).unwrap(); + let streaming_parser = BinaryReader::new(reader); + let mut events: Vec<Event> = streaming_parser.map(|e| e.unwrap()).collect(); + + assert_eq!(events[2], Event::String("\u{2605} or better".into())); + + let poem = if let Event::String(ref mut poem) = events[4] { + poem + } else { + panic!("not a string") + }; + assert_eq!(poem.len(), 643); + assert_eq!(poem.to_mut().pop().unwrap(), '\u{2605}'); + } + + #[test] + fn nskeyedarchiver_plist() { + let reader = File::open(&Path::new("./tests/data/binary_NSKeyedArchiver.plist")).unwrap(); + let streaming_parser = BinaryReader::new(reader); + let events: Vec<Event> = streaming_parser.map(|e| e.unwrap()).collect(); + + assert_eq!(events[10], Event::Uid(Uid::new(4))); + assert_eq!(events[12], Event::Uid(Uid::new(2))); + assert_eq!(events[18], Event::Uid(Uid::new(3))); + assert_eq!(events[46], Event::Uid(Uid::new(1))); + } +} diff --git a/third_party/rust/plist/src/stream/binary_writer.rs b/third_party/rust/plist/src/stream/binary_writer.rs new file mode 100644 index 0000000000..0e04dbe865 --- /dev/null +++ b/third_party/rust/plist/src/stream/binary_writer.rs @@ -0,0 +1,736 @@ +// TODO: Revisit the design of `Event` once the `HashMap` raw interface is stabilised. +// Ideally `Value`s would be stored inline in `Event`. 
+ +use indexmap::IndexMap; +use std::{ + borrow::Cow, + io::{self, Write}, + mem, + num::NonZeroUsize, +}; + +use crate::{ + error::{self, Error, ErrorKind, EventKind}, + stream::Writer, + Date, Integer, Uid, +}; + +pub struct BinaryWriter<W: Write> { + writer: PosWriter<W>, + events: Vec<Event>, + dictionary_key_events: Vec<usize>, + values: IndexMap<Value<'static>, ValueState>, + /// Pointers into `events` for each of the currently unclosed `Collection` events. + collection_stack: Vec<usize>, + /// The number of `Collection` and unique `Value` events in `events`. + num_objects: usize, +} + +struct PosWriter<W: Write> { + writer: W, + pos: usize, +} + +#[derive(Clone)] +struct ObjectRef(NonZeroUsize); + +/// An array of `len` elements is stored as a `Collection` event followed by `skip_len` events +/// containing the contents of the array. e.g. +/// +/// Collection(ty: Array, len: 2, skip_len: 2) +/// Value +/// Value +/// +/// If the array contains another array or dictionary `len` and `skip_len` will differ. e.g. +/// +/// Collection(ty: Array, len: 2, skip_len: 3) +/// Value +/// Collection(ty: Array, len: 1, skip_len: 1) +/// Value +/// +/// A dictionary of `len` (key, value) pairs is stored as a `Collection` event followed by +/// `skip_len` events containing the contents of the dictionary. The dictionary values are stored +/// first. These are followed by a `DictionaryKeys` event and then the keys themselves. e.g. +/// +/// Collection(ty: Dictionary, len: 2, skip_len: 6) +/// Value +/// Collection(ty: Array, len: 1, skip_len: 1) +/// Value +/// DictionaryKeys(2) +/// Value (Key) +/// Value (Key) +/// +/// This arrangement simplifies writing dictionaries as they must be written in the order +/// (key, key, value, value) instead of (key, value, key, value) as they are passed to the writer. 
+/// Unclosed dictionaries have their keys stored in `dictionary_key_events` and these are only +/// moved to the end of the `BinaryWriter::events` array once the dictionary is closed in +/// `write_end_collection`. +enum Event { + Collection(Collection), + /// Index of the value in the `values` map. + Value(usize), + /// The number of dictionary keys following this event. + DictionaryKeys(usize), +} + +struct Collection { + ty: CollectionType, + /// The number of elements in an array or (key, value) pairs in a dictionary. + /// Unclosed dictionaries have a `len` equal to the number of keys plus the number of values + /// written so far. This is fixed up in `write_end_collection`. + len: usize, + /// The number of events to skip to get to the next element after the collection. + skip: usize, + object_ref: Option<ObjectRef>, +} + +#[derive(Eq, PartialEq)] +enum CollectionType { + Array, + Dictionary, +} + +#[derive(Eq, Hash, PartialEq)] +enum Value<'a> { + Boolean(bool), + Data(Cow<'a, [u8]>), + Date(Date), + Integer(Integer), + /// Floats are deduplicated based on their bitwise value. + Real(u64), + String(Cow<'a, str>), + Uid(Uid), +} + +enum ValueState { + /// The value has not been assigned an object reference. + Unassigned, + /// The value has been assigned an object reference but has not yet been written. + Unwritten(ObjectRef), + /// The value has been written with the given object reference. 
+ Written(ObjectRef), +} + +impl<W: Write> BinaryWriter<W> { + pub fn new(writer: W) -> BinaryWriter<W> { + BinaryWriter { + writer: PosWriter { writer, pos: 0 }, + events: Vec::new(), + dictionary_key_events: Vec::new(), + values: IndexMap::new(), + collection_stack: Vec::new(), + num_objects: 0, + } + } + + fn write_start_collection(&mut self, ty: CollectionType) -> Result<(), Error> { + if self.expecting_dictionary_key() { + let ty_event_kind = match ty { + CollectionType::Array => EventKind::StartArray, + CollectionType::Dictionary => EventKind::StartDictionary, + }; + return Err(ErrorKind::UnexpectedEventType { + expected: EventKind::DictionaryKeyOrEndCollection, + found: ty_event_kind, + } + .without_position()); + } + self.increment_current_collection_len(); + self.collection_stack.push(self.events.len()); + self.events.push(Event::Collection(Collection { + ty, + len: 0, + skip: 0, + object_ref: None, + })); + self.num_objects += 1; + Ok(()) + } + + fn write_end_collection(&mut self) -> Result<(), Error> { + let collection_event_index = self.collection_stack.pop().ok_or_else(|| { + ErrorKind::UnexpectedEventType { + expected: EventKind::ValueOrStartCollection, + found: EventKind::EndCollection, + } + .without_position() + })?; + + let current_event_index = self.events.len() - 1; + let c = if let Event::Collection(c) = &mut self.events[collection_event_index] { + c + } else { + unreachable!("items in `collection_stack` always point to a collection event"); + }; + + c.skip = current_event_index - collection_event_index; + + if let CollectionType::Dictionary = c.ty { + // Ensure that every dictionary key is paired with a value. + if !is_even(c.len) { + return Err(ErrorKind::UnexpectedEventType { + expected: EventKind::DictionaryKeyOrEndCollection, + found: EventKind::EndCollection, + } + .without_position()); + } + + // Fix up the dictionary length. It should contain the number of key-value pairs, + // not the number of keys and values. 
+ c.len /= 2; + + // To skip past a dictionary we also need to skip the `DictionaryKeys` event and the + // keys that follow it. + c.skip += 1 + c.len; + let len = c.len; + self.events.push(Event::DictionaryKeys(len)); + + // Move the cached dictionary keys to the end of the events array. + let keys_start_index = self.dictionary_key_events.len() - len; + self.events.extend( + self.dictionary_key_events + .drain(keys_start_index..) + .map(Event::Value), + ); + } + + if self.collection_stack.is_empty() { + self.write_plist()?; + } + + Ok(()) + } + + fn write_value(&mut self, value: Value) -> Result<(), Error> { + let expecting_dictionary_key = self.expecting_dictionary_key(); + + // Ensure that all dictionary keys are strings. + match (&value, expecting_dictionary_key) { + (Value::String(_), true) | (_, false) => (), + (_, true) => { + return Err(ErrorKind::UnexpectedEventType { + expected: EventKind::DictionaryKeyOrEndCollection, + found: value.event_kind(), + } + .without_position()) + } + } + + // Deduplicate `value`. There is one entry in `values` for each unique `Value` in the + // plist. + let value_index = if let Some((value_index, _, _)) = self.values.get_full(&value) { + value_index + } else { + self.num_objects += 1; + let value = value.into_owned(); + let (value_index, _) = self.values.insert_full(value, ValueState::Unassigned); + value_index + }; + + // Dictionary keys are buffered in `dictionary_key_events` until the dictionary is closed + // in `write_end_collection` when they are moved to the end of the `events` array. 
+ if expecting_dictionary_key { + self.dictionary_key_events.push(value_index); + } else { + self.events.push(Event::Value(value_index)); + } + + self.increment_current_collection_len(); + + if self.collection_stack.is_empty() { + self.write_plist()?; + } + + Ok(()) + } + + fn expecting_dictionary_key(&self) -> bool { + if let Some(&event_index) = self.collection_stack.last() { + if let Event::Collection(c) = &self.events[event_index] { + c.ty == CollectionType::Dictionary && is_even(c.len) + } else { + unreachable!("items in `collection_stack` always point to a collection event"); + } + } else { + false + } + } + + fn increment_current_collection_len(&mut self) { + if let Some(&event_index) = self.collection_stack.last() { + if let Event::Collection(c) = &mut self.events[event_index] { + c.len += 1; + } else { + unreachable!("items in `collection_stack` always point to a collection event"); + } + } + } + + fn write_plist(&mut self) -> Result<(), Error> { + assert!(self.collection_stack.is_empty()); + + // Write header + self.writer.write_exact(b"bplist00")?; + + // Write objects + let mut events_vec = mem::replace(&mut self.events, Vec::new()); + let mut events = &mut events_vec[..]; + let ref_size = plist_ref_size(self.num_objects - 1); + let mut offset_table = vec![0; self.num_objects]; + + // Assign the first (root) event an object reference of zero. 
+ let mut next_object_ref = ObjectRef::zero(); + match &mut events[0] { + Event::Value(value_index) => { + let (_, value_state) = value_mut(&mut self.values, *value_index); + *value_state = ValueState::Unwritten(next_object_ref.clone_and_increment_self()); + } + Event::Collection(c) => { + c.object_ref = Some(next_object_ref.clone_and_increment_self()); + } + Event::DictionaryKeys(_) => { + unreachable!("`events` starts with a value or collection event") + } + } + + while let Some((event, rest)) = events.split_first_mut() { + events = rest; + match event { + Event::Collection(c) => { + let collection_events = &mut events[..c.skip]; + self.write_plist_collection( + c, + collection_events, + ref_size, + &mut next_object_ref, + &mut offset_table, + )?; + } + Event::Value(value_index) => { + self.write_plist_value(*value_index, &mut offset_table)?; + } + // Dictionary keys will have already been written in `write_plist_collection` so we + // skip over them here. + Event::DictionaryKeys(len) => { + events = &mut events[*len..]; + } + } + } + + // Write object offset table + let offset_table_offset = self.writer.pos; + let offset_size = plist_ref_size(offset_table_offset); + for &offset in &offset_table { + write_plist_ref(&mut self.writer, offset_size, offset)?; + } + + // Write trailer + // 6 zero bytes padding + // 1 byte offset size + // 1 byte object ref size + // 8 bytes number of objects + // 8 bytes root object ref (always zero) + // 8 bytes file offset of the object offset table + let mut trailer = [0; 32]; + trailer[6] = offset_size; + trailer[7] = ref_size; + trailer[8..16].copy_from_slice(&(self.num_objects as u64).to_be_bytes()); + trailer[24..32].copy_from_slice(&(offset_table_offset as u64).to_be_bytes()); + self.writer.write_exact(&trailer)?; + + self.writer + .flush() + .map_err(error::from_io_without_position)?; + + // Reset plist writer + self.writer.pos = 0; + events_vec.clear(); + self.events = events_vec; + self.values.clear(); + self.num_objects = 
0; + + Ok(()) + } + + fn write_plist_collection( + &mut self, + collection: &Collection, + events: &mut [Event], + ref_size: u8, + next_object_ref: &mut ObjectRef, + offset_table: &mut Vec<usize>, + ) -> Result<(), Error> { + if let Some(object_ref) = &collection.object_ref { + offset_table[object_ref.value()] = self.writer.pos; + } else { + unreachable!("collection object refs are assigned before this function is called"); + } + + // Split the events in the current collection into keys and values (arrays contain only + // values). This is required as dictionary keys appear after values in the `events` array + // but all keys must be written before any values. + let (keys, values, ty) = match collection.ty { + CollectionType::Array => (&mut [][..], events, 0xa0), + CollectionType::Dictionary => { + let keys_start_offset = events.len() - collection.len - 1; + let (values, keys) = events.split_at_mut(keys_start_offset); + (&mut keys[1..], values, 0xd0) + } + }; + let mut collection_events = keys.iter_mut().chain(values); + + // Collections are written as a length prefixed array of object references. For an array + // the length is the number of elements. For a dictionary it is the number of (key, value) + // pairs. + write_plist_value_ty_and_size(&mut self.writer, ty, collection.len)?; + while let Some(event) = collection_events.next() { + let object_ref = match event { + Event::Collection(c) => { + // We only want to write references to top level elements in the collection so + // we skip over the contents of any sub-collections. + if c.skip > 0 { + let _ = collection_events.nth(c.skip - 1); + } + + // Collections are not deduplicated so they must be assigned an object + // reference here. 
+ assert!(c.object_ref.is_none()); + let object_ref = next_object_ref.clone_and_increment_self(); + c.object_ref = Some(object_ref.clone()); + object_ref + } + Event::Value(value_index) => { + // Values are deduplicated so we only assign an object reference if we have not + // already done so previously. + let (_, value_state) = value_mut(&mut self.values, *value_index); + match value_state { + ValueState::Unassigned => { + let object_ref = next_object_ref.clone_and_increment_self(); + *value_state = ValueState::Unwritten(object_ref.clone()); + object_ref + } + ValueState::Unwritten(object_ref) | ValueState::Written(object_ref) => { + object_ref.clone() + } + } + } + Event::DictionaryKeys(_) => unreachable!( + "`DictionaryKeys` events are specifically excluded from the iterator" + ), + }; + write_plist_ref(&mut self.writer, ref_size, object_ref.value())?; + } + + // We write dictionary keys here as they appear after values in the `events` array but + // should come before values in the plist stream to reduce seeking on read. 
+ for key in keys { + if let Event::Value(value_index) = key { + self.write_plist_value(*value_index, offset_table)?; + } else { + unreachable!("dictionary keys are assigned as values in `write_end_collection`"); + } + } + + Ok(()) + } + + fn write_plist_value( + &mut self, + value_index: usize, + offset_table: &mut Vec<usize>, + ) -> Result<(), Error> { + let (value, value_state) = value_mut(&mut self.values, value_index); + + let object_ref = match value_state { + ValueState::Unassigned => { + unreachable!("value object refs are assigned before this function is called"); + } + ValueState::Unwritten(object_ref) => object_ref.clone(), + ValueState::Written(_) => return Ok(()), + }; + + offset_table[object_ref.value()] = self.writer.pos; + *value_state = ValueState::Written(object_ref); + + match value { + Value::Boolean(true) => { + self.writer.write_exact(&[0x09])?; + } + Value::Boolean(false) => { + self.writer.write_exact(&[0x08])?; + } + Value::Data(v) => { + write_plist_value_ty_and_size(&mut self.writer, 0x40, v.len())?; + self.writer.write_exact(&v[..])?; + } + Value::Date(v) => { + let secs = v.to_seconds_since_plist_epoch(); + let mut buf: [_; 9] = [0x33, 0, 0, 0, 0, 0, 0, 0, 0]; + buf[1..].copy_from_slice(&secs.to_bits().to_be_bytes()); + self.writer.write_exact(&buf)?; + } + Value::Integer(v) => { + if let Some(v) = v.as_signed() { + if v >= 0 && v <= i64::from(u8::max_value()) { + self.writer.write_exact(&[0x10, v as u8])?; + } else if v >= 0 && v <= i64::from(u16::max_value()) { + let mut buf: [_; 3] = [0x11, 0, 0]; + buf[1..].copy_from_slice(&(v as u16).to_be_bytes()); + self.writer.write_exact(&buf)?; + } else if v >= 0 && v <= i64::from(u32::max_value()) { + let mut buf: [_; 5] = [0x12, 0, 0, 0, 0]; + buf[1..].copy_from_slice(&(v as u32).to_be_bytes()); + self.writer.write_exact(&buf)?; + } else { + let mut buf: [_; 9] = [0x13, 0, 0, 0, 0, 0, 0, 0, 0]; + buf[1..].copy_from_slice(&v.to_be_bytes()); + self.writer.write_exact(&buf)?; + } + } else if 
let Some(v) = v.as_unsigned() { + // `u64`s larger than `i64::max_value()` are stored as signed 128 bit + // integers. + let mut buf: [_; 17] = [0x14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + buf[1..].copy_from_slice(&i128::from(v).to_be_bytes()); + self.writer.write_exact(&buf)?; + } else { + unreachable!("an integer can be represented as either an i64 or u64"); + } + } + Value::Real(v) => { + let mut buf: [_; 9] = [0x23, 0, 0, 0, 0, 0, 0, 0, 0]; + buf[1..].copy_from_slice(&v.to_be_bytes()); + self.writer.write_exact(&buf)?; + } + Value::String(v) if v.is_ascii() => { + let ascii = v.as_bytes(); + write_plist_value_ty_and_size(&mut self.writer, 0x50, ascii.len())?; + self.writer.write_exact(ascii)?; + } + Value::String(v) => { + let utf16_len = v.encode_utf16().count(); + write_plist_value_ty_and_size(&mut self.writer, 0x60, utf16_len)?; + for c in v.encode_utf16() { + self.writer.write_exact(&c.to_be_bytes())?; + } + } + Value::Uid(v) => { + let v = v.get(); + if v <= u64::from(u8::max_value()) { + self.writer.write_exact(&[0x80, v as u8])?; + } else if v <= u64::from(u16::max_value()) { + let mut buf: [_; 3] = [0x81, 0, 0]; + buf[1..].copy_from_slice(&(v as u16).to_be_bytes()); + self.writer.write_exact(&buf)?; + } else if v <= u64::from(u32::max_value()) { + let mut buf: [_; 5] = [0x83, 0, 0, 0, 0]; + buf[1..].copy_from_slice(&(v as u32).to_be_bytes()); + self.writer.write_exact(&buf)?; + } else { + let mut buf: [_; 9] = [0x87, 0, 0, 0, 0, 0, 0, 0, 0]; + buf[1..].copy_from_slice(&(v as u64).to_be_bytes()); + self.writer.write_exact(&buf)?; + } + } + } + Ok(()) + } +} + +impl<W: Write> Writer for BinaryWriter<W> { + fn write_start_array(&mut self, _len: Option<u64>) -> Result<(), Error> { + self.write_start_collection(CollectionType::Array) + } + fn write_start_dictionary(&mut self, _len: Option<u64>) -> Result<(), Error> { + self.write_start_collection(CollectionType::Dictionary) + } + fn write_end_collection(&mut self) -> Result<(), Error> { + 
/// Returns the number of bytes (1, 2, 4 or 8) used to store every reference up to and
/// including `max_value`.
///
/// Reference widths must be a power of two, so a value that needs three bytes is stored in
/// four and a value that needs five to seven bytes is stored in eight.
fn plist_ref_size(max_value: usize) -> u8 {
    match max_value as u64 {
        0..=0xff => 1,
        0x100..=0xffff => 2,
        0x1_0000..=0xffff_ffff => 4,
        _ => 8,
    }
}
} + } + + fn event_kind(&self) -> EventKind { + match self { + Value::Boolean(_) => EventKind::Boolean, + Value::Data(_) => EventKind::Data, + Value::Date(_) => EventKind::Date, + Value::Integer(_) => EventKind::Integer, + Value::Real(_) => EventKind::Real, + Value::String(_) => EventKind::String, + Value::Uid(_) => EventKind::Uid, + } + } +} + +#[cfg(test)] +mod tests { + use std::{fs::File, io::Cursor, path::Path}; + + use crate::{stream::BinaryReader, Value}; + + fn test_roundtrip(path: &Path) { + let reader = File::open(path).unwrap(); + let streaming_parser = BinaryReader::new(reader); + let value_to_encode = Value::from_events(streaming_parser).unwrap(); + + let mut buf = Cursor::new(Vec::new()); + value_to_encode.to_writer_binary(&mut buf).unwrap(); + + let buf_inner = buf.into_inner(); + + let streaming_parser = BinaryReader::new(Cursor::new(buf_inner)); + + let events: Vec<Result<_, _>> = streaming_parser.collect(); + let value_decoded_from_encode = Value::from_events(events.into_iter()).unwrap(); + + assert_eq!(value_to_encode, value_decoded_from_encode); + } + + #[test] + fn bplist_roundtrip() { + test_roundtrip(&Path::new("./tests/data/binary.plist")) + } + + #[test] + fn utf16_roundtrip() { + test_roundtrip(&Path::new("./tests/data/utf16_bplist.plist")) + } + + #[test] + fn nskeyedarchiver_roundtrip() { + test_roundtrip(&Path::new("./tests/data/binary_NSKeyedArchiver.plist")) + } +} diff --git a/third_party/rust/plist/src/stream/mod.rs b/third_party/rust/plist/src/stream/mod.rs new file mode 100644 index 0000000000..0fd9ec5f3e --- /dev/null +++ b/third_party/rust/plist/src/stream/mod.rs @@ -0,0 +1,266 @@ +//! An abstraction of a plist file as a stream of events. Used to support multiple encodings. 
/// An encoding of a plist as a flat structure.
///
/// Output by the event readers.
///
/// Dictionary keys and values are represented as pairs of values e.g.:
///
/// ```ignore rust
/// StartDictionary
/// String("Height") // Key
/// Real(181.2)      // Value
/// String("Age")    // Key
/// Integer(28)      // Value
/// EndCollection
/// ```
///
/// ## Lifetimes
///
/// This type has a lifetime parameter; during serialization, data is borrowed
/// from a [`Value`], and the lifetime of the event is the lifetime of the
/// [`Value`] being serialized.
///
/// During deserialization, data is always copied anyway, and this lifetime
/// is always `'static`.
#[derive(Clone, Debug, PartialEq)]
#[non_exhaustive]
pub enum Event<'a> {
    // While the length of an array or dict cannot feasibly be greater than max(usize), a `u64`
    // better conveys the concept of an effectively unbounded event stream.
    /// Start of an array. Carries the number of elements when the producer knows it
    /// (the `Events` iterator supplies it, the XML reader emits `None`).
    StartArray(Option<u64>),
    /// Start of a dictionary. Carries the number of entries when the producer knows it
    /// (the `Events` iterator supplies it, the XML reader emits `None`).
    StartDictionary(Option<u64>),
    /// End of the array or dictionary that was opened most recently.
    EndCollection,

    /// A boolean value.
    Boolean(bool),
    /// A blob of raw bytes.
    Data(Cow<'a, [u8]>),
    /// A date value.
    Date(Date),
    /// An integer value.
    Integer(Integer),
    /// A floating point value.
    Real(f64),
    /// A string value; also used for dictionary keys.
    String(Cow<'a, str>),
    /// A UID value. The XML writer in this crate rejects it with an error.
    Uid(Uid),
}
+pub struct Events<'a> { + stack: Vec<StackItem<'a>>, +} + +enum StackItem<'a> { + Root(&'a Value), + Array(std::slice::Iter<'a, Value>), + Dict(dictionary::Iter<'a>), + DictValue(&'a Value), +} + +/// Options for customizing serialization of XML plists. +#[derive(Clone, Debug)] +pub struct XmlWriteOptions { + indent_str: Cow<'static, str>, +} + +impl XmlWriteOptions { + /// Specify the sequence of characters used for indentation. + /// + /// This may be either an `&'static str` or an owned `String`. + /// + /// The default is `\t`. + pub fn indent_string(mut self, indent_str: impl Into<Cow<'static, str>>) -> Self { + self.indent_str = indent_str.into(); + self + } +} + +impl Default for XmlWriteOptions { + fn default() -> Self { + XmlWriteOptions { + indent_str: Cow::Borrowed("\t"), + } + } +} + +impl<'a> Events<'a> { + pub(crate) fn new(value: &'a Value) -> Events<'a> { + Events { + stack: vec![StackItem::Root(value)], + } + } +} + +impl<'a> Iterator for Events<'a> { + type Item = Event<'a>; + + fn next(&mut self) -> Option<Event<'a>> { + fn handle_value<'c, 'b: 'c>( + value: &'b Value, + stack: &'c mut Vec<StackItem<'b>>, + ) -> Event<'b> { + match value { + Value::Array(array) => { + let len = array.len(); + let iter = array.iter(); + stack.push(StackItem::Array(iter)); + Event::StartArray(Some(len as u64)) + } + Value::Dictionary(dict) => { + let len = dict.len(); + let iter = dict.into_iter(); + stack.push(StackItem::Dict(iter)); + Event::StartDictionary(Some(len as u64)) + } + Value::Boolean(value) => Event::Boolean(*value), + Value::Data(value) => Event::Data(Cow::Borrowed(&value)), + Value::Date(value) => Event::Date(*value), + Value::Real(value) => Event::Real(*value), + Value::Integer(value) => Event::Integer(*value), + Value::String(value) => Event::String(Cow::Borrowed(value.as_str())), + Value::Uid(value) => Event::Uid(*value), + } + } + + Some(match self.stack.pop()? 
{ + StackItem::Root(value) => handle_value(value, &mut self.stack), + StackItem::Array(mut array) => { + if let Some(value) = array.next() { + // There might still be more items in the array so return it to the stack. + self.stack.push(StackItem::Array(array)); + handle_value(value, &mut self.stack) + } else { + Event::EndCollection + } + } + StackItem::Dict(mut dict) => { + if let Some((key, value)) = dict.next() { + // There might still be more items in the dictionary so return it to the stack. + self.stack.push(StackItem::Dict(dict)); + // The next event to be returned must be the dictionary value. + self.stack.push(StackItem::DictValue(value)); + // Return the key event now. + Event::String(Cow::Borrowed(key)) + } else { + Event::EndCollection + } + } + StackItem::DictValue(value) => handle_value(value, &mut self.stack), + }) + } +} + +pub struct Reader<R: Read + Seek>(ReaderInner<R>); + +enum ReaderInner<R: Read + Seek> { + Uninitialized(Option<R>), + Xml(XmlReader<R>), + Binary(BinaryReader<R>), +} + +impl<R: Read + Seek> Reader<R> { + pub fn new(reader: R) -> Reader<R> { + Reader(ReaderInner::Uninitialized(Some(reader))) + } + + fn is_binary(reader: &mut R) -> Result<bool, Error> { + fn from_io_offset_0(err: io::Error) -> Error { + ErrorKind::Io(err).with_byte_offset(0) + } + + reader.seek(SeekFrom::Start(0)).map_err(from_io_offset_0)?; + let mut magic = [0; 8]; + reader.read_exact(&mut magic).map_err(from_io_offset_0)?; + reader.seek(SeekFrom::Start(0)).map_err(from_io_offset_0)?; + + Ok(&magic == b"bplist00") + } +} + +impl<R: Read + Seek> Iterator for Reader<R> { + type Item = Result<OwnedEvent, Error>; + + fn next(&mut self) -> Option<Result<OwnedEvent, Error>> { + let mut reader = match self.0 { + ReaderInner::Xml(ref mut parser) => return parser.next(), + ReaderInner::Binary(ref mut parser) => return parser.next(), + ReaderInner::Uninitialized(ref mut reader) => reader.take().unwrap(), + }; + + match Reader::is_binary(&mut reader) { + Ok(true) => 
/// Supports writing event streams in different plist encodings.
///
/// The trait is sealed through `private::Sealed`, which is only implemented for
/// `BinaryWriter` and `XmlWriter`.
pub trait Writer: private::Sealed {
    /// Writes a single event by forwarding it to the `write_*` method for its variant.
    fn write(&mut self, event: &Event) -> Result<(), Error> {
        match event {
            Event::StartArray(len) => self.write_start_array(*len),
            Event::StartDictionary(len) => self.write_start_dictionary(*len),
            Event::EndCollection => self.write_end_collection(),
            Event::Boolean(value) => self.write_boolean(*value),
            Event::Data(value) => self.write_data(value),
            Event::Date(value) => self.write_date(*value),
            Event::Integer(value) => self.write_integer(*value),
            Event::Real(value) => self.write_real(*value),
            Event::String(value) => self.write_string(value),
            Event::Uid(value) => self.write_uid(*value),
        }
    }

    /// Starts an array. `len` is the payload of `Event::StartArray`; both writers in this
    /// module ignore it.
    fn write_start_array(&mut self, len: Option<u64>) -> Result<(), Error>;
    /// Starts a dictionary. `len` is the payload of `Event::StartDictionary`; both writers
    /// in this module ignore it.
    fn write_start_dictionary(&mut self, len: Option<u64>) -> Result<(), Error>;
    /// Ends the array or dictionary that was started most recently.
    fn write_end_collection(&mut self) -> Result<(), Error>;

    /// Writes a boolean value.
    fn write_boolean(&mut self, value: bool) -> Result<(), Error>;
    /// Writes a blob of raw bytes.
    fn write_data(&mut self, value: &[u8]) -> Result<(), Error>;
    /// Writes a date value.
    fn write_date(&mut self, value: Date) -> Result<(), Error>;
    /// Writes an integer value.
    fn write_integer(&mut self, value: Integer) -> Result<(), Error>;
    /// Writes a floating point value.
    fn write_real(&mut self, value: f64) -> Result<(), Error>;
    /// Writes a string value; inside a dictionary this is also how keys are written.
    fn write_string(&mut self, value: &str) -> Result<(), Error>;
    /// Writes a UID value. `XmlWriter` always fails with `UidNotSupportedInXmlPlist`.
    fn write_uid(&mut self, value: Uid) -> Result<(), Error>;
}
b/third_party/rust/plist/src/stream/xml_reader.rs @@ -0,0 +1,275 @@ +use base64; +use std::{ + io::{self, Read}, + str::FromStr, +}; +use xml_rs::{ + common::{is_whitespace_str, Position}, + reader::{ + Error as XmlReaderError, ErrorKind as XmlReaderErrorKind, EventReader, ParserConfig, + XmlEvent, + }, +}; + +use crate::{ + error::{Error, ErrorKind, FilePosition}, + stream::{Event, OwnedEvent}, + Date, Integer, +}; + +pub struct XmlReader<R: Read> { + xml_reader: EventReader<R>, + queued_event: Option<XmlEvent>, + element_stack: Vec<String>, + finished: bool, +} + +impl<R: Read> XmlReader<R> { + pub fn new(reader: R) -> XmlReader<R> { + let config = ParserConfig::new() + .trim_whitespace(false) + .whitespace_to_characters(true) + .cdata_to_characters(true) + .ignore_comments(true) + .coalesce_characters(true); + + XmlReader { + xml_reader: EventReader::new_with_config(reader, config), + queued_event: None, + element_stack: Vec::new(), + finished: false, + } + } + + fn read_content(&mut self) -> Result<String, Error> { + loop { + match self.xml_reader.next() { + Ok(XmlEvent::Characters(s)) => return Ok(s), + Ok(event @ XmlEvent::EndElement { .. }) => { + self.queued_event = Some(event); + return Ok("".to_owned()); + } + Ok(XmlEvent::EndDocument) => { + return Err(self.with_pos(ErrorKind::UnclosedXmlElement)) + } + Ok(XmlEvent::StartElement { .. }) => { + return Err(self.with_pos(ErrorKind::UnexpectedXmlOpeningTag)); + } + Ok(XmlEvent::ProcessingInstruction { .. }) => (), + Ok(XmlEvent::StartDocument { .. 
}) + | Ok(XmlEvent::CData(_)) + | Ok(XmlEvent::Comment(_)) + | Ok(XmlEvent::Whitespace(_)) => { + unreachable!("parser does not output CData, Comment or Whitespace events"); + } + Err(err) => return Err(from_xml_error(err)), + } + } + } + + fn next_event(&mut self) -> Result<XmlEvent, XmlReaderError> { + if let Some(event) = self.queued_event.take() { + Ok(event) + } else { + self.xml_reader.next() + } + } + + fn read_next(&mut self) -> Result<Option<OwnedEvent>, Error> { + loop { + match self.next_event() { + Ok(XmlEvent::StartDocument { .. }) => {} + Ok(XmlEvent::StartElement { name, .. }) => { + // Add the current element to the element stack + self.element_stack.push(name.local_name.clone()); + + match &name.local_name[..] { + "plist" => (), + "array" => return Ok(Some(Event::StartArray(None))), + "dict" => return Ok(Some(Event::StartDictionary(None))), + "key" => return Ok(Some(Event::String(self.read_content()?.into()))), + "true" => return Ok(Some(Event::Boolean(true))), + "false" => return Ok(Some(Event::Boolean(false))), + "data" => { + let mut s = self.read_content()?; + // Strip whitespace and line endings from input string + s.retain(|c| !c.is_ascii_whitespace()); + let data = base64::decode(&s) + .map_err(|_| self.with_pos(ErrorKind::InvalidDataString))?; + return Ok(Some(Event::Data(data.into()))); + } + "date" => { + let s = self.read_content()?; + let date = Date::from_rfc3339(&s) + .map_err(|()| self.with_pos(ErrorKind::InvalidDateString))?; + return Ok(Some(Event::Date(date))); + } + "integer" => { + let s = self.read_content()?; + match Integer::from_str(&s) { + Ok(i) => return Ok(Some(Event::Integer(i))), + Err(_) => { + return Err(self.with_pos(ErrorKind::InvalidIntegerString)) + } + } + } + "real" => { + let s = self.read_content()?; + match f64::from_str(&s) { + Ok(f) => return Ok(Some(Event::Real(f))), + Err(_) => return Err(self.with_pos(ErrorKind::InvalidRealString)), + } + } + "string" => return 
Ok(Some(Event::String(self.read_content()?.into()))), + _ => return Err(self.with_pos(ErrorKind::UnknownXmlElement)), + } + } + Ok(XmlEvent::EndElement { name, .. }) => { + // Check the corrent element is being closed + match self.element_stack.pop() { + Some(ref open_name) if &name.local_name == open_name => (), + Some(ref _open_name) => { + return Err(self.with_pos(ErrorKind::UnclosedXmlElement)) + } + None => return Err(self.with_pos(ErrorKind::UnpairedXmlClosingTag)), + } + + match &name.local_name[..] { + "array" | "dict" => return Ok(Some(Event::EndCollection)), + "plist" | _ => (), + } + } + Ok(XmlEvent::EndDocument) => { + if self.element_stack.is_empty() { + return Ok(None); + } else { + return Err(self.with_pos(ErrorKind::UnclosedXmlElement)); + } + } + + Ok(XmlEvent::Characters(c)) => { + if !is_whitespace_str(&c) { + return Err( + self.with_pos(ErrorKind::UnexpectedXmlCharactersExpectedElement) + ); + } + } + Ok(XmlEvent::CData(_)) | Ok(XmlEvent::Comment(_)) | Ok(XmlEvent::Whitespace(_)) => { + unreachable!("parser does not output CData, Comment or Whitespace events") + } + Ok(XmlEvent::ProcessingInstruction { .. }) => (), + Err(err) => return Err(from_xml_error(err)), + } + } + } + + fn with_pos(&self, kind: ErrorKind) -> Error { + kind.with_position(convert_xml_pos(self.xml_reader.position())) + } +} + +impl<R: Read> Iterator for XmlReader<R> { + type Item = Result<OwnedEvent, Error>; + + fn next(&mut self) -> Option<Result<OwnedEvent, Error>> { + if self.finished { + None + } else { + match self.read_next() { + Ok(Some(event)) => Some(Ok(event)), + Ok(None) => { + self.finished = true; + None + } + Err(err) => { + self.finished = true; + Some(Err(err)) + } + } + } + } +} + +fn convert_xml_pos(pos: xml_rs::common::TextPosition) -> FilePosition { + // TODO: pos.row and pos.column counts from 0. what do we want to do? 
+ FilePosition::LineColumn(pos.row, pos.column) +} + +fn from_xml_error(err: XmlReaderError) -> Error { + let kind = match err.kind() { + XmlReaderErrorKind::Io(err) if err.kind() == io::ErrorKind::UnexpectedEof => { + ErrorKind::UnexpectedEof + } + XmlReaderErrorKind::Io(err) => { + let err = if let Some(code) = err.raw_os_error() { + io::Error::from_raw_os_error(code) + } else { + io::Error::new(err.kind(), err.to_string()) + }; + ErrorKind::Io(err) + } + XmlReaderErrorKind::Syntax(_) => ErrorKind::InvalidXmlSyntax, + XmlReaderErrorKind::UnexpectedEof => ErrorKind::UnexpectedEof, + XmlReaderErrorKind::Utf8(_) => ErrorKind::InvalidXmlUtf8, + }; + + kind.with_position(convert_xml_pos(err.position())) +} + +#[cfg(test)] +mod tests { + use std::{fs::File, path::Path}; + + use super::*; + use crate::stream::Event::{self, *}; + + #[test] + fn streaming_parser() { + let reader = File::open(&Path::new("./tests/data/xml.plist")).unwrap(); + let streaming_parser = XmlReader::new(reader); + let events: Vec<Event> = streaming_parser.map(|e| e.unwrap()).collect(); + + let comparison = &[ + StartDictionary(None), + String("Author".into()), + String("William Shakespeare".into()), + String("Lines".into()), + StartArray(None), + String("It is a tale told by an idiot,".into()), + String("Full of sound and fury, signifying nothing.".into()), + EndCollection, + String("Death".into()), + Integer(1564.into()), + String("Height".into()), + Real(1.60), + String("Data".into()), + Data(vec![0, 0, 0, 190, 0, 0, 0, 3, 0, 0, 0, 30, 0, 0, 0].into()), + String("Birthdate".into()), + Date(super::Date::from_rfc3339("1981-05-16T11:32:06Z").unwrap()), + String("Blank".into()), + String("".into()), + String("BiggestNumber".into()), + Integer(18446744073709551615u64.into()), + String("SmallestNumber".into()), + Integer((-9223372036854775808i64).into()), + String("HexademicalNumber".into()), + Integer(0xdead_beef_u64.into()), + String("IsTrue".into()), + Boolean(true), + String("IsNotFalse".into()), 
+ Boolean(false), + EndCollection, + ]; + + assert_eq!(events, comparison); + } + + #[test] + fn bad_data() { + let reader = File::open(&Path::new("./tests/data/xml_error.plist")).unwrap(); + let streaming_parser = XmlReader::new(reader); + let events: Vec<_> = streaming_parser.collect(); + + assert!(events.last().unwrap().is_err()); + } +} diff --git a/third_party/rust/plist/src/stream/xml_writer.rs b/third_party/rust/plist/src/stream/xml_writer.rs new file mode 100644 index 0000000000..703435370d --- /dev/null +++ b/third_party/rust/plist/src/stream/xml_writer.rs @@ -0,0 +1,391 @@ +use base64; +use line_wrap; +use std::{borrow::Cow, io::Write}; +use xml_rs::{ + name::Name, + namespace::Namespace, + writer::{EmitterConfig, Error as XmlWriterError, EventWriter, XmlEvent}, +}; + +use crate::{ + error::{self, Error, ErrorKind, EventKind}, + stream::{Writer, XmlWriteOptions}, + Date, Integer, Uid, +}; + +static XML_PROLOGUE: &str = r#"<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +"#; + +#[derive(PartialEq)] +enum Element { + Dictionary, + Array, +} + +pub struct XmlWriter<W: Write> { + xml_writer: EventWriter<W>, + stack: Vec<Element>, + expecting_key: bool, + written_prologue: bool, + // Not very nice + empty_namespace: Namespace, +} + +impl<W: Write> XmlWriter<W> { + pub fn new(writer: W) -> XmlWriter<W> { + let opts = XmlWriteOptions::default(); + XmlWriter::new_with_options(writer, &opts) + } + + pub fn new_with_options(writer: W, opts: &XmlWriteOptions) -> XmlWriter<W> { + let config = EmitterConfig::new() + .line_separator("\n") + .indent_string(opts.indent_str.clone()) + .perform_indent(true) + .write_document_declaration(false) + .normalize_empty_elements(true) + .cdata_to_characters(true) + .keep_element_names_stack(false) + .autopad_comments(true) + .pad_self_closing(false); + + XmlWriter { + xml_writer: 
EventWriter::new_with_config(writer, config), + stack: Vec::new(), + expecting_key: false, + written_prologue: false, + empty_namespace: Namespace::empty(), + } + } + + fn write_element_and_value(&mut self, name: &str, value: &str) -> Result<(), Error> { + self.start_element(name)?; + self.write_value(value)?; + self.end_element(name)?; + Ok(()) + } + + fn start_element(&mut self, name: &str) -> Result<(), Error> { + self.xml_writer + .write(XmlEvent::StartElement { + name: Name::local(name), + attributes: Cow::Borrowed(&[]), + namespace: Cow::Borrowed(&self.empty_namespace), + }) + .map_err(from_xml_error)?; + Ok(()) + } + + fn end_element(&mut self, name: &str) -> Result<(), Error> { + self.xml_writer + .write(XmlEvent::EndElement { + name: Some(Name::local(name)), + }) + .map_err(from_xml_error)?; + Ok(()) + } + + fn write_value(&mut self, value: &str) -> Result<(), Error> { + self.xml_writer + .write(XmlEvent::Characters(value)) + .map_err(from_xml_error)?; + Ok(()) + } + + pub fn into_inner(self) -> W { + self.xml_writer.into_inner() + } + + fn write_event<F: FnOnce(&mut Self) -> Result<(), Error>>( + &mut self, + f: F, + ) -> Result<(), Error> { + if !self.written_prologue { + self.xml_writer + .inner_mut() + .write_all(XML_PROLOGUE.as_bytes()) + .map_err(error::from_io_without_position)?; + + self.written_prologue = true; + } + + f(self)?; + + // If there are no more open tags then write the </plist> element + if self.stack.is_empty() { + // We didn't tell the xml_writer about the <plist> tag so we'll skip telling it + // about the </plist> tag as well. 
+ self.xml_writer + .inner_mut() + .write_all(b"\n</plist>") + .map_err(error::from_io_without_position)?; + self.xml_writer + .inner_mut() + .flush() + .map_err(error::from_io_without_position)?; + } + + Ok(()) + } + + fn write_value_event<F: FnOnce(&mut Self) -> Result<(), Error>>( + &mut self, + event_kind: EventKind, + f: F, + ) -> Result<(), Error> { + self.write_event(|this| { + if this.expecting_key { + return Err(ErrorKind::UnexpectedEventType { + expected: EventKind::DictionaryKeyOrEndCollection, + found: event_kind, + } + .without_position()); + } + f(this)?; + this.expecting_key = this.stack.last() == Some(&Element::Dictionary); + Ok(()) + }) + } +} + +impl<W: Write> Writer for XmlWriter<W> { + fn write_start_array(&mut self, _len: Option<u64>) -> Result<(), Error> { + self.write_value_event(EventKind::StartArray, |this| { + this.start_element("array")?; + this.stack.push(Element::Array); + Ok(()) + }) + } + + fn write_start_dictionary(&mut self, _len: Option<u64>) -> Result<(), Error> { + self.write_value_event(EventKind::StartDictionary, |this| { + this.start_element("dict")?; + this.stack.push(Element::Dictionary); + Ok(()) + }) + } + + fn write_end_collection(&mut self) -> Result<(), Error> { + self.write_event(|this| { + match (this.stack.pop(), this.expecting_key) { + (Some(Element::Dictionary), true) => { + this.end_element("dict")?; + } + (Some(Element::Array), _) => { + this.end_element("array")?; + } + (Some(Element::Dictionary), false) | (None, _) => { + return Err(ErrorKind::UnexpectedEventType { + expected: EventKind::ValueOrStartCollection, + found: EventKind::EndCollection, + } + .without_position()); + } + } + this.expecting_key = this.stack.last() == Some(&Element::Dictionary); + Ok(()) + }) + } + + fn write_boolean(&mut self, value: bool) -> Result<(), Error> { + self.write_value_event(EventKind::Boolean, |this| { + let value_str = if value { "true" } else { "false" }; + this.start_element(value_str)?; + this.end_element(value_str) + }) 
// End of the method that precedes `write_data`.
    }

    /// Writes `value` as a `<data>` element containing line-wrapped base 64 text.
    ///
    /// The current depth of `self.stack` is used as the indent level of the
    /// encoded text.
    fn write_data(&mut self, value: &[u8]) -> Result<(), Error> {
        self.write_value_event(EventKind::Data, |this| {
            let base64_data = base64_encode_plist(value, this.stack.len());
            this.write_element_and_value("data", &base64_data)
        })
    }

    /// Writes `value` as a `<date>` element in RFC 3339 form.
    fn write_date(&mut self, value: Date) -> Result<(), Error> {
        self.write_value_event(EventKind::Date, |this| {
            this.write_element_and_value("date", &value.to_rfc3339())
        })
    }

    /// Writes `value` as an `<integer>` element using its `to_string` form.
    fn write_integer(&mut self, value: Integer) -> Result<(), Error> {
        self.write_value_event(EventKind::Integer, |this| {
            this.write_element_and_value("integer", &value.to_string())
        })
    }

    /// Writes `value` as a `<real>` element using its `to_string` form.
    fn write_real(&mut self, value: f64) -> Result<(), Error> {
        self.write_value_event(EventKind::Real, |this| {
            this.write_element_and_value("real", &value.to_string())
        })
    }

    /// Writes `value` as either a `<key>` or a `<string>` element.
    ///
    /// When a dictionary key is expected the text is emitted as `<key>` and
    /// the flag is cleared. Otherwise it is emitted as `<string>`, and a key
    /// is expected next only if the innermost open element is a dictionary.
    fn write_string(&mut self, value: &str) -> Result<(), Error> {
        self.write_event(|this| {
            if this.expecting_key {
                this.write_element_and_value("key", value)?;
                this.expecting_key = false;
            } else {
                this.write_element_and_value("string", value)?;
                this.expecting_key = this.stack.last() == Some(&Element::Dictionary);
            }
            Ok(())
        })
    }

    /// Always fails: UIDs cannot be represented in an XML plist.
    ///
    /// # Errors
    ///
    /// Returns `ErrorKind::UidNotSupportedInXmlPlist` without a position.
    fn write_uid(&mut self, _value: Uid) -> Result<(), Error> {
        Err(ErrorKind::UidNotSupportedInXmlPlist.without_position())
    }
}

/// Converts an error from the underlying XML writer into this crate's `Error`.
///
/// I/O failures are mapped to `ErrorKind::Io` without a position.
///
/// # Panics
///
/// Panics via `unreachable!()` for every non-I/O variant.
/// NOTE(review): presumably those variants only arise from misuse of the XML
/// writer API that this module never commits; confirm against the writer.
pub(crate) fn from_xml_error(err: XmlWriterError) -> Error {
    match err {
        XmlWriterError::Io(err) => ErrorKind::Io(err).without_position(),
        XmlWriterError::DocumentStartAlreadyEmitted
        | XmlWriterError::LastElementNameNotAvailable
        | XmlWriterError::EndElementNameIsNotEqualToLastStartElementName
        | XmlWriterError::EndElementNameIsNotSpecified => unreachable!(),
    }
}

/// Encodes `data` as base 64 text laid out for a plist `<data>` element.
///
/// The returned string starts with a newline followed by `indent` tabs, then
/// the base 64 text wrapped at 68 characters per line (each wrap inserts the
/// same newline-plus-tabs sequence), and ends with one more newline followed
/// by `indent` tabs.
///
/// # Panics
///
/// Panics if the assembled bytes are not valid UTF-8, which is not expected
/// for base 64 text, newlines and tabs.
fn base64_encode_plist(data: &[u8], indent: usize) -> String {
    // XML plist data elements are always formatted by apple tools as
    // <data>
    // AAAA..AA (68 characters per line)
    // </data>
    // Allocate space for base 64 string and line endings up front
    const LINE_LEN: usize = 68;

    // A line ending is a newline followed by `indent` tab characters.
    let mut line_ending = Vec::with_capacity(1 + indent);
    line_ending.push(b'\n');
    line_ending.resize(1 + indent, b'\t');

    // Find the max length of `data` encoded as a base 64 string with padding
    let base64_max_string_len = data.len() * 4 / 3 + 4;

    // Find the max length of the formatted base 64 string as: max length of the base 64 string
    // + line endings and indents at the start of the string and after every line
    let base64_max_string_len_with_formatting =
        base64_max_string_len + (2 + base64_max_string_len / LINE_LEN) * line_ending.len();

    let mut output = vec![0; base64_max_string_len_with_formatting];

    // Start output with a line ending and indent
    output[..line_ending.len()].copy_from_slice(&line_ending);

    // Encode `data` as a base 64 string
    let base64_string_len =
        base64::encode_config_slice(data, base64::STANDARD, &mut output[line_ending.len()..]);

    // Line wrap the base 64 encoded string; `line_wrap_len` is added to the
    // encoded length below to locate the end of the wrapped text.
    let line_wrap_len = line_wrap::line_wrap(
        &mut output[line_ending.len()..],
        base64_string_len,
        LINE_LEN,
        &line_wrap::SliceLineEnding::new(&line_ending),
    );

    // Add the final line ending and indent
    output[line_ending.len() + base64_string_len + line_wrap_len..][..line_ending.len()]
        .copy_from_slice(&line_ending);

    // Ensure output is the correct length
    output.truncate(base64_string_len + line_wrap_len + 2 * line_ending.len());
    String::from_utf8(output).expect("base 64 string must be valid utf8")
}

#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use super::*;
    use crate::stream::Event;

    /// Writes a representative event stream and compares the produced XML
    /// document, byte for byte, with the expected text.
    #[test]
    fn streaming_parser() {
        let plist = &[
            Event::StartDictionary(None),
            Event::String("Author".into()),
            Event::String("William Shakespeare".into()),
            Event::String("Lines".into()),
            Event::StartArray(None),
            Event::String("It is a tale told by an idiot,".into()),
            Event::String("Full of sound and fury, signifying nothing.".into()),
            Event::Data((0..128).collect::<Vec<_>>().into()),
            Event::EndCollection,
            Event::String("Death".into()),
            Event::Integer(1564.into()),
            Event::String("Height".into()),
            Event::Real(1.60),
            Event::String("Data".into()),
            Event::Data(vec![0, 0, 0, 190, 0, 0, 0, 3, 0, 0, 0, 30, 0, 0, 0].into()),
            Event::String("Birthdate".into()),
            Event::Date(super::Date::from_rfc3339("1981-05-16T11:32:06Z").unwrap()),
            Event::String("Comment".into()),
            Event::String("2 < 3".into()), // make sure characters are escaped
            Event::String("BiggestNumber".into()),
            Event::Integer(18446744073709551615u64.into()),
            Event::String("SmallestNumber".into()),
            Event::Integer((-9223372036854775808i64).into()),
            Event::String("IsTrue".into()),
            Event::Boolean(true),
            Event::String("IsNotFalse".into()),
            Event::Boolean(false),
            Event::EndCollection,
        ];

        let mut cursor = Cursor::new(Vec::new());

        {
            let mut plist_w = XmlWriter::new(&mut cursor);

            for item in plist {
                plist_w.write(item).unwrap();
            }
        }

        // The `Comment` value must appear with `<` escaped as `&lt;`.
        let comparison = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>
<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/PropertyList-1.0.dtd\">
<plist version=\"1.0\">
<dict>
\t<key>Author</key>
\t<string>William Shakespeare</string>
\t<key>Lines</key>
\t<array>
\t\t<string>It is a tale told by an idiot,</string>
\t\t<string>Full of sound and fury, signifying nothing.</string>
\t\t<data>
\t\tAAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEy
\t\tMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2Rl
\t\tZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8=
\t\t</data>
\t</array>
\t<key>Death</key>
\t<integer>1564</integer>
\t<key>Height</key>
\t<real>1.6</real>
\t<key>Data</key>
\t<data>
\tAAAAvgAAAAMAAAAeAAAA
\t</data>
\t<key>Birthdate</key>
\t<date>1981-05-16T11:32:06Z</date>
\t<key>Comment</key>
\t<string>2 &lt; 3</string>
\t<key>BiggestNumber</key>
\t<integer>18446744073709551615</integer>
\t<key>SmallestNumber</key>
\t<integer>-9223372036854775808</integer>
\t<key>IsTrue</key>
\t<true/>
\t<key>IsNotFalse</key>
\t<false/>
</dict>
</plist>";

        let s = String::from_utf8(cursor.into_inner()).unwrap();

        assert_eq!(s, comparison);
    }
}