From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- compiler/rustc_serialize/src/collection_impls.rs | 267 ++++++++ compiler/rustc_serialize/src/leb128.rs | 163 +++++ compiler/rustc_serialize/src/lib.rs | 28 + compiler/rustc_serialize/src/opaque.rs | 750 +++++++++++++++++++++++ compiler/rustc_serialize/src/serialize.rs | 469 ++++++++++++++ 5 files changed, 1677 insertions(+) create mode 100644 compiler/rustc_serialize/src/collection_impls.rs create mode 100644 compiler/rustc_serialize/src/leb128.rs create mode 100644 compiler/rustc_serialize/src/lib.rs create mode 100644 compiler/rustc_serialize/src/opaque.rs create mode 100644 compiler/rustc_serialize/src/serialize.rs (limited to 'compiler/rustc_serialize/src') diff --git a/compiler/rustc_serialize/src/collection_impls.rs b/compiler/rustc_serialize/src/collection_impls.rs new file mode 100644 index 000000000..5e53f0b10 --- /dev/null +++ b/compiler/rustc_serialize/src/collection_impls.rs @@ -0,0 +1,267 @@ +//! 
Implementations of serialization for structures found in liballoc + +use std::hash::{BuildHasher, Hash}; + +use crate::{Decodable, Decoder, Encodable, Encoder}; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, LinkedList, VecDeque}; +use std::rc::Rc; +use std::sync::Arc; + +use smallvec::{Array, SmallVec}; + +impl>> Encodable for SmallVec { + fn encode(&self, s: &mut S) { + let slice: &[A::Item] = self; + slice.encode(s); + } +} + +impl>> Decodable for SmallVec { + fn decode(d: &mut D) -> SmallVec { + let len = d.read_usize(); + (0..len).map(|_| Decodable::decode(d)).collect() + } +} + +impl> Encodable for LinkedList { + fn encode(&self, s: &mut S) { + s.emit_usize(self.len()); + for e in self.iter() { + e.encode(s); + } + } +} + +impl> Decodable for LinkedList { + fn decode(d: &mut D) -> LinkedList { + let len = d.read_usize(); + (0..len).map(|_| Decodable::decode(d)).collect() + } +} + +impl> Encodable for VecDeque { + fn encode(&self, s: &mut S) { + s.emit_usize(self.len()); + for e in self.iter() { + e.encode(s); + } + } +} + +impl> Decodable for VecDeque { + fn decode(d: &mut D) -> VecDeque { + let len = d.read_usize(); + (0..len).map(|_| Decodable::decode(d)).collect() + } +} + +impl Encodable for BTreeMap +where + K: Encodable + PartialEq + Ord, + V: Encodable, +{ + fn encode(&self, e: &mut S) { + e.emit_usize(self.len()); + for (key, val) in self.iter() { + key.encode(e); + val.encode(e); + } + } +} + +impl Decodable for BTreeMap +where + K: Decodable + PartialEq + Ord, + V: Decodable, +{ + fn decode(d: &mut D) -> BTreeMap { + let len = d.read_usize(); + let mut map = BTreeMap::new(); + for _ in 0..len { + let key = Decodable::decode(d); + let val = Decodable::decode(d); + map.insert(key, val); + } + map + } +} + +impl Encodable for BTreeSet +where + T: Encodable + PartialEq + Ord, +{ + fn encode(&self, s: &mut S) { + s.emit_usize(self.len()); + for e in self.iter() { + e.encode(s); + } + } +} + +impl Decodable for BTreeSet +where + T: 
Decodable + PartialEq + Ord, +{ + fn decode(d: &mut D) -> BTreeSet { + let len = d.read_usize(); + let mut set = BTreeSet::new(); + for _ in 0..len { + set.insert(Decodable::decode(d)); + } + set + } +} + +impl Encodable for HashMap +where + K: Encodable + Eq, + V: Encodable, + S: BuildHasher, +{ + fn encode(&self, e: &mut E) { + e.emit_usize(self.len()); + for (key, val) in self.iter() { + key.encode(e); + val.encode(e); + } + } +} + +impl Decodable for HashMap +where + K: Decodable + Hash + Eq, + V: Decodable, + S: BuildHasher + Default, +{ + fn decode(d: &mut D) -> HashMap { + let len = d.read_usize(); + let state = Default::default(); + let mut map = HashMap::with_capacity_and_hasher(len, state); + for _ in 0..len { + let key = Decodable::decode(d); + let val = Decodable::decode(d); + map.insert(key, val); + } + map + } +} + +impl Encodable for HashSet +where + T: Encodable + Eq, + S: BuildHasher, +{ + fn encode(&self, s: &mut E) { + s.emit_usize(self.len()); + for e in self.iter() { + e.encode(s); + } + } +} + +impl Decodable for HashSet +where + T: Decodable + Hash + Eq, + S: BuildHasher + Default, +{ + fn decode(d: &mut D) -> HashSet { + let len = d.read_usize(); + let state = Default::default(); + let mut set = HashSet::with_capacity_and_hasher(len, state); + for _ in 0..len { + set.insert(Decodable::decode(d)); + } + set + } +} + +impl Encodable for indexmap::IndexMap +where + K: Encodable + Hash + Eq, + V: Encodable, + S: BuildHasher, +{ + fn encode(&self, e: &mut E) { + e.emit_usize(self.len()); + for (key, val) in self.iter() { + key.encode(e); + val.encode(e); + } + } +} + +impl Decodable for indexmap::IndexMap +where + K: Decodable + Hash + Eq, + V: Decodable, + S: BuildHasher + Default, +{ + fn decode(d: &mut D) -> indexmap::IndexMap { + let len = d.read_usize(); + let state = Default::default(); + let mut map = indexmap::IndexMap::with_capacity_and_hasher(len, state); + for _ in 0..len { + let key = Decodable::decode(d); + let val = 
Decodable::decode(d); + map.insert(key, val); + } + map + } +} + +impl Encodable for indexmap::IndexSet +where + T: Encodable + Hash + Eq, + S: BuildHasher, +{ + fn encode(&self, s: &mut E) { + s.emit_usize(self.len()); + for e in self.iter() { + e.encode(s); + } + } +} + +impl Decodable for indexmap::IndexSet +where + T: Decodable + Hash + Eq, + S: BuildHasher + Default, +{ + fn decode(d: &mut D) -> indexmap::IndexSet { + let len = d.read_usize(); + let state = Default::default(); + let mut set = indexmap::IndexSet::with_capacity_and_hasher(len, state); + for _ in 0..len { + set.insert(Decodable::decode(d)); + } + set + } +} + +impl> Encodable for Rc<[T]> { + fn encode(&self, s: &mut E) { + let slice: &[T] = self; + slice.encode(s); + } +} + +impl> Decodable for Rc<[T]> { + fn decode(d: &mut D) -> Rc<[T]> { + let vec: Vec = Decodable::decode(d); + vec.into() + } +} + +impl> Encodable for Arc<[T]> { + fn encode(&self, s: &mut E) { + let slice: &[T] = self; + slice.encode(s); + } +} + +impl> Decodable for Arc<[T]> { + fn decode(d: &mut D) -> Arc<[T]> { + let vec: Vec = Decodable::decode(d); + vec.into() + } +} diff --git a/compiler/rustc_serialize/src/leb128.rs b/compiler/rustc_serialize/src/leb128.rs new file mode 100644 index 000000000..08b3c0542 --- /dev/null +++ b/compiler/rustc_serialize/src/leb128.rs @@ -0,0 +1,163 @@ +#![macro_use] + +macro_rules! max_leb128_len { + ($int_ty:ty) => { + // The longest LEB128 encoding for an integer uses 7 bits per byte. + (std::mem::size_of::<$int_ty>() * 8 + 6) / 7 + }; +} + +// Returns the longest LEB128 encoding of all supported integer types. +pub const fn max_leb128_len() -> usize { + max_leb128_len!(u128) +} + +macro_rules! 
impl_write_unsigned_leb128 { + ($fn_name:ident, $int_ty:ty) => { + #[inline] + pub fn $fn_name( + out: &mut [::std::mem::MaybeUninit; max_leb128_len!($int_ty)], + mut value: $int_ty, + ) -> &[u8] { + let mut i = 0; + + loop { + if value < 0x80 { + unsafe { + *out.get_unchecked_mut(i).as_mut_ptr() = value as u8; + } + + i += 1; + break; + } else { + unsafe { + *out.get_unchecked_mut(i).as_mut_ptr() = ((value & 0x7f) | 0x80) as u8; + } + + value >>= 7; + i += 1; + } + } + + unsafe { ::std::mem::MaybeUninit::slice_assume_init_ref(&out.get_unchecked(..i)) } + } + }; +} + +impl_write_unsigned_leb128!(write_u16_leb128, u16); +impl_write_unsigned_leb128!(write_u32_leb128, u32); +impl_write_unsigned_leb128!(write_u64_leb128, u64); +impl_write_unsigned_leb128!(write_u128_leb128, u128); +impl_write_unsigned_leb128!(write_usize_leb128, usize); + +macro_rules! impl_read_unsigned_leb128 { + ($fn_name:ident, $int_ty:ty) => { + #[inline] + pub fn $fn_name(slice: &[u8], position: &mut usize) -> $int_ty { + // The first iteration of this loop is unpeeled. This is a + // performance win because this code is hot and integer values less + // than 128 are very common, typically occurring 50-80% or more of + // the time, even for u64 and u128. + let byte = slice[*position]; + *position += 1; + if (byte & 0x80) == 0 { + return byte as $int_ty; + } + let mut result = (byte & 0x7F) as $int_ty; + let mut shift = 7; + loop { + let byte = slice[*position]; + *position += 1; + if (byte & 0x80) == 0 { + result |= (byte as $int_ty) << shift; + return result; + } else { + result |= ((byte & 0x7F) as $int_ty) << shift; + } + shift += 7; + } + } + }; +} + +impl_read_unsigned_leb128!(read_u16_leb128, u16); +impl_read_unsigned_leb128!(read_u32_leb128, u32); +impl_read_unsigned_leb128!(read_u64_leb128, u64); +impl_read_unsigned_leb128!(read_u128_leb128, u128); +impl_read_unsigned_leb128!(read_usize_leb128, usize); + +macro_rules! 
impl_write_signed_leb128 { + ($fn_name:ident, $int_ty:ty) => { + #[inline] + pub fn $fn_name( + out: &mut [::std::mem::MaybeUninit; max_leb128_len!($int_ty)], + mut value: $int_ty, + ) -> &[u8] { + let mut i = 0; + + loop { + let mut byte = (value as u8) & 0x7f; + value >>= 7; + let more = !(((value == 0) && ((byte & 0x40) == 0)) + || ((value == -1) && ((byte & 0x40) != 0))); + + if more { + byte |= 0x80; // Mark this byte to show that more bytes will follow. + } + + unsafe { + *out.get_unchecked_mut(i).as_mut_ptr() = byte; + } + + i += 1; + + if !more { + break; + } + } + + unsafe { ::std::mem::MaybeUninit::slice_assume_init_ref(&out.get_unchecked(..i)) } + } + }; +} + +impl_write_signed_leb128!(write_i16_leb128, i16); +impl_write_signed_leb128!(write_i32_leb128, i32); +impl_write_signed_leb128!(write_i64_leb128, i64); +impl_write_signed_leb128!(write_i128_leb128, i128); +impl_write_signed_leb128!(write_isize_leb128, isize); + +macro_rules! impl_read_signed_leb128 { + ($fn_name:ident, $int_ty:ty) => { + #[inline] + pub fn $fn_name(slice: &[u8], position: &mut usize) -> $int_ty { + let mut result = 0; + let mut shift = 0; + let mut byte; + + loop { + byte = slice[*position]; + *position += 1; + result |= <$int_ty>::from(byte & 0x7F) << shift; + shift += 7; + + if (byte & 0x80) == 0 { + break; + } + } + + if (shift < <$int_ty>::BITS) && ((byte & 0x40) != 0) { + // sign extend + result |= (!0 << shift); + } + + result + } + }; +} + +impl_read_signed_leb128!(read_i16_leb128, i16); +impl_read_signed_leb128!(read_i32_leb128, i32); +impl_read_signed_leb128!(read_i64_leb128, i64); +impl_read_signed_leb128!(read_i128_leb128, i128); +impl_read_signed_leb128!(read_isize_leb128, isize); diff --git a/compiler/rustc_serialize/src/lib.rs b/compiler/rustc_serialize/src/lib.rs new file mode 100644 index 000000000..e606f4273 --- /dev/null +++ b/compiler/rustc_serialize/src/lib.rs @@ -0,0 +1,28 @@ +//! Support code for encoding and decoding types. 
+ +/* +Core encoding and decoding interfaces. +*/ + +#![doc( + html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/", + html_playground_url = "https://play.rust-lang.org/", + test(attr(allow(unused_variables), deny(warnings))) +)] +#![feature(never_type)] +#![feature(associated_type_bounds)] +#![feature(min_specialization)] +#![feature(core_intrinsics)] +#![feature(maybe_uninit_slice)] +#![feature(let_else)] +#![feature(new_uninit)] +#![cfg_attr(test, feature(test))] +#![allow(rustc::internal)] + +pub use self::serialize::{Decodable, Decoder, Encodable, Encoder}; + +mod collection_impls; +mod serialize; + +pub mod leb128; +pub mod opaque; diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs new file mode 100644 index 000000000..5c17ef6ac --- /dev/null +++ b/compiler/rustc_serialize/src/opaque.rs @@ -0,0 +1,750 @@ +use crate::leb128::{self, max_leb128_len}; +use crate::serialize::{Decodable, Decoder, Encodable, Encoder}; +use std::convert::TryInto; +use std::fs::File; +use std::io::{self, Write}; +use std::mem::MaybeUninit; +use std::path::Path; +use std::ptr; + +// ----------------------------------------------------------------------------- +// Encoder +// ----------------------------------------------------------------------------- + +pub struct MemEncoder { + pub data: Vec, +} + +impl MemEncoder { + pub fn new() -> MemEncoder { + MemEncoder { data: vec![] } + } + + #[inline] + pub fn position(&self) -> usize { + self.data.len() + } + + pub fn finish(self) -> Vec { + self.data + } +} + +macro_rules! write_leb128 { + ($enc:expr, $value:expr, $int_ty:ty, $fun:ident) => {{ + const MAX_ENCODED_LEN: usize = max_leb128_len!($int_ty); + let old_len = $enc.data.len(); + + if MAX_ENCODED_LEN > $enc.data.capacity() - old_len { + $enc.data.reserve(MAX_ENCODED_LEN); + } + + // SAFETY: The above check and `reserve` ensures that there is enough + // room to write the encoded value to the vector's internal buffer. 
+ unsafe { + let buf = &mut *($enc.data.as_mut_ptr().add(old_len) + as *mut [MaybeUninit; MAX_ENCODED_LEN]); + let encoded = leb128::$fun(buf, $value); + $enc.data.set_len(old_len + encoded.len()); + } + }}; +} + +/// A byte that [cannot occur in UTF8 sequences][utf8]. Used to mark the end of a string. +/// This way we can skip validation and still be relatively sure that deserialization +/// did not desynchronize. +/// +/// [utf8]: https://en.wikipedia.org/w/index.php?title=UTF-8&oldid=1058865525#Codepage_layout +const STR_SENTINEL: u8 = 0xC1; + +impl Encoder for MemEncoder { + #[inline] + fn emit_usize(&mut self, v: usize) { + write_leb128!(self, v, usize, write_usize_leb128) + } + + #[inline] + fn emit_u128(&mut self, v: u128) { + write_leb128!(self, v, u128, write_u128_leb128); + } + + #[inline] + fn emit_u64(&mut self, v: u64) { + write_leb128!(self, v, u64, write_u64_leb128); + } + + #[inline] + fn emit_u32(&mut self, v: u32) { + write_leb128!(self, v, u32, write_u32_leb128); + } + + #[inline] + fn emit_u16(&mut self, v: u16) { + self.data.extend_from_slice(&v.to_le_bytes()); + } + + #[inline] + fn emit_u8(&mut self, v: u8) { + self.data.push(v); + } + + #[inline] + fn emit_isize(&mut self, v: isize) { + write_leb128!(self, v, isize, write_isize_leb128) + } + + #[inline] + fn emit_i128(&mut self, v: i128) { + write_leb128!(self, v, i128, write_i128_leb128) + } + + #[inline] + fn emit_i64(&mut self, v: i64) { + write_leb128!(self, v, i64, write_i64_leb128) + } + + #[inline] + fn emit_i32(&mut self, v: i32) { + write_leb128!(self, v, i32, write_i32_leb128) + } + + #[inline] + fn emit_i16(&mut self, v: i16) { + self.data.extend_from_slice(&v.to_le_bytes()); + } + + #[inline] + fn emit_i8(&mut self, v: i8) { + self.emit_u8(v as u8); + } + + #[inline] + fn emit_bool(&mut self, v: bool) { + self.emit_u8(if v { 1 } else { 0 }); + } + + #[inline] + fn emit_f64(&mut self, v: f64) { + let as_u64: u64 = v.to_bits(); + self.emit_u64(as_u64); + } + + #[inline] + fn 
emit_f32(&mut self, v: f32) { + let as_u32: u32 = v.to_bits(); + self.emit_u32(as_u32); + } + + #[inline] + fn emit_char(&mut self, v: char) { + self.emit_u32(v as u32); + } + + #[inline] + fn emit_str(&mut self, v: &str) { + self.emit_usize(v.len()); + self.emit_raw_bytes(v.as_bytes()); + self.emit_u8(STR_SENTINEL); + } + + #[inline] + fn emit_raw_bytes(&mut self, s: &[u8]) { + self.data.extend_from_slice(s); + } +} + +pub type FileEncodeResult = Result; + +// `FileEncoder` encodes data to file via fixed-size buffer. +// +// When encoding large amounts of data to a file, using `FileEncoder` may be +// preferred over using `MemEncoder` to encode to a `Vec`, and then writing the +// `Vec` to file, as the latter uses as much memory as there is encoded data, +// while the former uses the fixed amount of memory allocated to the buffer. +// `FileEncoder` also has the advantage of not needing to reallocate as data +// is appended to it, but the disadvantage of requiring more error handling, +// which has some runtime overhead. +pub struct FileEncoder { + // The input buffer. For adequate performance, we need more control over + // buffering than `BufWriter` offers. If `BufWriter` ever offers a raw + // buffer access API, we can use it, and remove `buf` and `buffered`. + buf: Box<[MaybeUninit]>, + buffered: usize, + flushed: usize, + file: File, + // This is used to implement delayed error handling, as described in the + // comment on `trait Encoder`. + res: Result<(), io::Error>, +} + +impl FileEncoder { + pub fn new>(path: P) -> io::Result { + const DEFAULT_BUF_SIZE: usize = 8192; + FileEncoder::with_capacity(path, DEFAULT_BUF_SIZE) + } + + pub fn with_capacity>(path: P, capacity: usize) -> io::Result { + // Require capacity at least as large as the largest LEB128 encoding + // here, so that we don't have to check or handle this on every write. 
+ assert!(capacity >= max_leb128_len()); + + // Require capacity small enough such that some capacity checks can be + // done using guaranteed non-overflowing add rather than sub, which + // shaves an instruction off those code paths (on x86 at least). + assert!(capacity <= usize::MAX - max_leb128_len()); + + let file = File::create(path)?; + + Ok(FileEncoder { + buf: Box::new_uninit_slice(capacity), + buffered: 0, + flushed: 0, + file, + res: Ok(()), + }) + } + + #[inline] + pub fn position(&self) -> usize { + // Tracking position this way instead of having a `self.position` field + // means that we don't have to update the position on every write call. + self.flushed + self.buffered + } + + pub fn flush(&mut self) { + // This is basically a copy of `BufWriter::flush`. If `BufWriter` ever + // offers a raw buffer access API, we can use it, and remove this. + + /// Helper struct to ensure the buffer is updated after all the writes + /// are complete. It tracks the number of written bytes and drains them + /// all from the front of the buffer when dropped. + struct BufGuard<'a> { + buffer: &'a mut [u8], + encoder_buffered: &'a mut usize, + encoder_flushed: &'a mut usize, + flushed: usize, + } + + impl<'a> BufGuard<'a> { + fn new( + buffer: &'a mut [u8], + encoder_buffered: &'a mut usize, + encoder_flushed: &'a mut usize, + ) -> Self { + assert_eq!(buffer.len(), *encoder_buffered); + Self { buffer, encoder_buffered, encoder_flushed, flushed: 0 } + } + + /// The unwritten part of the buffer + fn remaining(&self) -> &[u8] { + &self.buffer[self.flushed..] 
+ } + + /// Flag some bytes as removed from the front of the buffer + fn consume(&mut self, amt: usize) { + self.flushed += amt; + } + + /// true if all of the bytes have been written + fn done(&self) -> bool { + self.flushed >= *self.encoder_buffered + } + } + + impl Drop for BufGuard<'_> { + fn drop(&mut self) { + if self.flushed > 0 { + if self.done() { + *self.encoder_flushed += *self.encoder_buffered; + *self.encoder_buffered = 0; + } else { + self.buffer.copy_within(self.flushed.., 0); + *self.encoder_flushed += self.flushed; + *self.encoder_buffered -= self.flushed; + } + } + } + } + + // If we've already had an error, do nothing. It'll get reported after + // `finish` is called. + if self.res.is_err() { + return; + } + + let mut guard = BufGuard::new( + unsafe { MaybeUninit::slice_assume_init_mut(&mut self.buf[..self.buffered]) }, + &mut self.buffered, + &mut self.flushed, + ); + + while !guard.done() { + match self.file.write(guard.remaining()) { + Ok(0) => { + self.res = Err(io::Error::new( + io::ErrorKind::WriteZero, + "failed to write the buffered data", + )); + return; + } + Ok(n) => guard.consume(n), + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} + Err(e) => { + self.res = Err(e); + return; + } + } + } + } + + pub fn file(&self) -> &File { + &self.file + } + + #[inline] + fn capacity(&self) -> usize { + self.buf.len() + } + + #[inline] + fn write_one(&mut self, value: u8) { + // We ensure this during `FileEncoder` construction. + debug_assert!(self.capacity() >= 1); + + let mut buffered = self.buffered; + + if std::intrinsics::unlikely(buffered >= self.capacity()) { + self.flush(); + buffered = 0; + } + + // SAFETY: The above check and `flush` ensures that there is enough + // room to write the input to the buffer. 
+ unsafe { + *MaybeUninit::slice_as_mut_ptr(&mut self.buf).add(buffered) = value; + } + + self.buffered = buffered + 1; + } + + #[inline] + fn write_all(&mut self, buf: &[u8]) { + let capacity = self.capacity(); + let buf_len = buf.len(); + + if std::intrinsics::likely(buf_len <= capacity) { + let mut buffered = self.buffered; + + if std::intrinsics::unlikely(buf_len > capacity - buffered) { + self.flush(); + buffered = 0; + } + + // SAFETY: The above check and `flush` ensures that there is enough + // room to write the input to the buffer. + unsafe { + let src = buf.as_ptr(); + let dst = MaybeUninit::slice_as_mut_ptr(&mut self.buf).add(buffered); + ptr::copy_nonoverlapping(src, dst, buf_len); + } + + self.buffered = buffered + buf_len; + } else { + self.write_all_unbuffered(buf); + } + } + + fn write_all_unbuffered(&mut self, mut buf: &[u8]) { + // If we've already had an error, do nothing. It'll get reported after + // `finish` is called. + if self.res.is_err() { + return; + } + + if self.buffered > 0 { + self.flush(); + } + + // This is basically a copy of `Write::write_all` but also updates our + // `self.flushed`. It's necessary because `Write::write_all` does not + // return the number of bytes written when an error is encountered, and + // without that, we cannot accurately update `self.flushed` on error. + while !buf.is_empty() { + match self.file.write(buf) { + Ok(0) => { + self.res = Err(io::Error::new( + io::ErrorKind::WriteZero, + "failed to write whole buffer", + )); + return; + } + Ok(n) => { + buf = &buf[n..]; + self.flushed += n; + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} + Err(e) => { + self.res = Err(e); + return; + } + } + } + } + + pub fn finish(mut self) -> Result { + self.flush(); + + let res = std::mem::replace(&mut self.res, Ok(())); + res.map(|()| self.position()) + } +} + +impl Drop for FileEncoder { + fn drop(&mut self) { + // Likely to be a no-op, because `finish` should have been called and + // it also flushes. 
But do it just in case. + let _result = self.flush(); + } +} + +macro_rules! file_encoder_write_leb128 { + ($enc:expr, $value:expr, $int_ty:ty, $fun:ident) => {{ + const MAX_ENCODED_LEN: usize = max_leb128_len!($int_ty); + + // We ensure this during `FileEncoder` construction. + debug_assert!($enc.capacity() >= MAX_ENCODED_LEN); + + let mut buffered = $enc.buffered; + + // This can't overflow. See assertion in `FileEncoder::with_capacity`. + if std::intrinsics::unlikely(buffered + MAX_ENCODED_LEN > $enc.capacity()) { + $enc.flush(); + buffered = 0; + } + + // SAFETY: The above check and flush ensures that there is enough + // room to write the encoded value to the buffer. + let buf = unsafe { + &mut *($enc.buf.as_mut_ptr().add(buffered) as *mut [MaybeUninit; MAX_ENCODED_LEN]) + }; + + let encoded = leb128::$fun(buf, $value); + $enc.buffered = buffered + encoded.len(); + }}; +} + +impl Encoder for FileEncoder { + #[inline] + fn emit_usize(&mut self, v: usize) { + file_encoder_write_leb128!(self, v, usize, write_usize_leb128) + } + + #[inline] + fn emit_u128(&mut self, v: u128) { + file_encoder_write_leb128!(self, v, u128, write_u128_leb128) + } + + #[inline] + fn emit_u64(&mut self, v: u64) { + file_encoder_write_leb128!(self, v, u64, write_u64_leb128) + } + + #[inline] + fn emit_u32(&mut self, v: u32) { + file_encoder_write_leb128!(self, v, u32, write_u32_leb128) + } + + #[inline] + fn emit_u16(&mut self, v: u16) { + self.write_all(&v.to_le_bytes()); + } + + #[inline] + fn emit_u8(&mut self, v: u8) { + self.write_one(v); + } + + #[inline] + fn emit_isize(&mut self, v: isize) { + file_encoder_write_leb128!(self, v, isize, write_isize_leb128) + } + + #[inline] + fn emit_i128(&mut self, v: i128) { + file_encoder_write_leb128!(self, v, i128, write_i128_leb128) + } + + #[inline] + fn emit_i64(&mut self, v: i64) { + file_encoder_write_leb128!(self, v, i64, write_i64_leb128) + } + + #[inline] + fn emit_i32(&mut self, v: i32) { + file_encoder_write_leb128!(self, v, i32, 
write_i32_leb128) + } + + #[inline] + fn emit_i16(&mut self, v: i16) { + self.write_all(&v.to_le_bytes()); + } + + #[inline] + fn emit_i8(&mut self, v: i8) { + self.emit_u8(v as u8); + } + + #[inline] + fn emit_bool(&mut self, v: bool) { + self.emit_u8(if v { 1 } else { 0 }); + } + + #[inline] + fn emit_f64(&mut self, v: f64) { + let as_u64: u64 = v.to_bits(); + self.emit_u64(as_u64); + } + + #[inline] + fn emit_f32(&mut self, v: f32) { + let as_u32: u32 = v.to_bits(); + self.emit_u32(as_u32); + } + + #[inline] + fn emit_char(&mut self, v: char) { + self.emit_u32(v as u32); + } + + #[inline] + fn emit_str(&mut self, v: &str) { + self.emit_usize(v.len()); + self.emit_raw_bytes(v.as_bytes()); + self.emit_u8(STR_SENTINEL); + } + + #[inline] + fn emit_raw_bytes(&mut self, s: &[u8]) { + self.write_all(s); + } +} + +// ----------------------------------------------------------------------------- +// Decoder +// ----------------------------------------------------------------------------- + +pub struct MemDecoder<'a> { + pub data: &'a [u8], + position: usize, +} + +impl<'a> MemDecoder<'a> { + #[inline] + pub fn new(data: &'a [u8], position: usize) -> MemDecoder<'a> { + MemDecoder { data, position } + } + + #[inline] + pub fn position(&self) -> usize { + self.position + } + + #[inline] + pub fn set_position(&mut self, pos: usize) { + self.position = pos + } + + #[inline] + pub fn advance(&mut self, bytes: usize) { + self.position += bytes; + } +} + +macro_rules! 
read_leb128 { + ($dec:expr, $fun:ident) => {{ leb128::$fun($dec.data, &mut $dec.position) }}; +} + +impl<'a> Decoder for MemDecoder<'a> { + #[inline] + fn read_u128(&mut self) -> u128 { + read_leb128!(self, read_u128_leb128) + } + + #[inline] + fn read_u64(&mut self) -> u64 { + read_leb128!(self, read_u64_leb128) + } + + #[inline] + fn read_u32(&mut self) -> u32 { + read_leb128!(self, read_u32_leb128) + } + + #[inline] + fn read_u16(&mut self) -> u16 { + let bytes = [self.data[self.position], self.data[self.position + 1]]; + let value = u16::from_le_bytes(bytes); + self.position += 2; + value + } + + #[inline] + fn read_u8(&mut self) -> u8 { + let value = self.data[self.position]; + self.position += 1; + value + } + + #[inline] + fn read_usize(&mut self) -> usize { + read_leb128!(self, read_usize_leb128) + } + + #[inline] + fn read_i128(&mut self) -> i128 { + read_leb128!(self, read_i128_leb128) + } + + #[inline] + fn read_i64(&mut self) -> i64 { + read_leb128!(self, read_i64_leb128) + } + + #[inline] + fn read_i32(&mut self) -> i32 { + read_leb128!(self, read_i32_leb128) + } + + #[inline] + fn read_i16(&mut self) -> i16 { + let bytes = [self.data[self.position], self.data[self.position + 1]]; + let value = i16::from_le_bytes(bytes); + self.position += 2; + value + } + + #[inline] + fn read_i8(&mut self) -> i8 { + let value = self.data[self.position]; + self.position += 1; + value as i8 + } + + #[inline] + fn read_isize(&mut self) -> isize { + read_leb128!(self, read_isize_leb128) + } + + #[inline] + fn read_bool(&mut self) -> bool { + let value = self.read_u8(); + value != 0 + } + + #[inline] + fn read_f64(&mut self) -> f64 { + let bits = self.read_u64(); + f64::from_bits(bits) + } + + #[inline] + fn read_f32(&mut self) -> f32 { + let bits = self.read_u32(); + f32::from_bits(bits) + } + + #[inline] + fn read_char(&mut self) -> char { + let bits = self.read_u32(); + std::char::from_u32(bits).unwrap() + } + + #[inline] + fn read_str(&mut self) -> &'a str { + let len 
= self.read_usize(); + let sentinel = self.data[self.position + len]; + assert!(sentinel == STR_SENTINEL); + let s = unsafe { + std::str::from_utf8_unchecked(&self.data[self.position..self.position + len]) + }; + self.position += len + 1; + s + } + + #[inline] + fn read_raw_bytes(&mut self, bytes: usize) -> &'a [u8] { + let start = self.position; + self.position += bytes; + &self.data[start..self.position] + } +} + +// Specializations for contiguous byte sequences follow. The default implementations for slices +// encode and decode each element individually. This isn't necessary for `u8` slices when using +// opaque encoders and decoders, because each `u8` is unchanged by encoding and decoding. +// Therefore, we can use more efficient implementations that process the entire sequence at once. + +// Specialize encoding byte slices. This specialization also applies to encoding `Vec`s, etc., +// since the default implementations call `encode` on their slices internally. +impl Encodable for [u8] { + fn encode(&self, e: &mut MemEncoder) { + Encoder::emit_usize(e, self.len()); + e.emit_raw_bytes(self); + } +} + +impl Encodable for [u8] { + fn encode(&self, e: &mut FileEncoder) { + Encoder::emit_usize(e, self.len()); + e.emit_raw_bytes(self); + } +} + +// Specialize decoding `Vec`. This specialization also applies to decoding `Box<[u8]>`s, etc., +// since the default implementations call `decode` to produce a `Vec` internally. +impl<'a> Decodable> for Vec { + fn decode(d: &mut MemDecoder<'a>) -> Self { + let len = Decoder::read_usize(d); + d.read_raw_bytes(len).to_owned() + } +} + +// An integer that will always encode to 8 bytes. 
+pub struct IntEncodedWithFixedSize(pub u64); + +impl IntEncodedWithFixedSize { + pub const ENCODED_SIZE: usize = 8; +} + +impl Encodable for IntEncodedWithFixedSize { + #[inline] + fn encode(&self, e: &mut MemEncoder) { + let _start_pos = e.position(); + e.emit_raw_bytes(&self.0.to_le_bytes()); + let _end_pos = e.position(); + debug_assert_eq!((_end_pos - _start_pos), IntEncodedWithFixedSize::ENCODED_SIZE); + } +} + +impl Encodable for IntEncodedWithFixedSize { + #[inline] + fn encode(&self, e: &mut FileEncoder) { + let _start_pos = e.position(); + e.emit_raw_bytes(&self.0.to_le_bytes()); + let _end_pos = e.position(); + debug_assert_eq!((_end_pos - _start_pos), IntEncodedWithFixedSize::ENCODED_SIZE); + } +} + +impl<'a> Decodable> for IntEncodedWithFixedSize { + #[inline] + fn decode(decoder: &mut MemDecoder<'a>) -> IntEncodedWithFixedSize { + let _start_pos = decoder.position(); + let bytes = decoder.read_raw_bytes(IntEncodedWithFixedSize::ENCODED_SIZE); + let value = u64::from_le_bytes(bytes.try_into().unwrap()); + let _end_pos = decoder.position(); + debug_assert_eq!((_end_pos - _start_pos), IntEncodedWithFixedSize::ENCODED_SIZE); + + IntEncodedWithFixedSize(value) + } +} diff --git a/compiler/rustc_serialize/src/serialize.rs b/compiler/rustc_serialize/src/serialize.rs new file mode 100644 index 000000000..36585b8d7 --- /dev/null +++ b/compiler/rustc_serialize/src/serialize.rs @@ -0,0 +1,469 @@ +//! Support code for encoding and decoding types. + +/* +Core encoding and decoding interfaces. +*/ + +use std::borrow::Cow; +use std::cell::{Cell, RefCell}; +use std::marker::PhantomData; +use std::path; +use std::rc::Rc; +use std::sync::Arc; + +/// A note about error handling. +/// +/// Encoders may be fallible, but in practice failure is rare and there are so +/// many nested calls that typical Rust error handling (via `Result` and `?`) +/// is pervasive and has non-trivial cost. Instead, impls of this trait must +/// implement a delayed error handling strategy. 
If a failure occurs, they +/// should record this internally, and all subsequent encoding operations can +/// be processed or ignored, whichever is appropriate. Then they should provide +/// a `finish` method that finishes up encoding. If the encoder is fallible, +/// `finish` should return a `Result` that indicates success or failure. +pub trait Encoder { + // Primitive types: + fn emit_usize(&mut self, v: usize); + fn emit_u128(&mut self, v: u128); + fn emit_u64(&mut self, v: u64); + fn emit_u32(&mut self, v: u32); + fn emit_u16(&mut self, v: u16); + fn emit_u8(&mut self, v: u8); + fn emit_isize(&mut self, v: isize); + fn emit_i128(&mut self, v: i128); + fn emit_i64(&mut self, v: i64); + fn emit_i32(&mut self, v: i32); + fn emit_i16(&mut self, v: i16); + fn emit_i8(&mut self, v: i8); + fn emit_bool(&mut self, v: bool); + fn emit_f64(&mut self, v: f64); + fn emit_f32(&mut self, v: f32); + fn emit_char(&mut self, v: char); + fn emit_str(&mut self, v: &str); + fn emit_raw_bytes(&mut self, s: &[u8]); + + // Convenience for the derive macro: + fn emit_enum_variant(&mut self, v_id: usize, f: F) + where + F: FnOnce(&mut Self), + { + self.emit_usize(v_id); + f(self); + } + + // We put the field index in a const generic to allow the emit_usize to be + // compiled into a more efficient form. In practice, the variant index is + // known at compile-time, and that knowledge allows much more efficient + // codegen than we'd otherwise get. LLVM isn't always able to make the + // optimization that would otherwise be necessary here, likely due to the + // multiple levels of inlining and const-prop that are needed. + #[inline] + fn emit_fieldless_enum_variant(&mut self) { + self.emit_usize(ID) + } +} + +// Note: all the methods in this trait are infallible, which may be surprising. +// They used to be fallible (i.e. 
return a `Result`) but many of the impls just +// panicked when something went wrong, and for the cases that didn't the +// top-level invocation would also just panic on failure. Switching to +// infallibility made things faster and lots of code a little simpler and more +// concise. +pub trait Decoder { + // Primitive types: + fn read_usize(&mut self) -> usize; + fn read_u128(&mut self) -> u128; + fn read_u64(&mut self) -> u64; + fn read_u32(&mut self) -> u32; + fn read_u16(&mut self) -> u16; + fn read_u8(&mut self) -> u8; + fn read_isize(&mut self) -> isize; + fn read_i128(&mut self) -> i128; + fn read_i64(&mut self) -> i64; + fn read_i32(&mut self) -> i32; + fn read_i16(&mut self) -> i16; + fn read_i8(&mut self) -> i8; + fn read_bool(&mut self) -> bool; + fn read_f64(&mut self) -> f64; + fn read_f32(&mut self) -> f32; + fn read_char(&mut self) -> char; + fn read_str(&mut self) -> &str; + fn read_raw_bytes(&mut self, len: usize) -> &[u8]; +} + +/// Trait for types that can be serialized +/// +/// This can be implemented using the `Encodable`, `TyEncodable` and +/// `MetadataEncodable` macros. +/// +/// * `Encodable` should be used in crates that don't depend on +/// `rustc_middle`. +/// * `MetadataEncodable` is used in `rustc_metadata` for types that contain +/// `rustc_metadata::rmeta::Lazy`. +/// * `TyEncodable` should be used for types that are only serialized in crate +/// metadata or the incremental cache. This is most types in `rustc_middle`. +pub trait Encodable { + fn encode(&self, s: &mut S); +} + +/// Trait for types that can be deserialized +/// +/// This can be implemented using the `Decodable`, `TyDecodable` and +/// `MetadataDecodable` macros. +/// +/// * `Decodable` should be used in crates that don't depend on +/// `rustc_middle`. +/// * `MetadataDecodable` is used in `rustc_metadata` for types that contain +/// `rustc_metadata::rmeta::Lazy`. 
+/// * `TyDecodable` should be used for types that are only serialized in crate +/// metadata or the incremental cache. This is most types in `rustc_middle`. +pub trait Decodable: Sized { + fn decode(d: &mut D) -> Self; +} + +macro_rules! direct_serialize_impls { + ($($ty:ident $emit_method:ident $read_method:ident),*) => { + $( + impl Encodable for $ty { + fn encode(&self, s: &mut S) { + s.$emit_method(*self); + } + } + + impl Decodable for $ty { + fn decode(d: &mut D) -> $ty { + d.$read_method() + } + } + )* + } +} + +direct_serialize_impls! { + usize emit_usize read_usize, + u8 emit_u8 read_u8, + u16 emit_u16 read_u16, + u32 emit_u32 read_u32, + u64 emit_u64 read_u64, + u128 emit_u128 read_u128, + + isize emit_isize read_isize, + i8 emit_i8 read_i8, + i16 emit_i16 read_i16, + i32 emit_i32 read_i32, + i64 emit_i64 read_i64, + i128 emit_i128 read_i128, + + f32 emit_f32 read_f32, + f64 emit_f64 read_f64, + bool emit_bool read_bool, + char emit_char read_char +} + +impl Encodable for &T +where + T: Encodable, +{ + fn encode(&self, s: &mut S) { + (**self).encode(s) + } +} + +impl Encodable for ! { + fn encode(&self, _s: &mut S) { + unreachable!(); + } +} + +impl Decodable for ! { + fn decode(_d: &mut D) -> ! 
{ + unreachable!() + } +} + +impl Encodable for ::std::num::NonZeroU32 { + fn encode(&self, s: &mut S) { + s.emit_u32(self.get()); + } +} + +impl Decodable for ::std::num::NonZeroU32 { + fn decode(d: &mut D) -> Self { + ::std::num::NonZeroU32::new(d.read_u32()).unwrap() + } +} + +impl Encodable for str { + fn encode(&self, s: &mut S) { + s.emit_str(self); + } +} + +impl Encodable for String { + fn encode(&self, s: &mut S) { + s.emit_str(&self[..]); + } +} + +impl Decodable for String { + fn decode(d: &mut D) -> String { + d.read_str().to_owned() + } +} + +impl Encodable for () { + fn encode(&self, _s: &mut S) {} +} + +impl Decodable for () { + fn decode(_: &mut D) -> () {} +} + +impl Encodable for PhantomData { + fn encode(&self, _s: &mut S) {} +} + +impl Decodable for PhantomData { + fn decode(_: &mut D) -> PhantomData { + PhantomData + } +} + +impl> Decodable for Box<[T]> { + fn decode(d: &mut D) -> Box<[T]> { + let v: Vec = Decodable::decode(d); + v.into_boxed_slice() + } +} + +impl> Encodable for Rc { + fn encode(&self, s: &mut S) { + (**self).encode(s); + } +} + +impl> Decodable for Rc { + fn decode(d: &mut D) -> Rc { + Rc::new(Decodable::decode(d)) + } +} + +impl> Encodable for [T] { + default fn encode(&self, s: &mut S) { + s.emit_usize(self.len()); + for e in self.iter() { + e.encode(s); + } + } +} + +impl> Encodable for Vec { + fn encode(&self, s: &mut S) { + let slice: &[T] = self; + slice.encode(s); + } +} + +impl> Decodable for Vec { + default fn decode(d: &mut D) -> Vec { + let len = d.read_usize(); + // SAFETY: we set the capacity in advance, only write elements, and + // only set the length at the end once the writing has succeeded. 
+ let mut vec = Vec::with_capacity(len); + unsafe { + let ptr: *mut T = vec.as_mut_ptr(); + for i in 0..len { + std::ptr::write(ptr.offset(i as isize), Decodable::decode(d)); + } + vec.set_len(len); + } + vec + } +} + +impl, const N: usize> Encodable for [T; N] { + fn encode(&self, s: &mut S) { + let slice: &[T] = self; + slice.encode(s); + } +} + +impl Decodable for [u8; N] { + fn decode(d: &mut D) -> [u8; N] { + let len = d.read_usize(); + assert!(len == N); + let mut v = [0u8; N]; + for i in 0..len { + v[i] = Decodable::decode(d); + } + v + } +} + +impl<'a, S: Encoder, T: Encodable> Encodable for Cow<'a, [T]> +where + [T]: ToOwned>, +{ + fn encode(&self, s: &mut S) { + let slice: &[T] = self; + slice.encode(s); + } +} + +impl + ToOwned> Decodable for Cow<'static, [T]> +where + [T]: ToOwned>, +{ + fn decode(d: &mut D) -> Cow<'static, [T]> { + let v: Vec = Decodable::decode(d); + Cow::Owned(v) + } +} + +impl<'a, S: Encoder> Encodable for Cow<'a, str> { + fn encode(&self, s: &mut S) { + let val: &str = self; + val.encode(s) + } +} + +impl<'a, D: Decoder> Decodable for Cow<'a, str> { + fn decode(d: &mut D) -> Cow<'static, str> { + let v: String = Decodable::decode(d); + Cow::Owned(v) + } +} + +impl> Encodable for Option { + fn encode(&self, s: &mut S) { + match *self { + None => s.emit_enum_variant(0, |_| {}), + Some(ref v) => s.emit_enum_variant(1, |s| v.encode(s)), + } + } +} + +impl> Decodable for Option { + fn decode(d: &mut D) -> Option { + match d.read_usize() { + 0 => None, + 1 => Some(Decodable::decode(d)), + _ => panic!("Encountered invalid discriminant while decoding `Option`."), + } + } +} + +impl, T2: Encodable> Encodable for Result { + fn encode(&self, s: &mut S) { + match *self { + Ok(ref v) => s.emit_enum_variant(0, |s| v.encode(s)), + Err(ref v) => s.emit_enum_variant(1, |s| v.encode(s)), + } + } +} + +impl, T2: Decodable> Decodable for Result { + fn decode(d: &mut D) -> Result { + match d.read_usize() { + 0 => Ok(T1::decode(d)), + 1 => 
Err(T2::decode(d)), + _ => panic!("Encountered invalid discriminant while decoding `Result`."), + } + } +} + +macro_rules! peel { + ($name:ident, $($other:ident,)*) => (tuple! { $($other,)* }) +} + +macro_rules! tuple { + () => (); + ( $($name:ident,)+ ) => ( + impl),+> Decodable for ($($name,)+) { + fn decode(d: &mut D) -> ($($name,)+) { + ($({ let element: $name = Decodable::decode(d); element },)+) + } + } + impl),+> Encodable for ($($name,)+) { + #[allow(non_snake_case)] + fn encode(&self, s: &mut S) { + let ($(ref $name,)+) = *self; + $($name.encode(s);)+ + } + } + peel! { $($name,)+ } + ) +} + +tuple! { T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, } + +impl Encodable for path::Path { + fn encode(&self, e: &mut S) { + self.to_str().unwrap().encode(e); + } +} + +impl Encodable for path::PathBuf { + fn encode(&self, e: &mut S) { + path::Path::encode(self, e); + } +} + +impl Decodable for path::PathBuf { + fn decode(d: &mut D) -> path::PathBuf { + let bytes: String = Decodable::decode(d); + path::PathBuf::from(bytes) + } +} + +impl + Copy> Encodable for Cell { + fn encode(&self, s: &mut S) { + self.get().encode(s); + } +} + +impl + Copy> Decodable for Cell { + fn decode(d: &mut D) -> Cell { + Cell::new(Decodable::decode(d)) + } +} + +// FIXME: #15036 +// Should use `try_borrow`, returning an +// `encoder.error("attempting to Encode borrowed RefCell")` +// from `encode` when `try_borrow` returns `Err`.
+ +impl> Encodable for RefCell { + fn encode(&self, s: &mut S) { + self.borrow().encode(s); + } +} + +impl> Decodable for RefCell { + fn decode(d: &mut D) -> RefCell { + RefCell::new(Decodable::decode(d)) + } +} + +impl> Encodable for Arc { + fn encode(&self, s: &mut S) { + (**self).encode(s); + } +} + +impl> Decodable for Arc { + fn decode(d: &mut D) -> Arc { + Arc::new(Decodable::decode(d)) + } +} + +impl> Encodable for Box { + fn encode(&self, s: &mut S) { + (**self).encode(s); + } +} +impl> Decodable for Box { + fn decode(d: &mut D) -> Box { + Box::new(Decodable::decode(d)) + } +} -- cgit v1.2.3