From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- compiler/rustc_serialize/Cargo.toml | 11 + compiler/rustc_serialize/src/collection_impls.rs | 267 ++++++++ compiler/rustc_serialize/src/leb128.rs | 163 +++++ compiler/rustc_serialize/src/lib.rs | 28 + compiler/rustc_serialize/src/opaque.rs | 750 +++++++++++++++++++++++ compiler/rustc_serialize/src/serialize.rs | 469 ++++++++++++++ compiler/rustc_serialize/tests/leb128.rs | 91 +++ compiler/rustc_serialize/tests/opaque.rs | 277 +++++++++ 8 files changed, 2056 insertions(+) create mode 100644 compiler/rustc_serialize/Cargo.toml create mode 100644 compiler/rustc_serialize/src/collection_impls.rs create mode 100644 compiler/rustc_serialize/src/leb128.rs create mode 100644 compiler/rustc_serialize/src/lib.rs create mode 100644 compiler/rustc_serialize/src/opaque.rs create mode 100644 compiler/rustc_serialize/src/serialize.rs create mode 100644 compiler/rustc_serialize/tests/leb128.rs create mode 100644 compiler/rustc_serialize/tests/opaque.rs (limited to 'compiler/rustc_serialize') diff --git a/compiler/rustc_serialize/Cargo.toml b/compiler/rustc_serialize/Cargo.toml new file mode 100644 index 000000000..dbc5c1519 --- /dev/null +++ b/compiler/rustc_serialize/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "rustc_serialize" +version = "0.0.0" +edition = "2021" + +[dependencies] +indexmap = "1.9.1" +smallvec = { version = "1.8.1", features = ["union", "may_dangle"] } + +[dev-dependencies] +rustc_macros = { path = "../rustc_macros" } diff --git a/compiler/rustc_serialize/src/collection_impls.rs b/compiler/rustc_serialize/src/collection_impls.rs new file mode 100644 index 000000000..5e53f0b10 --- /dev/null +++ b/compiler/rustc_serialize/src/collection_impls.rs @@ -0,0 +1,267 @@ +//! Implementations of serialization for structures found in liballoc + +use std::hash::{BuildHasher, Hash}; + +use crate::{Decodable, Decoder, Encodable, Encoder}; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, LinkedList, VecDeque}; +use std::rc::Rc; +use std::sync::Arc; + +use smallvec::{Array, SmallVec}; + +impl>> Encodable for SmallVec { + fn encode(&self, s: &mut S) { + let slice: &[A::Item] = self; + slice.encode(s); + } +} + +impl>> Decodable for SmallVec { + fn decode(d: &mut D) -> SmallVec { + let len = d.read_usize(); + (0..len).map(|_| Decodable::decode(d)).collect() + } +} + +impl> Encodable for LinkedList { + fn encode(&self, s: &mut S) { + s.emit_usize(self.len()); + for e in self.iter() { + e.encode(s); + } + } +} + +impl> Decodable for LinkedList { + fn decode(d: &mut D) -> LinkedList { + let len = d.read_usize(); + (0..len).map(|_| Decodable::decode(d)).collect() + } +} + +impl> Encodable for VecDeque { + fn encode(&self, s: &mut S) { + s.emit_usize(self.len()); + for e in self.iter() { + e.encode(s); + } + } +} + +impl> Decodable for VecDeque { + fn decode(d: &mut D) -> VecDeque { + let len = d.read_usize(); + (0..len).map(|_| Decodable::decode(d)).collect() + } +} + +impl Encodable for BTreeMap +where + K: Encodable + PartialEq + Ord, + V: Encodable, +{ + fn encode(&self, e: &mut S) { + e.emit_usize(self.len()); + for (key, val) in self.iter() { + key.encode(e); + val.encode(e); + } + } +} + +impl Decodable for BTreeMap +where + K: Decodable + PartialEq + Ord, + V: Decodable, +{ + fn decode(d: &mut D) -> BTreeMap { + let len = d.read_usize(); + let mut map = BTreeMap::new(); + for _ in 0..len { + let key = Decodable::decode(d); + let val = Decodable::decode(d); + map.insert(key, val); + } + map + } +} + +impl Encodable for BTreeSet +where + T: Encodable + PartialEq + Ord, +{ + fn encode(&self, s: &mut S) { + s.emit_usize(self.len()); + for e in self.iter() { + e.encode(s); + } + } +} + +impl Decodable for BTreeSet +where + T: Decodable + PartialEq + Ord, +{ + fn decode(d: &mut D) -> BTreeSet { + let len = d.read_usize(); + let mut set = BTreeSet::new(); + for _ in 0..len { + set.insert(Decodable::decode(d)); + } + set + } +} + +impl Encodable for HashMap +where + K: Encodable + Eq, + V: Encodable, + S: BuildHasher, +{ + fn encode(&self, e: &mut E) { + e.emit_usize(self.len()); + for (key, val) in self.iter() { + key.encode(e); + val.encode(e); + } + } +} + +impl Decodable for HashMap +where + K: Decodable + Hash + Eq, + V: Decodable, + S: BuildHasher + Default, +{ + fn decode(d: &mut D) -> HashMap { + let len = d.read_usize(); + let state = Default::default(); + let mut map = HashMap::with_capacity_and_hasher(len, state); + for _ in 0..len { + let key = Decodable::decode(d); + let val = Decodable::decode(d); + map.insert(key, val); + } + map + } +} + +impl Encodable for HashSet +where + T: Encodable + Eq, + S: BuildHasher, +{ + fn encode(&self, s: &mut E) { + s.emit_usize(self.len()); + for e in self.iter() { + e.encode(s); + } + } +} + +impl Decodable for HashSet +where + T: Decodable + Hash + Eq, + S: BuildHasher + Default, +{ + fn decode(d: &mut D) -> HashSet { + let len = d.read_usize(); + let state = Default::default(); + let mut set = HashSet::with_capacity_and_hasher(len, state); + for _ in 0..len { + set.insert(Decodable::decode(d)); + } + set + } +} + +impl Encodable for indexmap::IndexMap +where + K: Encodable + Hash + Eq, + V: Encodable, + S: BuildHasher, +{ + fn encode(&self, e: &mut E) { + e.emit_usize(self.len()); + for (key, val) in self.iter() { + key.encode(e); + val.encode(e); + } + } +} + +impl Decodable for indexmap::IndexMap +where + K: Decodable + Hash + Eq, + V: Decodable, + S: BuildHasher + Default, +{ + fn decode(d: &mut D) -> indexmap::IndexMap { + let len = d.read_usize(); + let state = Default::default(); + let mut map = indexmap::IndexMap::with_capacity_and_hasher(len, state); + for _ in 0..len { + let key = Decodable::decode(d); + let val = Decodable::decode(d); + map.insert(key, val); + } + map + } +} + +impl Encodable for indexmap::IndexSet +where + T: Encodable + Hash + Eq, + S: BuildHasher, +{ + fn encode(&self, s: &mut E) { + s.emit_usize(self.len()); + for e in self.iter() { + e.encode(s); + } + } +} + +impl Decodable for indexmap::IndexSet +where + T: Decodable + Hash + Eq, + S: BuildHasher + Default, +{ + fn decode(d: &mut D) -> indexmap::IndexSet { + let len = d.read_usize(); + let state = Default::default(); + let mut set = indexmap::IndexSet::with_capacity_and_hasher(len, state); + for _ in 0..len { + set.insert(Decodable::decode(d)); + } + set + } +} + +impl> Encodable for Rc<[T]> { + fn encode(&self, s: &mut E) { + let slice: &[T] = self; + slice.encode(s); + } +} + +impl> Decodable for Rc<[T]> { + fn decode(d: &mut D) -> Rc<[T]> { + let vec: Vec = Decodable::decode(d); + vec.into() + } +} + +impl> Encodable for Arc<[T]> { + fn encode(&self, s: &mut E) { + let slice: &[T] = self; + slice.encode(s); + } +} + +impl> Decodable for Arc<[T]> { + fn decode(d: &mut D) -> Arc<[T]> { + let vec: Vec = Decodable::decode(d); + vec.into() + } +} diff --git a/compiler/rustc_serialize/src/leb128.rs b/compiler/rustc_serialize/src/leb128.rs new file mode 100644 index 000000000..08b3c0542 --- /dev/null +++ b/compiler/rustc_serialize/src/leb128.rs @@ -0,0 +1,163 @@ +#![macro_use] + +macro_rules! max_leb128_len { + ($int_ty:ty) => { + // The longest LEB128 encoding for an integer uses 7 bits per byte. + (std::mem::size_of::<$int_ty>() * 8 + 6) / 7 + }; +} + +// Returns the longest LEB128 encoding of all supported integer types. +pub const fn max_leb128_len() -> usize { + max_leb128_len!(u128) +} + +macro_rules! impl_write_unsigned_leb128 { + ($fn_name:ident, $int_ty:ty) => { + #[inline] + pub fn $fn_name( + out: &mut [::std::mem::MaybeUninit; max_leb128_len!($int_ty)], + mut value: $int_ty, + ) -> &[u8] { + let mut i = 0; + + loop { + if value < 0x80 { + unsafe { + *out.get_unchecked_mut(i).as_mut_ptr() = value as u8; + } + + i += 1; + break; + } else { + unsafe { + *out.get_unchecked_mut(i).as_mut_ptr() = ((value & 0x7f) | 0x80) as u8; + } + + value >>= 7; + i += 1; + } + } + + unsafe { ::std::mem::MaybeUninit::slice_assume_init_ref(&out.get_unchecked(..i)) } + } + }; +} + +impl_write_unsigned_leb128!(write_u16_leb128, u16); +impl_write_unsigned_leb128!(write_u32_leb128, u32); +impl_write_unsigned_leb128!(write_u64_leb128, u64); +impl_write_unsigned_leb128!(write_u128_leb128, u128); +impl_write_unsigned_leb128!(write_usize_leb128, usize); + +macro_rules! impl_read_unsigned_leb128 { + ($fn_name:ident, $int_ty:ty) => { + #[inline] + pub fn $fn_name(slice: &[u8], position: &mut usize) -> $int_ty { + // The first iteration of this loop is unpeeled. This is a + // performance win because this code is hot and integer values less + // than 128 are very common, typically occurring 50-80% or more of + // the time, even for u64 and u128. + let byte = slice[*position]; + *position += 1; + if (byte & 0x80) == 0 { + return byte as $int_ty; + } + let mut result = (byte & 0x7F) as $int_ty; + let mut shift = 7; + loop { + let byte = slice[*position]; + *position += 1; + if (byte & 0x80) == 0 { + result |= (byte as $int_ty) << shift; + return result; + } else { + result |= ((byte & 0x7F) as $int_ty) << shift; + } + shift += 7; + } + } + }; +} + +impl_read_unsigned_leb128!(read_u16_leb128, u16); +impl_read_unsigned_leb128!(read_u32_leb128, u32); +impl_read_unsigned_leb128!(read_u64_leb128, u64); +impl_read_unsigned_leb128!(read_u128_leb128, u128); +impl_read_unsigned_leb128!(read_usize_leb128, usize); + +macro_rules! impl_write_signed_leb128 { + ($fn_name:ident, $int_ty:ty) => { + #[inline] + pub fn $fn_name( + out: &mut [::std::mem::MaybeUninit; max_leb128_len!($int_ty)], + mut value: $int_ty, + ) -> &[u8] { + let mut i = 0; + + loop { + let mut byte = (value as u8) & 0x7f; + value >>= 7; + let more = !(((value == 0) && ((byte & 0x40) == 0)) + || ((value == -1) && ((byte & 0x40) != 0))); + + if more { + byte |= 0x80; // Mark this byte to show that more bytes will follow. + } + + unsafe { + *out.get_unchecked_mut(i).as_mut_ptr() = byte; + } + + i += 1; + + if !more { + break; + } + } + + unsafe { ::std::mem::MaybeUninit::slice_assume_init_ref(&out.get_unchecked(..i)) } + } + }; +} + +impl_write_signed_leb128!(write_i16_leb128, i16); +impl_write_signed_leb128!(write_i32_leb128, i32); +impl_write_signed_leb128!(write_i64_leb128, i64); +impl_write_signed_leb128!(write_i128_leb128, i128); +impl_write_signed_leb128!(write_isize_leb128, isize); + +macro_rules! impl_read_signed_leb128 { + ($fn_name:ident, $int_ty:ty) => { + #[inline] + pub fn $fn_name(slice: &[u8], position: &mut usize) -> $int_ty { + let mut result = 0; + let mut shift = 0; + let mut byte; + + loop { + byte = slice[*position]; + *position += 1; + result |= <$int_ty>::from(byte & 0x7F) << shift; + shift += 7; + + if (byte & 0x80) == 0 { + break; + } + } + + if (shift < <$int_ty>::BITS) && ((byte & 0x40) != 0) { + // sign extend + result |= (!0 << shift); + } + + result + } + }; +} + +impl_read_signed_leb128!(read_i16_leb128, i16); +impl_read_signed_leb128!(read_i32_leb128, i32); +impl_read_signed_leb128!(read_i64_leb128, i64); +impl_read_signed_leb128!(read_i128_leb128, i128); +impl_read_signed_leb128!(read_isize_leb128, isize); diff --git a/compiler/rustc_serialize/src/lib.rs b/compiler/rustc_serialize/src/lib.rs new file mode 100644 index 000000000..e606f4273 --- /dev/null +++ b/compiler/rustc_serialize/src/lib.rs @@ -0,0 +1,28 @@ +//! Support code for encoding and decoding types. + +/* +Core encoding and decoding interfaces. +*/ + +#![doc( + html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/", + html_playground_url = "https://play.rust-lang.org/", + test(attr(allow(unused_variables), deny(warnings))) +)] +#![feature(never_type)] +#![feature(associated_type_bounds)] +#![feature(min_specialization)] +#![feature(core_intrinsics)] +#![feature(maybe_uninit_slice)] +#![feature(let_else)] +#![feature(new_uninit)] +#![cfg_attr(test, feature(test))] +#![allow(rustc::internal)] + +pub use self::serialize::{Decodable, Decoder, Encodable, Encoder}; + +mod collection_impls; +mod serialize; + +pub mod leb128; +pub mod opaque; diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs new file mode 100644 index 000000000..5c17ef6ac --- /dev/null +++ b/compiler/rustc_serialize/src/opaque.rs @@ -0,0 +1,750 @@ +use crate::leb128::{self, max_leb128_len}; +use crate::serialize::{Decodable, Decoder, Encodable, Encoder}; +use std::convert::TryInto; +use std::fs::File; +use std::io::{self, Write}; +use std::mem::MaybeUninit; +use std::path::Path; +use std::ptr; + +// ----------------------------------------------------------------------------- +// Encoder +// ----------------------------------------------------------------------------- + +pub struct MemEncoder { + pub data: Vec, +} + +impl MemEncoder { + pub fn new() -> MemEncoder { + MemEncoder { data: vec![] } + } + + #[inline] + pub fn position(&self) -> usize { + self.data.len() + } + + pub fn finish(self) -> Vec { + self.data + } +} + +macro_rules! write_leb128 { + ($enc:expr, $value:expr, $int_ty:ty, $fun:ident) => {{ + const MAX_ENCODED_LEN: usize = max_leb128_len!($int_ty); + let old_len = $enc.data.len(); + + if MAX_ENCODED_LEN > $enc.data.capacity() - old_len { + $enc.data.reserve(MAX_ENCODED_LEN); + } + + // SAFETY: The above check and `reserve` ensures that there is enough + // room to write the encoded value to the vector's internal buffer. + unsafe { + let buf = &mut *($enc.data.as_mut_ptr().add(old_len) + as *mut [MaybeUninit; MAX_ENCODED_LEN]); + let encoded = leb128::$fun(buf, $value); + $enc.data.set_len(old_len + encoded.len()); + } + }}; +} + +/// A byte that [cannot occur in UTF8 sequences][utf8]. Used to mark the end of a string. +/// This way we can skip validation and still be relatively sure that deserialization +/// did not desynchronize. +/// +/// [utf8]: https://en.wikipedia.org/w/index.php?title=UTF-8&oldid=1058865525#Codepage_layout +const STR_SENTINEL: u8 = 0xC1; + +impl Encoder for MemEncoder { + #[inline] + fn emit_usize(&mut self, v: usize) { + write_leb128!(self, v, usize, write_usize_leb128) + } + + #[inline] + fn emit_u128(&mut self, v: u128) { + write_leb128!(self, v, u128, write_u128_leb128); + } + + #[inline] + fn emit_u64(&mut self, v: u64) { + write_leb128!(self, v, u64, write_u64_leb128); + } + + #[inline] + fn emit_u32(&mut self, v: u32) { + write_leb128!(self, v, u32, write_u32_leb128); + } + + #[inline] + fn emit_u16(&mut self, v: u16) { + self.data.extend_from_slice(&v.to_le_bytes()); + } + + #[inline] + fn emit_u8(&mut self, v: u8) { + self.data.push(v); + } + + #[inline] + fn emit_isize(&mut self, v: isize) { + write_leb128!(self, v, isize, write_isize_leb128) + } + + #[inline] + fn emit_i128(&mut self, v: i128) { + write_leb128!(self, v, i128, write_i128_leb128) + } + + #[inline] + fn emit_i64(&mut self, v: i64) { + write_leb128!(self, v, i64, write_i64_leb128) + } + + #[inline] + fn emit_i32(&mut self, v: i32) { + write_leb128!(self, v, i32, write_i32_leb128) + } + + #[inline] + fn emit_i16(&mut self, v: i16) { + self.data.extend_from_slice(&v.to_le_bytes()); + } + + #[inline] + fn emit_i8(&mut self, v: i8) { + self.emit_u8(v as u8); + } + + #[inline] + fn emit_bool(&mut self, v: bool) { + self.emit_u8(if v { 1 } else { 0 }); + } + + #[inline] + fn emit_f64(&mut self, v: f64) { + let as_u64: u64 = v.to_bits(); + self.emit_u64(as_u64); + } + + #[inline] + fn emit_f32(&mut self, v: f32) { + let as_u32: u32 = v.to_bits(); + self.emit_u32(as_u32); + } + + #[inline] + fn emit_char(&mut self, v: char) { + self.emit_u32(v as u32); + } + + #[inline] + fn emit_str(&mut self, v: &str) { + self.emit_usize(v.len()); + self.emit_raw_bytes(v.as_bytes()); + self.emit_u8(STR_SENTINEL); + } + + #[inline] + fn emit_raw_bytes(&mut self, s: &[u8]) { + self.data.extend_from_slice(s); + } +} + +pub type FileEncodeResult = Result; + +// `FileEncoder` encodes data to file via fixed-size buffer. +// +// When encoding large amounts of data to a file, using `FileEncoder` may be +// preferred over using `MemEncoder` to encode to a `Vec`, and then writing the +// `Vec` to file, as the latter uses as much memory as there is encoded data, +// while the former uses the fixed amount of memory allocated to the buffer. +// `FileEncoder` also has the advantage of not needing to reallocate as data +// is appended to it, but the disadvantage of requiring more error handling, +// which has some runtime overhead. +pub struct FileEncoder { + // The input buffer. For adequate performance, we need more control over + // buffering than `BufWriter` offers. If `BufWriter` ever offers a raw + // buffer access API, we can use it, and remove `buf` and `buffered`. + buf: Box<[MaybeUninit]>, + buffered: usize, + flushed: usize, + file: File, + // This is used to implement delayed error handling, as described in the + // comment on `trait Encoder`. + res: Result<(), io::Error>, +} + +impl FileEncoder { + pub fn new>(path: P) -> io::Result { + const DEFAULT_BUF_SIZE: usize = 8192; + FileEncoder::with_capacity(path, DEFAULT_BUF_SIZE) + } + + pub fn with_capacity>(path: P, capacity: usize) -> io::Result { + // Require capacity at least as large as the largest LEB128 encoding + // here, so that we don't have to check or handle this on every write. + assert!(capacity >= max_leb128_len()); + + // Require capacity small enough such that some capacity checks can be + // done using guaranteed non-overflowing add rather than sub, which + // shaves an instruction off those code paths (on x86 at least). + assert!(capacity <= usize::MAX - max_leb128_len()); + + let file = File::create(path)?; + + Ok(FileEncoder { + buf: Box::new_uninit_slice(capacity), + buffered: 0, + flushed: 0, + file, + res: Ok(()), + }) + } + + #[inline] + pub fn position(&self) -> usize { + // Tracking position this way instead of having a `self.position` field + // means that we don't have to update the position on every write call. + self.flushed + self.buffered + } + + pub fn flush(&mut self) { + // This is basically a copy of `BufWriter::flush`. If `BufWriter` ever + // offers a raw buffer access API, we can use it, and remove this. + + /// Helper struct to ensure the buffer is updated after all the writes + /// are complete. It tracks the number of written bytes and drains them + /// all from the front of the buffer when dropped. + struct BufGuard<'a> { + buffer: &'a mut [u8], + encoder_buffered: &'a mut usize, + encoder_flushed: &'a mut usize, + flushed: usize, + } + + impl<'a> BufGuard<'a> { + fn new( + buffer: &'a mut [u8], + encoder_buffered: &'a mut usize, + encoder_flushed: &'a mut usize, + ) -> Self { + assert_eq!(buffer.len(), *encoder_buffered); + Self { buffer, encoder_buffered, encoder_flushed, flushed: 0 } + } + + /// The unwritten part of the buffer + fn remaining(&self) -> &[u8] { + &self.buffer[self.flushed..] + } + + /// Flag some bytes as removed from the front of the buffer + fn consume(&mut self, amt: usize) { + self.flushed += amt; + } + + /// true if all of the bytes have been written + fn done(&self) -> bool { + self.flushed >= *self.encoder_buffered + } + } + + impl Drop for BufGuard<'_> { + fn drop(&mut self) { + if self.flushed > 0 { + if self.done() { + *self.encoder_flushed += *self.encoder_buffered; + *self.encoder_buffered = 0; + } else { + self.buffer.copy_within(self.flushed.., 0); + *self.encoder_flushed += self.flushed; + *self.encoder_buffered -= self.flushed; + } + } + } + } + + // If we've already had an error, do nothing. It'll get reported after + // `finish` is called. + if self.res.is_err() { + return; + } + + let mut guard = BufGuard::new( + unsafe { MaybeUninit::slice_assume_init_mut(&mut self.buf[..self.buffered]) }, + &mut self.buffered, + &mut self.flushed, + ); + + while !guard.done() { + match self.file.write(guard.remaining()) { + Ok(0) => { + self.res = Err(io::Error::new( + io::ErrorKind::WriteZero, + "failed to write the buffered data", + )); + return; + } + Ok(n) => guard.consume(n), + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} + Err(e) => { + self.res = Err(e); + return; + } + } + } + } + + pub fn file(&self) -> &File { + &self.file + } + + #[inline] + fn capacity(&self) -> usize { + self.buf.len() + } + + #[inline] + fn write_one(&mut self, value: u8) { + // We ensure this during `FileEncoder` construction. + debug_assert!(self.capacity() >= 1); + + let mut buffered = self.buffered; + + if std::intrinsics::unlikely(buffered >= self.capacity()) { + self.flush(); + buffered = 0; + } + + // SAFETY: The above check and `flush` ensures that there is enough + // room to write the input to the buffer. + unsafe { + *MaybeUninit::slice_as_mut_ptr(&mut self.buf).add(buffered) = value; + } + + self.buffered = buffered + 1; + } + + #[inline] + fn write_all(&mut self, buf: &[u8]) { + let capacity = self.capacity(); + let buf_len = buf.len(); + + if std::intrinsics::likely(buf_len <= capacity) { + let mut buffered = self.buffered; + + if std::intrinsics::unlikely(buf_len > capacity - buffered) { + self.flush(); + buffered = 0; + } + + // SAFETY: The above check and `flush` ensures that there is enough + // room to write the input to the buffer. + unsafe { + let src = buf.as_ptr(); + let dst = MaybeUninit::slice_as_mut_ptr(&mut self.buf).add(buffered); + ptr::copy_nonoverlapping(src, dst, buf_len); + } + + self.buffered = buffered + buf_len; + } else { + self.write_all_unbuffered(buf); + } + } + + fn write_all_unbuffered(&mut self, mut buf: &[u8]) { + // If we've already had an error, do nothing. It'll get reported after + // `finish` is called. + if self.res.is_err() { + return; + } + + if self.buffered > 0 { + self.flush(); + } + + // This is basically a copy of `Write::write_all` but also updates our + // `self.flushed`. It's necessary because `Write::write_all` does not + // return the number of bytes written when an error is encountered, and + // without that, we cannot accurately update `self.flushed` on error. + while !buf.is_empty() { + match self.file.write(buf) { + Ok(0) => { + self.res = Err(io::Error::new( + io::ErrorKind::WriteZero, + "failed to write whole buffer", + )); + return; + } + Ok(n) => { + buf = &buf[n..]; + self.flushed += n; + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} + Err(e) => { + self.res = Err(e); + return; + } + } + } + } + + pub fn finish(mut self) -> Result { + self.flush(); + + let res = std::mem::replace(&mut self.res, Ok(())); + res.map(|()| self.position()) + } +} + +impl Drop for FileEncoder { + fn drop(&mut self) { + // Likely to be a no-op, because `finish` should have been called and + // it also flushes. But do it just in case. + let _result = self.flush(); + } +} + +macro_rules! file_encoder_write_leb128 { + ($enc:expr, $value:expr, $int_ty:ty, $fun:ident) => {{ + const MAX_ENCODED_LEN: usize = max_leb128_len!($int_ty); + + // We ensure this during `FileEncoder` construction. + debug_assert!($enc.capacity() >= MAX_ENCODED_LEN); + + let mut buffered = $enc.buffered; + + // This can't overflow. See assertion in `FileEncoder::with_capacity`. + if std::intrinsics::unlikely(buffered + MAX_ENCODED_LEN > $enc.capacity()) { + $enc.flush(); + buffered = 0; + } + + // SAFETY: The above check and flush ensures that there is enough + // room to write the encoded value to the buffer. + let buf = unsafe { + &mut *($enc.buf.as_mut_ptr().add(buffered) as *mut [MaybeUninit; MAX_ENCODED_LEN]) + }; + + let encoded = leb128::$fun(buf, $value); + $enc.buffered = buffered + encoded.len(); + }}; +} + +impl Encoder for FileEncoder { + #[inline] + fn emit_usize(&mut self, v: usize) { + file_encoder_write_leb128!(self, v, usize, write_usize_leb128) + } + + #[inline] + fn emit_u128(&mut self, v: u128) { + file_encoder_write_leb128!(self, v, u128, write_u128_leb128) + } + + #[inline] + fn emit_u64(&mut self, v: u64) { + file_encoder_write_leb128!(self, v, u64, write_u64_leb128) + } + + #[inline] + fn emit_u32(&mut self, v: u32) { + file_encoder_write_leb128!(self, v, u32, write_u32_leb128) + } + + #[inline] + fn emit_u16(&mut self, v: u16) { + self.write_all(&v.to_le_bytes()); + } + + #[inline] + fn emit_u8(&mut self, v: u8) { + self.write_one(v); + } + + #[inline] + fn emit_isize(&mut self, v: isize) { + file_encoder_write_leb128!(self, v, isize, write_isize_leb128) + } + + #[inline] + fn emit_i128(&mut self, v: i128) { + file_encoder_write_leb128!(self, v, i128, write_i128_leb128) + } + + #[inline] + fn emit_i64(&mut self, v: i64) { + file_encoder_write_leb128!(self, v, i64, write_i64_leb128) + } + + #[inline] + fn emit_i32(&mut self, v: i32) { + file_encoder_write_leb128!(self, v, i32, write_i32_leb128) + } + + #[inline] + fn emit_i16(&mut self, v: i16) { + self.write_all(&v.to_le_bytes()); + } + + #[inline] + fn emit_i8(&mut self, v: i8) { + self.emit_u8(v as u8); + } + + #[inline] + fn emit_bool(&mut self, v: bool) { + self.emit_u8(if v { 1 } else { 0 }); + } + + #[inline] + fn emit_f64(&mut self, v: f64) { + let as_u64: u64 = v.to_bits(); + self.emit_u64(as_u64); + } + + #[inline] + fn emit_f32(&mut self, v: f32) { + let as_u32: u32 = v.to_bits(); + self.emit_u32(as_u32); + } + + #[inline] + fn emit_char(&mut self, v: char) { + self.emit_u32(v as u32); + } + + #[inline] + fn emit_str(&mut self, v: &str) { + self.emit_usize(v.len()); + self.emit_raw_bytes(v.as_bytes()); + self.emit_u8(STR_SENTINEL); + } + + #[inline] + fn emit_raw_bytes(&mut self, s: &[u8]) { + self.write_all(s); + } +} + +// ----------------------------------------------------------------------------- +// Decoder +// ----------------------------------------------------------------------------- + +pub struct MemDecoder<'a> { + pub data: &'a [u8], + position: usize, +} + +impl<'a> MemDecoder<'a> { + #[inline] + pub fn new(data: &'a [u8], position: usize) -> MemDecoder<'a> { + MemDecoder { data, position } + } + + #[inline] + pub fn position(&self) -> usize { + self.position + } + + #[inline] + pub fn set_position(&mut self, pos: usize) { + self.position = pos + } + + #[inline] + pub fn advance(&mut self, bytes: usize) { + self.position += bytes; + } +} + +macro_rules! read_leb128 { + ($dec:expr, $fun:ident) => {{ leb128::$fun($dec.data, &mut $dec.position) }}; +} + +impl<'a> Decoder for MemDecoder<'a> { + #[inline] + fn read_u128(&mut self) -> u128 { + read_leb128!(self, read_u128_leb128) + } + + #[inline] + fn read_u64(&mut self) -> u64 { + read_leb128!(self, read_u64_leb128) + } + + #[inline] + fn read_u32(&mut self) -> u32 { + read_leb128!(self, read_u32_leb128) + } + + #[inline] + fn read_u16(&mut self) -> u16 { + let bytes = [self.data[self.position], self.data[self.position + 1]]; + let value = u16::from_le_bytes(bytes); + self.position += 2; + value + } + + #[inline] + fn read_u8(&mut self) -> u8 { + let value = self.data[self.position]; + self.position += 1; + value + } + + #[inline] + fn read_usize(&mut self) -> usize { + read_leb128!(self, read_usize_leb128) + } + + #[inline] + fn read_i128(&mut self) -> i128 { + read_leb128!(self, read_i128_leb128) + } + + #[inline] + fn read_i64(&mut self) -> i64 { + read_leb128!(self, read_i64_leb128) + } + + #[inline] + fn read_i32(&mut self) -> i32 { + read_leb128!(self, read_i32_leb128) + } + + #[inline] + fn read_i16(&mut self) -> i16 { + let bytes = [self.data[self.position], self.data[self.position + 1]]; + let value = i16::from_le_bytes(bytes); + self.position += 2; + value + } + + #[inline] + fn read_i8(&mut self) -> i8 { + let value = self.data[self.position]; + self.position += 1; + value as i8 + } + + #[inline] + fn read_isize(&mut self) -> isize { + read_leb128!(self, read_isize_leb128) + } + + #[inline] + fn read_bool(&mut self) -> bool { + let value = self.read_u8(); + value != 0 + } + + #[inline] + fn read_f64(&mut self) -> f64 { + let bits = self.read_u64(); + f64::from_bits(bits) + } + + #[inline] + fn read_f32(&mut self) -> f32 { + let bits = self.read_u32(); + f32::from_bits(bits) + } + + #[inline] + fn read_char(&mut self) -> char { + let bits = self.read_u32(); + std::char::from_u32(bits).unwrap() + } + + #[inline] + fn read_str(&mut self) -> &'a str { + let len = self.read_usize(); + let sentinel = self.data[self.position + len]; + assert!(sentinel == STR_SENTINEL); + let s = unsafe { + std::str::from_utf8_unchecked(&self.data[self.position..self.position + len]) + }; + self.position += len + 1; + s + } + + #[inline] + fn read_raw_bytes(&mut self, bytes: usize) -> &'a [u8] { + let start = self.position; + self.position += bytes; + &self.data[start..self.position] + } +} + +// Specializations for contiguous byte sequences follow. The default implementations for slices +// encode and decode each element individually. This isn't necessary for `u8` slices when using +// opaque encoders and decoders, because each `u8` is unchanged by encoding and decoding. +// Therefore, we can use more efficient implementations that process the entire sequence at once. + +// Specialize encoding byte slices. This specialization also applies to encoding `Vec`s, etc., +// since the default implementations call `encode` on their slices internally. +impl Encodable for [u8] { + fn encode(&self, e: &mut MemEncoder) { + Encoder::emit_usize(e, self.len()); + e.emit_raw_bytes(self); + } +} + +impl Encodable for [u8] { + fn encode(&self, e: &mut FileEncoder) { + Encoder::emit_usize(e, self.len()); + e.emit_raw_bytes(self); + } +} + +// Specialize decoding `Vec`. This specialization also applies to decoding `Box<[u8]>`s, etc., +// since the default implementations call `decode` to produce a `Vec` internally. +impl<'a> Decodable> for Vec { + fn decode(d: &mut MemDecoder<'a>) -> Self { + let len = Decoder::read_usize(d); + d.read_raw_bytes(len).to_owned() + } +} + +// An integer that will always encode to 8 bytes. +pub struct IntEncodedWithFixedSize(pub u64); + +impl IntEncodedWithFixedSize { + pub const ENCODED_SIZE: usize = 8; +} + +impl Encodable for IntEncodedWithFixedSize { + #[inline] + fn encode(&self, e: &mut MemEncoder) { + let _start_pos = e.position(); + e.emit_raw_bytes(&self.0.to_le_bytes()); + let _end_pos = e.position(); + debug_assert_eq!((_end_pos - _start_pos), IntEncodedWithFixedSize::ENCODED_SIZE); + } +} + +impl Encodable for IntEncodedWithFixedSize { + #[inline] + fn encode(&self, e: &mut FileEncoder) { + let _start_pos = e.position(); + e.emit_raw_bytes(&self.0.to_le_bytes()); + let _end_pos = e.position(); + debug_assert_eq!((_end_pos - _start_pos), IntEncodedWithFixedSize::ENCODED_SIZE); + } +} + +impl<'a> Decodable> for IntEncodedWithFixedSize { + #[inline] + fn decode(decoder: &mut MemDecoder<'a>) -> IntEncodedWithFixedSize { + let _start_pos = decoder.position(); + let bytes = decoder.read_raw_bytes(IntEncodedWithFixedSize::ENCODED_SIZE); + let value = u64::from_le_bytes(bytes.try_into().unwrap()); + let _end_pos = decoder.position(); + debug_assert_eq!((_end_pos - _start_pos), IntEncodedWithFixedSize::ENCODED_SIZE); + + IntEncodedWithFixedSize(value) + } +} diff --git a/compiler/rustc_serialize/src/serialize.rs b/compiler/rustc_serialize/src/serialize.rs new file mode 100644 index 000000000..36585b8d7 --- /dev/null +++ b/compiler/rustc_serialize/src/serialize.rs @@ -0,0 +1,469 @@ +//! Support code for encoding and decoding types. + +/* +Core encoding and decoding interfaces. +*/ + +use std::borrow::Cow; +use std::cell::{Cell, RefCell}; +use std::marker::PhantomData; +use std::path; +use std::rc::Rc; +use std::sync::Arc; + +/// A note about error handling. +/// +/// Encoders may be fallible, but in practice failure is rare and there are so +/// many nested calls that typical Rust error handling (via `Result` and `?`) +/// is pervasive and has non-trivial cost. Instead, impls of this trait must +/// implement a delayed error handling strategy. If a failure occurs, they +/// should record this internally, and all subsequent encoding operations can +/// be processed or ignored, whichever is appropriate. Then they should provide +/// a `finish` method that finishes up encoding. If the encoder is fallible, +/// `finish` should return a `Result` that indicates success or failure. +pub trait Encoder { + // Primitive types: + fn emit_usize(&mut self, v: usize); + fn emit_u128(&mut self, v: u128); + fn emit_u64(&mut self, v: u64); + fn emit_u32(&mut self, v: u32); + fn emit_u16(&mut self, v: u16); + fn emit_u8(&mut self, v: u8); + fn emit_isize(&mut self, v: isize); + fn emit_i128(&mut self, v: i128); + fn emit_i64(&mut self, v: i64); + fn emit_i32(&mut self, v: i32); + fn emit_i16(&mut self, v: i16); + fn emit_i8(&mut self, v: i8); + fn emit_bool(&mut self, v: bool); + fn emit_f64(&mut self, v: f64); + fn emit_f32(&mut self, v: f32); + fn emit_char(&mut self, v: char); + fn emit_str(&mut self, v: &str); + fn emit_raw_bytes(&mut self, s: &[u8]); + + // Convenience for the derive macro: + fn emit_enum_variant(&mut self, v_id: usize, f: F) + where + F: FnOnce(&mut Self), + { + self.emit_usize(v_id); + f(self); + } + + // We put the field index in a const generic to allow the emit_usize to be + // compiled into a more efficient form. In practice, the variant index is + // known at compile-time, and that knowledge allows much more efficient + // codegen than we'd otherwise get. LLVM isn't always able to make the + // optimization that would otherwise be necessary here, likely due to the + // multiple levels of inlining and const-prop that are needed. + #[inline] + fn emit_fieldless_enum_variant(&mut self) { + self.emit_usize(ID) + } +} + +// Note: all the methods in this trait are infallible, which may be surprising. +// They used to be fallible (i.e. return a `Result`) but many of the impls just +// panicked when something went wrong, and for the cases that didn't the +// top-level invocation would also just panic on failure. Switching to +// infallibility made things faster and lots of code a little simpler and more +// concise. +pub trait Decoder { + // Primitive types: + fn read_usize(&mut self) -> usize; + fn read_u128(&mut self) -> u128; + fn read_u64(&mut self) -> u64; + fn read_u32(&mut self) -> u32; + fn read_u16(&mut self) -> u16; + fn read_u8(&mut self) -> u8; + fn read_isize(&mut self) -> isize; + fn read_i128(&mut self) -> i128; + fn read_i64(&mut self) -> i64; + fn read_i32(&mut self) -> i32; + fn read_i16(&mut self) -> i16; + fn read_i8(&mut self) -> i8; + fn read_bool(&mut self) -> bool; + fn read_f64(&mut self) -> f64; + fn read_f32(&mut self) -> f32; + fn read_char(&mut self) -> char; + fn read_str(&mut self) -> &str; + fn read_raw_bytes(&mut self, len: usize) -> &[u8]; +} + +/// Trait for types that can be serialized +/// +/// This can be implemented using the `Encodable`, `TyEncodable` and +/// `MetadataEncodable` macros. +/// +/// * `Encodable` should be used in crates that don't depend on +/// `rustc_middle`. +/// * `MetadataEncodable` is used in `rustc_metadata` for types that contain +/// `rustc_metadata::rmeta::Lazy`. +/// * `TyEncodable` should be used for types that are only serialized in crate +/// metadata or the incremental cache. This is most types in `rustc_middle`. +pub trait Encodable { + fn encode(&self, s: &mut S); +} + +/// Trait for types that can be deserialized +/// +/// This can be implemented using the `Decodable`, `TyDecodable` and +/// `MetadataDecodable` macros. +/// +/// * `Decodable` should be used in crates that don't depend on +/// `rustc_middle`. +/// * `MetadataDecodable` is used in `rustc_metadata` for types that contain +/// `rustc_metadata::rmeta::Lazy`. +/// * `TyDecodable` should be used for types that are only serialized in crate +/// metadata or the incremental cache. This is most types in `rustc_middle`. +pub trait Decodable: Sized { + fn decode(d: &mut D) -> Self; +} + +macro_rules! direct_serialize_impls { + ($($ty:ident $emit_method:ident $read_method:ident),*) => { + $( + impl Encodable for $ty { + fn encode(&self, s: &mut S) { + s.$emit_method(*self); + } + } + + impl Decodable for $ty { + fn decode(d: &mut D) -> $ty { + d.$read_method() + } + } + )* + } +} + +direct_serialize_impls! { + usize emit_usize read_usize, + u8 emit_u8 read_u8, + u16 emit_u16 read_u16, + u32 emit_u32 read_u32, + u64 emit_u64 read_u64, + u128 emit_u128 read_u128, + + isize emit_isize read_isize, + i8 emit_i8 read_i8, + i16 emit_i16 read_i16, + i32 emit_i32 read_i32, + i64 emit_i64 read_i64, + i128 emit_i128 read_i128, + + f32 emit_f32 read_f32, + f64 emit_f64 read_f64, + bool emit_bool read_bool, + char emit_char read_char +} + +impl Encodable for &T +where + T: Encodable, +{ + fn encode(&self, s: &mut S) { + (**self).encode(s) + } +} + +impl Encodable for ! { + fn encode(&self, _s: &mut S) { + unreachable!(); + } +} + +impl Decodable for ! { + fn decode(_d: &mut D) -> ! { + unreachable!() + } +} + +impl Encodable for ::std::num::NonZeroU32 { + fn encode(&self, s: &mut S) { + s.emit_u32(self.get()); + } +} + +impl Decodable for ::std::num::NonZeroU32 { + fn decode(d: &mut D) -> Self { + ::std::num::NonZeroU32::new(d.read_u32()).unwrap() + } +} + +impl Encodable for str { + fn encode(&self, s: &mut S) { + s.emit_str(self); + } +} + +impl Encodable for String { + fn encode(&self, s: &mut S) { + s.emit_str(&self[..]); + } +} + +impl Decodable for String { + fn decode(d: &mut D) -> String { + d.read_str().to_owned() + } +} + +impl Encodable for () { + fn encode(&self, _s: &mut S) {} +} + +impl Decodable for () { + fn decode(_: &mut D) -> () {} +} + +impl Encodable for PhantomData { + fn encode(&self, _s: &mut S) {} +} + +impl Decodable for PhantomData { + fn decode(_: &mut D) -> PhantomData { + PhantomData + } +} + +impl> Decodable for Box<[T]> { + fn decode(d: &mut D) -> Box<[T]> { + let v: Vec = Decodable::decode(d); + v.into_boxed_slice() + } +} + +impl> Encodable for Rc { + fn encode(&self, s: &mut S) { + (**self).encode(s); + } +} + +impl> Decodable for Rc { + fn decode(d: &mut D) -> Rc { + Rc::new(Decodable::decode(d)) + } +} + +impl> Encodable for [T] { + default fn encode(&self, s: &mut S) { + s.emit_usize(self.len()); + for e in self.iter() { + e.encode(s); + } + } +} + +impl> Encodable for Vec { + fn encode(&self, s: &mut S) { + let slice: &[T] = self; + slice.encode(s); + } +} + +impl> Decodable for Vec { + default fn decode(d: &mut D) -> Vec { + let len = d.read_usize(); + // SAFETY: we set the capacity in advance, only write elements, and + // only set the length at the end once the writing has succeeded. + let mut vec = Vec::with_capacity(len); + unsafe { + let ptr: *mut T = vec.as_mut_ptr(); + for i in 0..len { + std::ptr::write(ptr.offset(i as isize), Decodable::decode(d)); + } + vec.set_len(len); + } + vec + } +} + +impl, const N: usize> Encodable for [T; N] { + fn encode(&self, s: &mut S) { + let slice: &[T] = self; + slice.encode(s); + } +} + +impl Decodable for [u8; N] { + fn decode(d: &mut D) -> [u8; N] { + let len = d.read_usize(); + assert!(len == N); + let mut v = [0u8; N]; + for i in 0..len { + v[i] = Decodable::decode(d); + } + v + } +} + +impl<'a, S: Encoder, T: Encodable> Encodable for Cow<'a, [T]> +where + [T]: ToOwned>, +{ + fn encode(&self, s: &mut S) { + let slice: &[T] = self; + slice.encode(s); + } +} + +impl + ToOwned> Decodable for Cow<'static, [T]> +where + [T]: ToOwned>, +{ + fn decode(d: &mut D) -> Cow<'static, [T]> { + let v: Vec = Decodable::decode(d); + Cow::Owned(v) + } +} + +impl<'a, S: Encoder> Encodable for Cow<'a, str> { + fn encode(&self, s: &mut S) { + let val: &str = self; + val.encode(s) + } +} + +impl<'a, D: Decoder> Decodable for Cow<'a, str> { + fn decode(d: &mut D) -> Cow<'static, str> { + let v: String = Decodable::decode(d); + Cow::Owned(v) + } +} + +impl> Encodable for Option { + fn encode(&self, s: &mut S) { + match *self { + None => s.emit_enum_variant(0, |_| {}), + Some(ref v) => s.emit_enum_variant(1, |s| v.encode(s)), + } + } +} + +impl> Decodable for Option { + fn decode(d: &mut D) -> Option { + match d.read_usize() { + 0 => None, + 1 => Some(Decodable::decode(d)), + _ => panic!("Encountered invalid discriminant while decoding `Option`."), + } + } +} + +impl, T2: Encodable> Encodable for Result { + fn encode(&self, s: &mut S) { + match *self { + Ok(ref v) => s.emit_enum_variant(0, |s| v.encode(s)), + Err(ref v) => s.emit_enum_variant(1, |s| v.encode(s)), + } + } +} + +impl, T2: Decodable> Decodable for Result { + fn decode(d: &mut D) -> Result { + match d.read_usize() { + 0 => Ok(T1::decode(d)), + 1 => Err(T2::decode(d)), + _ => panic!("Encountered invalid discriminant while decoding `Result`."), + } + } +} + +macro_rules! peel { + ($name:ident, $($other:ident,)*) => (tuple! { $($other,)* }) +} + +macro_rules! tuple { + () => (); + ( $($name:ident,)+ ) => ( + impl),+> Decodable for ($($name,)+) { + fn decode(d: &mut D) -> ($($name,)+) { + ($({ let element: $name = Decodable::decode(d); element },)+) + } + } + impl),+> Encodable for ($($name,)+) { + #[allow(non_snake_case)] + fn encode(&self, s: &mut S) { + let ($(ref $name,)+) = *self; + $($name.encode(s);)+ + } + } + peel! { $($name,)+ } + ) +} + +tuple! { T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, } + +impl Encodable for path::Path { + fn encode(&self, e: &mut S) { + self.to_str().unwrap().encode(e); + } +} + +impl Encodable for path::PathBuf { + fn encode(&self, e: &mut S) { + path::Path::encode(self, e); + } +} + +impl Decodable for path::PathBuf { + fn decode(d: &mut D) -> path::PathBuf { + let bytes: String = Decodable::decode(d); + path::PathBuf::from(bytes) + } +} + +impl + Copy> Encodable for Cell { + fn encode(&self, s: &mut S) { + self.get().encode(s); + } +} + +impl + Copy> Decodable for Cell { + fn decode(d: &mut D) -> Cell { + Cell::new(Decodable::decode(d)) + } +} + +// FIXME: #15036 +// Should use `try_borrow`, returning an +// `encoder.error("attempting to Encode borrowed RefCell")` +// from `encode` when `try_borrow` returns `None`. + +impl> Encodable for RefCell { + fn encode(&self, s: &mut S) { + self.borrow().encode(s); + } +} + +impl> Decodable for RefCell { + fn decode(d: &mut D) -> RefCell { + RefCell::new(Decodable::decode(d)) + } +} + +impl> Encodable for Arc { + fn encode(&self, s: &mut S) { + (**self).encode(s); + } +} + +impl> Decodable for Arc { + fn decode(d: &mut D) -> Arc { + Arc::new(Decodable::decode(d)) + } +} + +impl> Encodable for Box { + fn encode(&self, s: &mut S) { + (**self).encode(s); + } +} +impl> Decodable for Box { + fn decode(d: &mut D) -> Box { + Box::new(Decodable::decode(d)) + } +} diff --git a/compiler/rustc_serialize/tests/leb128.rs b/compiler/rustc_serialize/tests/leb128.rs new file mode 100644 index 000000000..314c07db9 --- /dev/null +++ b/compiler/rustc_serialize/tests/leb128.rs @@ -0,0 +1,91 @@ +#![feature(maybe_uninit_slice)] +#![feature(maybe_uninit_uninit_array)] + +use rustc_serialize::leb128::*; +use std::mem::MaybeUninit; + +macro_rules! impl_test_unsigned_leb128 { + ($test_name:ident, $write_fn_name:ident, $read_fn_name:ident, $int_ty:ident) => { + #[test] + fn $test_name() { + // Test 256 evenly spaced values of integer range, + // integer max value, and some "random" numbers. + let mut values = Vec::new(); + + let increment = (1 as $int_ty) << ($int_ty::BITS - 8); + values.extend((0..256).map(|i| $int_ty::MIN + i * increment)); + + values.push($int_ty::MAX); + + values.extend( + (-500..500).map(|i| (i as $int_ty).wrapping_mul(0x12345789ABCDEFu64 as $int_ty)), + ); + + let mut stream = Vec::new(); + + for &x in &values { + let mut buf = MaybeUninit::uninit_array(); + stream.extend($write_fn_name(&mut buf, x)); + } + + let mut position = 0; + for &expected in &values { + let actual = $read_fn_name(&stream, &mut position); + assert_eq!(expected, actual); + } + assert_eq!(stream.len(), position); + } + }; +} + +impl_test_unsigned_leb128!(test_u16_leb128, write_u16_leb128, read_u16_leb128, u16); +impl_test_unsigned_leb128!(test_u32_leb128, write_u32_leb128, read_u32_leb128, u32); +impl_test_unsigned_leb128!(test_u64_leb128, write_u64_leb128, read_u64_leb128, u64); +impl_test_unsigned_leb128!(test_u128_leb128, write_u128_leb128, read_u128_leb128, u128); +impl_test_unsigned_leb128!(test_usize_leb128, write_usize_leb128, read_usize_leb128, usize); + +macro_rules! impl_test_signed_leb128 { + ($test_name:ident, $write_fn_name:ident, $read_fn_name:ident, $int_ty:ident) => { + #[test] + fn $test_name() { + // Test 256 evenly spaced values of integer range, + // integer max value, and some "random" numbers. + let mut values = Vec::new(); + + let mut value = $int_ty::MIN; + let increment = (1 as $int_ty) << ($int_ty::BITS - 8); + + for _ in 0..256 { + values.push(value); + // The addition in the last loop iteration overflows. + value = value.wrapping_add(increment); + } + + values.push($int_ty::MAX); + + values.extend( + (-500..500).map(|i| (i as $int_ty).wrapping_mul(0x12345789ABCDEFi64 as $int_ty)), + ); + + let mut stream = Vec::new(); + + for &x in &values { + let mut buf = MaybeUninit::uninit_array(); + stream.extend($write_fn_name(&mut buf, x)); + } + + let mut position = 0; + for &expected in &values { + let actual = $read_fn_name(&stream, &mut position); + assert_eq!(expected, actual); + } + assert_eq!(stream.len(), position); + } + }; +} + +impl_test_signed_leb128!(test_i16_leb128, write_i16_leb128, read_i16_leb128, i16); +impl_test_signed_leb128!(test_i32_leb128, write_i32_leb128, read_i32_leb128, i32); +impl_test_signed_leb128!(test_i64_leb128, write_i64_leb128, read_i64_leb128, i64); +impl_test_signed_leb128!(test_i128_leb128, write_i128_leb128, read_i128_leb128, i128); +impl_test_signed_leb128!(test_isize_leb128, write_isize_leb128, read_isize_leb128, isize); diff --git a/compiler/rustc_serialize/tests/opaque.rs b/compiler/rustc_serialize/tests/opaque.rs new file mode 100644 index 000000000..3a695d071 --- /dev/null +++ b/compiler/rustc_serialize/tests/opaque.rs @@ -0,0 +1,277 @@ +#![allow(rustc::internal)] + +use rustc_macros::{Decodable, Encodable}; +use rustc_serialize::opaque::{MemDecoder, MemEncoder}; +use rustc_serialize::{Decodable, Encodable}; +use std::fmt::Debug; + +#[derive(PartialEq, Clone, Debug, Encodable, Decodable)] +struct Struct { + a: (), + b: u8, + c: u16, + d: u32, + e: u64, + f: usize, + + g: i8, + h: i16, + i: i32, + j: i64, + k: isize, + + l: char, + m: String, + n: f32, + o: f64, + p: bool, + q: Option, +} + +fn check_round_trip< + T: Encodable + for<'a> Decodable> + PartialEq + Debug, +>( + values: Vec, +) { + let mut encoder = MemEncoder::new(); + for value in &values { + Encodable::encode(value, &mut encoder); + } + + let data = encoder.finish(); + let mut decoder = MemDecoder::new(&data[..], 0); + + for value in values { + let decoded = Decodable::decode(&mut decoder); + assert_eq!(value, decoded); + } +} + +#[test] +fn test_unit() { + check_round_trip(vec![(), (), (), ()]); +} + +#[test] +fn test_u8() { + let mut vec = vec![]; + for i in u8::MIN..u8::MAX { + vec.push(i); + } + check_round_trip(vec); +} + +#[test] +fn test_u16() { + for i in u16::MIN..u16::MAX { + check_round_trip(vec![1, 2, 3, i, i, i]); + } +} + +#[test] +fn test_u32() { + check_round_trip(vec![1, 2, 3, u32::MIN, 0, 1, u32::MAX, 2, 1]); +} + +#[test] +fn test_u64() { + check_round_trip(vec![1, 2, 3, u64::MIN, 0, 1, u64::MAX, 2, 1]); +} + +#[test] +fn test_usize() { + check_round_trip(vec![1, 2, 3, usize::MIN, 0, 1, usize::MAX, 2, 1]); +} + +#[test] +fn test_i8() { + let mut vec = vec![]; + for i in i8::MIN..i8::MAX { + vec.push(i); + } + check_round_trip(vec); +} + +#[test] +fn test_i16() { + for i in i16::MIN..i16::MAX { + check_round_trip(vec![-1, 2, -3, i, i, i, 2]); + } +} + +#[test] +fn test_i32() { + check_round_trip(vec![-1, 2, -3, i32::MIN, 0, 1, i32::MAX, 2, 1]); +} + +#[test] +fn test_i64() { + check_round_trip(vec![-1, 2, -3, i64::MIN, 0, 1, i64::MAX, 2, 1]); +} + +#[test] +fn test_isize() { + check_round_trip(vec![-1, 2, -3, isize::MIN, 0, 1, isize::MAX, 2, 1]); +} + +#[test] +fn test_bool() { + check_round_trip(vec![false, true, true, false, false]); +} + +#[test] +fn test_f32() { + let mut vec = vec![]; + for i in -100..100 { + vec.push((i as f32) / 3.0); + } + check_round_trip(vec); +} + +#[test] +fn test_f64() { + let mut vec = vec![]; + for i in -100..100 { + vec.push((i as f64) / 3.0); + } + check_round_trip(vec); +} + +#[test] +fn test_char() { + let vec = vec!['a', 'b', 'c', 'd', 'A', 'X', ' ', '#', 'Ö', 'Ä', 'µ', '€']; + check_round_trip(vec); +} + +#[test] +fn test_string() { + let vec = vec![ + "abcbuÖeiovÄnameÜavmpßvmea€µsbpnvapeapmaebn".to_string(), + "abcbuÖganeiovÄnameÜavmpßvmea€µsbpnvapeapmaebn".to_string(), + "abcbuÖganeiovÄnameÜavmpßvmea€µsbpapmaebn".to_string(), + "abcbuÖganeiovÄnameÜavmpßvmeabpnvapeapmaebn".to_string(), + "abcbuÖganeiÄnameÜavmpßvmea€µsbpnvapeapmaebn".to_string(), + "abcbuÖganeiovÄnameÜavmpßvmea€µsbpmaebn".to_string(), + "abcbuÖganeiovÄnameÜavmpßvmea€µnvapeapmaebn".to_string(), + ]; + + check_round_trip(vec); +} + +#[test] +fn test_option() { + check_round_trip(vec![Some(-1i8)]); + check_round_trip(vec![Some(-2i16)]); + check_round_trip(vec![Some(-3i32)]); + check_round_trip(vec![Some(-4i64)]); + check_round_trip(vec![Some(-5isize)]); + + let none_i8: Option = None; + check_round_trip(vec![none_i8]); + + let none_i16: Option = None; + check_round_trip(vec![none_i16]); + + let none_i32: Option = None; + check_round_trip(vec![none_i32]); + + let none_i64: Option = None; + check_round_trip(vec![none_i64]); + + let none_isize: Option = None; + check_round_trip(vec![none_isize]); +} + +#[test] +fn test_struct() { + check_round_trip(vec![Struct { + a: (), + b: 10, + c: 11, + d: 12, + e: 13, + f: 14, + + g: 15, + h: 16, + i: 17, + j: 18, + k: 19, + + l: 'x', + m: "abc".to_string(), + n: 20.5, + o: 21.5, + p: false, + q: None, + }]); + + check_round_trip(vec![Struct { + a: (), + b: 101, + c: 111, + d: 121, + e: 131, + f: 141, + + g: -15, + h: -16, + i: -17, + j: -18, + k: -19, + + l: 'y', + m: "def".to_string(), + n: -20.5, + o: -21.5, + p: true, + q: Some(1234567), + }]); +} + +#[derive(PartialEq, Clone, Debug, Encodable, Decodable)] +enum Enum { + Variant1, + Variant2(usize, f32), + Variant3 { a: i32, b: char, c: bool }, +} + +#[test] +fn test_enum() { + check_round_trip(vec![ + Enum::Variant1, + Enum::Variant2(1, 2.5), + Enum::Variant3 { a: 3, b: 'b', c: false }, + Enum::Variant3 { a: -4, b: 'f', c: true }, + ]); +} + +#[test] +fn test_sequence() { + let mut vec = vec![]; + for i in -100i64..100i64 { + vec.push(i * 100000); + } + + check_round_trip(vec![vec]); +} + +#[test] +fn test_hash_map() { + use std::collections::HashMap; + let mut map = HashMap::new(); + for i in -100i64..100i64 { + map.insert(i * 100000, i * 10000); + } + + check_round_trip(vec![map]); +} + +#[test] +fn test_tuples() { + check_round_trip(vec![('x', (), false, 0.5f32)]); + check_round_trip(vec![(9i8, 10u16, 1.5f64)]); + check_round_trip(vec![(-12i16, 11u8, 12usize)]); + check_round_trip(vec![(1234567isize, 100000000000000u64, 99999999999999i64)]); + check_round_trip(vec![(String::new(), "some string".to_string())]); +} -- cgit v1.2.3