summaryrefslogtreecommitdiffstats
path: root/compiler/rustc_serialize
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 18:31:44 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 18:31:44 +0000
commit c23a457e72abe608715ac76f076f47dc42af07a5 (patch)
tree 2772049aaf84b5c9d0ed12ec8d86812f7a7904b6 /compiler/rustc_serialize
parent Releasing progress-linux version 1.73.0+dfsg1-1~progress7.99u1. (diff)
download rustc-c23a457e72abe608715ac76f076f47dc42af07a5.tar.xz
rustc-c23a457e72abe608715ac76f076f47dc42af07a5.zip
Merging upstream version 1.74.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'compiler/rustc_serialize')
-rw-r--r-- compiler/rustc_serialize/src/leb128.rs 20
-rw-r--r-- compiler/rustc_serialize/src/lib.rs 3
-rw-r--r-- compiler/rustc_serialize/src/opaque.rs 277
-rw-r--r-- compiler/rustc_serialize/tests/leb128.rs 14
4 files changed, 107 insertions, 207 deletions
diff --git a/compiler/rustc_serialize/src/leb128.rs b/compiler/rustc_serialize/src/leb128.rs
index e568b9e67..ca661bac7 100644
--- a/compiler/rustc_serialize/src/leb128.rs
+++ b/compiler/rustc_serialize/src/leb128.rs
@@ -15,23 +15,20 @@ pub const fn largest_max_leb128_len() -> usize {
macro_rules! impl_write_unsigned_leb128 {
($fn_name:ident, $int_ty:ty) => {
#[inline]
- pub fn $fn_name(
- out: &mut [::std::mem::MaybeUninit<u8>; max_leb128_len::<$int_ty>()],
- mut value: $int_ty,
- ) -> &[u8] {
+ pub fn $fn_name(out: &mut [u8; max_leb128_len::<$int_ty>()], mut value: $int_ty) -> usize {
let mut i = 0;
loop {
if value < 0x80 {
unsafe {
- *out.get_unchecked_mut(i).as_mut_ptr() = value as u8;
+ *out.get_unchecked_mut(i) = value as u8;
}
i += 1;
break;
} else {
unsafe {
- *out.get_unchecked_mut(i).as_mut_ptr() = ((value & 0x7f) | 0x80) as u8;
+ *out.get_unchecked_mut(i) = ((value & 0x7f) | 0x80) as u8;
}
value >>= 7;
@@ -39,7 +36,7 @@ macro_rules! impl_write_unsigned_leb128 {
}
}
- unsafe { ::std::mem::MaybeUninit::slice_assume_init_ref(&out.get_unchecked(..i)) }
+ i
}
};
}
@@ -87,10 +84,7 @@ impl_read_unsigned_leb128!(read_usize_leb128, usize);
macro_rules! impl_write_signed_leb128 {
($fn_name:ident, $int_ty:ty) => {
#[inline]
- pub fn $fn_name(
- out: &mut [::std::mem::MaybeUninit<u8>; max_leb128_len::<$int_ty>()],
- mut value: $int_ty,
- ) -> &[u8] {
+ pub fn $fn_name(out: &mut [u8; max_leb128_len::<$int_ty>()], mut value: $int_ty) -> usize {
let mut i = 0;
loop {
@@ -104,7 +98,7 @@ macro_rules! impl_write_signed_leb128 {
}
unsafe {
- *out.get_unchecked_mut(i).as_mut_ptr() = byte;
+ *out.get_unchecked_mut(i) = byte;
}
i += 1;
@@ -114,7 +108,7 @@ macro_rules! impl_write_signed_leb128 {
}
}
- unsafe { ::std::mem::MaybeUninit::slice_assume_init_ref(&out.get_unchecked(..i)) }
+ i
}
};
}
diff --git a/compiler/rustc_serialize/src/lib.rs b/compiler/rustc_serialize/src/lib.rs
index ce8503918..dd40b3cf0 100644
--- a/compiler/rustc_serialize/src/lib.rs
+++ b/compiler/rustc_serialize/src/lib.rs
@@ -17,6 +17,9 @@ Core encoding and decoding interfaces.
#![feature(new_uninit)]
#![feature(allocator_api)]
#![feature(ptr_sub_ptr)]
+#![feature(slice_first_last_chunk)]
+#![feature(inline_const)]
+#![feature(const_option)]
#![cfg_attr(test, feature(test))]
#![allow(rustc::internal)]
#![deny(rustc::untranslatable_diagnostic)]
diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs
index 0ffc537ee..552554390 100644
--- a/compiler/rustc_serialize/src/opaque.rs
+++ b/compiler/rustc_serialize/src/opaque.rs
@@ -3,10 +3,8 @@ use crate::serialize::{Decodable, Decoder, Encodable, Encoder};
use std::fs::File;
use std::io::{self, Write};
use std::marker::PhantomData;
-use std::mem::MaybeUninit;
use std::ops::Range;
use std::path::Path;
-use std::ptr;
// -----------------------------------------------------------------------------
// Encoder
@@ -24,10 +22,12 @@ const BUF_SIZE: usize = 8192;
/// size of the buffer, rather than the full length of the encoded data, and
/// because it doesn't need to reallocate memory along the way.
pub struct FileEncoder {
- /// The input buffer. For adequate performance, we need more control over
- /// buffering than `BufWriter` offers. If `BufWriter` ever offers a raw
- /// buffer access API, we can use it, and remove `buf` and `buffered`.
- buf: Box<[MaybeUninit<u8>]>,
+ // The input buffer. For adequate performance, we need to be able to write
+ // directly to the unwritten region of the buffer, without calling copy_from_slice.
+ // Note that our buffer is always initialized so that we can do that direct access
+ // without unsafe code. Users of this type write many more than BUF_SIZE bytes, so the
+ // initialization is approximately free.
+ buf: Box<[u8; BUF_SIZE]>,
buffered: usize,
flushed: usize,
file: File,
@@ -38,12 +38,13 @@ pub struct FileEncoder {
impl FileEncoder {
pub fn new<P: AsRef<Path>>(path: P) -> io::Result<Self> {
- // Create the file for reading and writing, because some encoders do both
- // (e.g. the metadata encoder when -Zmeta-stats is enabled)
+ // File::create opens the file for writing only. When -Zmeta-stats is enabled, the metadata
+ // encoder rewinds the file to inspect what was written. So we need to always open the file
+ // for reading and writing.
let file = File::options().read(true).write(true).create(true).truncate(true).open(path)?;
Ok(FileEncoder {
- buf: Box::new_uninit_slice(BUF_SIZE),
+ buf: vec![0u8; BUF_SIZE].into_boxed_slice().try_into().unwrap(),
buffered: 0,
flushed: 0,
file,
@@ -54,94 +55,19 @@ impl FileEncoder {
#[inline]
pub fn position(&self) -> usize {
// Tracking position this way instead of having a `self.position` field
- // means that we don't have to update the position on every write call.
+ // means that we only need to update `self.buffered` on a write call,
+ // as opposed to updating `self.position` and `self.buffered`.
self.flushed + self.buffered
}
+ #[cold]
+ #[inline(never)]
pub fn flush(&mut self) {
- // This is basically a copy of `BufWriter::flush`. If `BufWriter` ever
- // offers a raw buffer access API, we can use it, and remove this.
-
- /// Helper struct to ensure the buffer is updated after all the writes
- /// are complete. It tracks the number of written bytes and drains them
- /// all from the front of the buffer when dropped.
- struct BufGuard<'a> {
- buffer: &'a mut [u8],
- encoder_buffered: &'a mut usize,
- encoder_flushed: &'a mut usize,
- flushed: usize,
- }
-
- impl<'a> BufGuard<'a> {
- fn new(
- buffer: &'a mut [u8],
- encoder_buffered: &'a mut usize,
- encoder_flushed: &'a mut usize,
- ) -> Self {
- assert_eq!(buffer.len(), *encoder_buffered);
- Self { buffer, encoder_buffered, encoder_flushed, flushed: 0 }
- }
-
- /// The unwritten part of the buffer
- fn remaining(&self) -> &[u8] {
- &self.buffer[self.flushed..]
- }
-
- /// Flag some bytes as removed from the front of the buffer
- fn consume(&mut self, amt: usize) {
- self.flushed += amt;
- }
-
- /// true if all of the bytes have been written
- fn done(&self) -> bool {
- self.flushed >= *self.encoder_buffered
- }
- }
-
- impl Drop for BufGuard<'_> {
- fn drop(&mut self) {
- if self.flushed > 0 {
- if self.done() {
- *self.encoder_flushed += *self.encoder_buffered;
- *self.encoder_buffered = 0;
- } else {
- self.buffer.copy_within(self.flushed.., 0);
- *self.encoder_flushed += self.flushed;
- *self.encoder_buffered -= self.flushed;
- }
- }
- }
- }
-
- // If we've already had an error, do nothing. It'll get reported after
- // `finish` is called.
- if self.res.is_err() {
- return;
- }
-
- let mut guard = BufGuard::new(
- unsafe { MaybeUninit::slice_assume_init_mut(&mut self.buf[..self.buffered]) },
- &mut self.buffered,
- &mut self.flushed,
- );
-
- while !guard.done() {
- match self.file.write(guard.remaining()) {
- Ok(0) => {
- self.res = Err(io::Error::new(
- io::ErrorKind::WriteZero,
- "failed to write the buffered data",
- ));
- return;
- }
- Ok(n) => guard.consume(n),
- Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
- Err(e) => {
- self.res = Err(e);
- return;
- }
- }
+ if self.res.is_ok() {
+ self.res = self.file.write_all(&self.buf[..self.buffered]);
}
+ self.flushed += self.buffered;
+ self.buffered = 0;
}
pub fn file(&self) -> &File {
@@ -149,91 +75,89 @@ impl FileEncoder {
}
#[inline]
- fn write_one(&mut self, value: u8) {
- let mut buffered = self.buffered;
-
- if std::intrinsics::unlikely(buffered + 1 > BUF_SIZE) {
- self.flush();
- buffered = 0;
- }
+ fn buffer_empty(&mut self) -> &mut [u8] {
+ // SAFETY: self.buffered is inbounds as an invariant of the type
+ unsafe { self.buf.get_unchecked_mut(self.buffered..) }
+ }
- // SAFETY: The above check and `flush` ensures that there is enough
- // room to write the input to the buffer.
- unsafe {
- *MaybeUninit::slice_as_mut_ptr(&mut self.buf).add(buffered) = value;
+ #[cold]
+ #[inline(never)]
+ fn write_all_cold_path(&mut self, buf: &[u8]) {
+ self.flush();
+ if let Some(dest) = self.buf.get_mut(..buf.len()) {
+ dest.copy_from_slice(buf);
+ self.buffered += buf.len();
+ } else {
+ if self.res.is_ok() {
+ self.res = self.file.write_all(buf);
+ }
+ self.flushed += buf.len();
}
-
- self.buffered = buffered + 1;
}
#[inline]
fn write_all(&mut self, buf: &[u8]) {
- let buf_len = buf.len();
-
- if std::intrinsics::likely(buf_len <= BUF_SIZE) {
- let mut buffered = self.buffered;
-
- if std::intrinsics::unlikely(buffered + buf_len > BUF_SIZE) {
- self.flush();
- buffered = 0;
- }
-
- // SAFETY: The above check and `flush` ensures that there is enough
- // room to write the input to the buffer.
- unsafe {
- let src = buf.as_ptr();
- let dst = MaybeUninit::slice_as_mut_ptr(&mut self.buf).add(buffered);
- ptr::copy_nonoverlapping(src, dst, buf_len);
- }
-
- self.buffered = buffered + buf_len;
+ if let Some(dest) = self.buffer_empty().get_mut(..buf.len()) {
+ dest.copy_from_slice(buf);
+ self.buffered += buf.len();
} else {
- self.write_all_unbuffered(buf);
+ self.write_all_cold_path(buf);
}
}
- fn write_all_unbuffered(&mut self, mut buf: &[u8]) {
- // If we've already had an error, do nothing. It'll get reported after
- // `finish` is called.
- if self.res.is_err() {
- return;
- }
-
- if self.buffered > 0 {
+ /// Write up to `N` bytes to this encoder.
+ ///
+ /// This function can be used to avoid the overhead of calling memcpy for writes that
+ /// have runtime-variable length, but are small and have a small fixed upper bound.
+ ///
+ /// This can be used to do in-place encoding as is done for leb128 (without this function
+ /// we would need to write to a temporary buffer then memcpy into the encoder), and it can
+ /// also be used to implement the varint scheme we use for rmeta and dep graph encoding,
+ /// where we only want to encode the first few bytes of an integer. Copying in the whole
+ /// integer then only advancing the encoder state for the few bytes we care about is more
+ /// efficient than calling [`FileEncoder::write_all`], because variable-size copies are
+ /// always lowered to `memcpy`, which has overhead and contains a lot of logic we can bypass
+ /// with this function. Note that common architectures support fixed-size writes up to 8 bytes
+ /// with one instruction, so while this does in some sense do wasted work, we come out ahead.
+ #[inline]
+ pub fn write_with<const N: usize>(&mut self, visitor: impl FnOnce(&mut [u8; N]) -> usize) {
+ let flush_threshold = const { BUF_SIZE.checked_sub(N).unwrap() };
+ if std::intrinsics::unlikely(self.buffered > flush_threshold) {
self.flush();
}
-
- // This is basically a copy of `Write::write_all` but also updates our
- // `self.flushed`. It's necessary because `Write::write_all` does not
- // return the number of bytes written when an error is encountered, and
- // without that, we cannot accurately update `self.flushed` on error.
- while !buf.is_empty() {
- match self.file.write(buf) {
- Ok(0) => {
- self.res = Err(io::Error::new(
- io::ErrorKind::WriteZero,
- "failed to write whole buffer",
- ));
- return;
- }
- Ok(n) => {
- buf = &buf[n..];
- self.flushed += n;
- }
- Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
- Err(e) => {
- self.res = Err(e);
- return;
- }
- }
+ // SAFETY: We checked above that N <= self.buffer_empty().len(),
+ // and if it isn't, flush ensures that our empty buffer is now BUF_SIZE.
+ // We produce a post-mono error if N > BUF_SIZE.
+ let buf = unsafe { self.buffer_empty().first_chunk_mut::<N>().unwrap_unchecked() };
+ let written = visitor(buf);
+ // We have to ensure that an errant visitor cannot cause self.buffered to exceed BUF_SIZE.
+ if written > N {
+ Self::panic_invalid_write::<N>(written);
}
+ self.buffered += written;
+ }
+
+ #[cold]
+ #[inline(never)]
+ fn panic_invalid_write<const N: usize>(written: usize) {
+ panic!("FileEncoder::write_with::<{N}> cannot be used to write {written} bytes");
+ }
+
+ /// Helper for calls where [`FileEncoder::write_with`] always writes the whole array.
+ #[inline]
+ pub fn write_array<const N: usize>(&mut self, buf: [u8; N]) {
+ self.write_with(|dest| {
+ *dest = buf;
+ N
+ })
}
pub fn finish(mut self) -> Result<usize, io::Error> {
self.flush();
-
- let res = std::mem::replace(&mut self.res, Ok(()));
- res.map(|()| self.position())
+ match std::mem::replace(&mut self.res, Ok(())) {
+ Ok(()) => Ok(self.position()),
+ Err(e) => Err(e),
+ }
}
}
@@ -241,7 +165,7 @@ impl Drop for FileEncoder {
fn drop(&mut self) {
// Likely to be a no-op, because `finish` should have been called and
// it also flushes. But do it just in case.
- let _result = self.flush();
+ self.flush();
}
}
@@ -249,26 +173,7 @@ macro_rules! write_leb128 {
($this_fn:ident, $int_ty:ty, $write_leb_fn:ident) => {
#[inline]
fn $this_fn(&mut self, v: $int_ty) {
- const MAX_ENCODED_LEN: usize = $crate::leb128::max_leb128_len::<$int_ty>();
-
- let mut buffered = self.buffered;
-
- // This can't overflow because BUF_SIZE and MAX_ENCODED_LEN are both
- // quite small.
- if std::intrinsics::unlikely(buffered + MAX_ENCODED_LEN > BUF_SIZE) {
- self.flush();
- buffered = 0;
- }
-
- // SAFETY: The above check and flush ensures that there is enough
- // room to write the encoded value to the buffer.
- let buf = unsafe {
- &mut *(self.buf.as_mut_ptr().add(buffered)
- as *mut [MaybeUninit<u8>; MAX_ENCODED_LEN])
- };
-
- let encoded = leb128::$write_leb_fn(buf, v);
- self.buffered = buffered + encoded.len();
+ self.write_with(|buf| leb128::$write_leb_fn(buf, v))
}
};
}
@@ -281,12 +186,12 @@ impl Encoder for FileEncoder {
#[inline]
fn emit_u16(&mut self, v: u16) {
- self.write_all(&v.to_le_bytes());
+ self.write_array(v.to_le_bytes());
}
#[inline]
fn emit_u8(&mut self, v: u8) {
- self.write_one(v);
+ self.write_array([v]);
}
write_leb128!(emit_isize, isize, write_isize_leb128);
@@ -296,7 +201,7 @@ impl Encoder for FileEncoder {
#[inline]
fn emit_i16(&mut self, v: i16) {
- self.write_all(&v.to_le_bytes());
+ self.write_array(v.to_le_bytes());
}
#[inline]
@@ -353,7 +258,7 @@ impl<'a> MemDecoder<'a> {
}
#[inline]
- fn read_array<const N: usize>(&mut self) -> [u8; N] {
+ pub fn read_array<const N: usize>(&mut self) -> [u8; N] {
self.read_raw_bytes(N).try_into().unwrap()
}
@@ -495,7 +400,7 @@ impl Encodable<FileEncoder> for IntEncodedWithFixedSize {
#[inline]
fn encode(&self, e: &mut FileEncoder) {
let _start_pos = e.position();
- e.emit_raw_bytes(&self.0.to_le_bytes());
+ e.write_array(self.0.to_le_bytes());
let _end_pos = e.position();
debug_assert_eq!((_end_pos - _start_pos), IntEncodedWithFixedSize::ENCODED_SIZE);
}
diff --git a/compiler/rustc_serialize/tests/leb128.rs b/compiler/rustc_serialize/tests/leb128.rs
index 7872e7784..dc9b32a96 100644
--- a/compiler/rustc_serialize/tests/leb128.rs
+++ b/compiler/rustc_serialize/tests/leb128.rs
@@ -1,8 +1,4 @@
-#![feature(maybe_uninit_slice)]
-#![feature(maybe_uninit_uninit_array)]
-
use rustc_serialize::leb128::*;
-use std::mem::MaybeUninit;
use rustc_serialize::Decoder;
macro_rules! impl_test_unsigned_leb128 {
@@ -24,9 +20,10 @@ macro_rules! impl_test_unsigned_leb128 {
let mut stream = Vec::new();
+ let mut buf = Default::default();
for &x in &values {
- let mut buf = MaybeUninit::uninit_array();
- stream.extend($write_fn_name(&mut buf, x));
+ let n = $write_fn_name(&mut buf, x);
+ stream.extend(&buf[..n]);
}
let mut decoder = rustc_serialize::opaque::MemDecoder::new(&stream, 0);
@@ -70,9 +67,10 @@ macro_rules! impl_test_signed_leb128 {
let mut stream = Vec::new();
+ let mut buf = Default::default();
for &x in &values {
- let mut buf = MaybeUninit::uninit_array();
- stream.extend($write_fn_name(&mut buf, x));
+ let n = $write_fn_name(&mut buf, x);
+ stream.extend(&buf[..n]);
}
let mut decoder = rustc_serialize::opaque::MemDecoder::new(&stream, 0);