summaryrefslogtreecommitdiffstats
path: root/third_party/rust/encoding_rs/src/handles.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/encoding_rs/src/handles.rs')
-rw-r--r--third_party/rust/encoding_rs/src/handles.rs36
1 files changed, 34 insertions, 2 deletions
diff --git a/third_party/rust/encoding_rs/src/handles.rs b/third_party/rust/encoding_rs/src/handles.rs
index b5404c01d9..f44a834672 100644
--- a/third_party/rust/encoding_rs/src/handles.rs
+++ b/third_party/rust/encoding_rs/src/handles.rs
@@ -34,7 +34,7 @@ use crate::simd_funcs::*;
all(target_endian = "little", target_feature = "neon")
)
))]
-use packed_simd::u16x8;
+use core::simd::u16x8;
use super::DecoderResult;
use super::EncoderResult;
@@ -90,19 +90,23 @@ impl Endian for LittleEndian {
#[derive(Debug, Copy, Clone)]
struct UnalignedU16Slice {
+ // Safety invariant: ptr must be valid for reading 2*len bytes
ptr: *const u8,
len: usize,
}
impl UnalignedU16Slice {
+ /// Safety: ptr must be valid for reading 2*len bytes
#[inline(always)]
pub unsafe fn new(ptr: *const u8, len: usize) -> UnalignedU16Slice {
+ // Safety: field invariant passed up to caller here
UnalignedU16Slice { ptr, len }
}
#[inline(always)]
pub fn trim_last(&mut self) {
assert!(self.len > 0);
+ // Safety: invariant upheld here: a slice is still valid with a shorter len
self.len -= 1;
}
@@ -113,7 +117,9 @@ impl UnalignedU16Slice {
assert!(i < self.len);
unsafe {
let mut u: MaybeUninit<u16> = MaybeUninit::uninit();
+ // Safety: `i < self.len` was asserted above, so the 2 bytes at offset 2*i lie within the 2*len readable bytes
::core::ptr::copy_nonoverlapping(self.ptr.add(i * 2), u.as_mut_ptr() as *mut u8, 2);
+ // Safety: the copy above initialized both bytes of `u`, and every bit pattern is a valid u16
u.assume_init()
}
}
@@ -121,8 +127,13 @@ impl UnalignedU16Slice {
#[cfg(feature = "simd-accel")]
#[inline(always)]
pub fn simd_at(&self, i: usize) -> u16x8 {
+ // Safety: i/len are on the scale of u16s, each one corresponds to 2 u8s
assert!(i + SIMD_STRIDE_SIZE / 2 <= self.len);
let byte_index = i * 2;
+ // Safety: load16_unaligned needs SIMD_STRIDE_SIZE=16 u8 elements to read,
+ // or 16/2 = 8 u16 elements to read.
+ // We have checked that we have at least that many above.
+
unsafe { to_u16_lanes(load16_unaligned(self.ptr.add(byte_index))) }
}
@@ -136,6 +147,7 @@ impl UnalignedU16Slice {
// XXX the return value should be restricted not to
// outlive self.
assert!(from <= self.len);
+ // Safety: This upholds the same invariant: `from` is in bounds and we're returning a shorter slice
unsafe { UnalignedU16Slice::new(self.ptr.add(from * 2), self.len - from) }
}
@@ -144,6 +156,8 @@ impl UnalignedU16Slice {
pub fn copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)> {
assert!(self.len <= other.len());
let mut offset = 0;
+ // Safety: SIMD_STRIDE_SIZE is measured in bytes, whereas len is in u16s. We check we can
+ // munch SIMD_STRIDE_SIZE / 2 u16s which means we can write SIMD_STRIDE_SIZE u8s
if SIMD_STRIDE_SIZE / 2 <= self.len {
let len_minus_stride = self.len - SIMD_STRIDE_SIZE / 2;
loop {
@@ -151,6 +165,7 @@ impl UnalignedU16Slice {
if E::OPPOSITE_ENDIAN {
simd = simd_byte_swap(simd);
}
+ // Safety: `self.len <= other.len()` was asserted above and `offset <= len_minus_stride`, so `other` has room for SIMD_STRIDE_SIZE / 2 u16s at `offset`
unsafe {
store8_unaligned(other.as_mut_ptr().add(offset), simd);
}
@@ -158,6 +173,7 @@ impl UnalignedU16Slice {
break;
}
offset += SIMD_STRIDE_SIZE / 2;
+ // Safety: This ensures we still have space for writing SIMD_STRIDE_SIZE u8s
if offset > len_minus_stride {
break;
}
@@ -236,6 +252,7 @@ fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
) -> CopyAsciiResult<usize, (u16, usize)> {
let len = ::core::cmp::min(src.len(), dst.len());
let mut offset = 0;
+ // Safety: This check ensures we are able to read/write at least SIMD_STRIDE_SIZE elements
if SIMD_STRIDE_SIZE <= len {
let len_minus_stride = len - SIMD_STRIDE_SIZE;
loop {
@@ -249,10 +266,13 @@ fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
break;
}
let packed = simd_pack(first, second);
+ // Safety: We are able to write SIMD_STRIDE_SIZE elements in this iteration
unsafe {
store16_unaligned(dst.as_mut_ptr().add(offset), packed);
}
offset += SIMD_STRIDE_SIZE;
+ // Safety: This is `offset > len - SIMD_STRIDE_SIZE`, which ensures that we can write at least SIMD_STRIDE_SIZE elements
+ // in the next iteration
if offset > len_minus_stride {
break;
}
@@ -637,7 +657,7 @@ impl<'a> Utf16Destination<'a> {
self.write_code_unit((0xDC00 + (astral & 0x3FF)) as u16);
}
#[inline(always)]
- pub fn write_surrogate_pair(&mut self, high: u16, low: u16) {
+ fn write_surrogate_pair(&mut self, high: u16, low: u16) {
self.write_code_unit(high);
self.write_code_unit(low);
}
@@ -646,6 +666,7 @@ impl<'a> Utf16Destination<'a> {
self.write_bmp_excl_ascii(combined);
self.write_bmp_excl_ascii(combining);
}
+ // Safety-usable invariant: CopyAsciiResult::GoOn will only contain bytes >=0x80
#[inline(always)]
pub fn copy_ascii_from_check_space_bmp<'b>(
&'b mut self,
@@ -659,6 +680,8 @@ impl<'a> Utf16Destination<'a> {
} else {
(DecoderResult::InputEmpty, src_remaining.len())
};
+ // Safety: This function is documented as needing valid pointers for src/dest and len, which
+ // is true since we've passed the minimum length of the two
match unsafe {
ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
} {
@@ -667,16 +690,20 @@ impl<'a> Utf16Destination<'a> {
self.pos += length;
return CopyAsciiResult::Stop((pending, source.pos, self.pos));
}
+ // Safety: the function is documented as returning bytes >=0x80 in the Some
Some((non_ascii, consumed)) => {
source.pos += consumed;
self.pos += consumed;
source.pos += 1; // +1 for non_ascii
+ // Safety: non-ascii bubbled out here
non_ascii
}
}
};
+ // Safety: non-ascii returned here
CopyAsciiResult::GoOn((non_ascii_ret, Utf16BmpHandle::new(self)))
}
+ // Safety-usable invariant: CopyAsciiResult::GoOn will only contain bytes >=0x80
#[inline(always)]
pub fn copy_ascii_from_check_space_astral<'b>(
&'b mut self,
@@ -691,6 +718,8 @@ impl<'a> Utf16Destination<'a> {
} else {
(DecoderResult::InputEmpty, src_remaining.len())
};
+ // Safety: This function is documented as needing valid pointers for src/dest and len, which
+ // is true since we've passed the minimum length of the two
match unsafe {
ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length)
} {
@@ -699,11 +728,13 @@ impl<'a> Utf16Destination<'a> {
self.pos += length;
return CopyAsciiResult::Stop((pending, source.pos, self.pos));
}
+ // Safety: the function is documented as returning bytes >=0x80 in the Some
Some((non_ascii, consumed)) => {
source.pos += consumed;
self.pos += consumed;
if self.pos + 1 < dst_len {
source.pos += 1; // +1 for non_ascii
+ // Safety: non-ascii bubbled out here
non_ascii
} else {
return CopyAsciiResult::Stop((
@@ -715,6 +746,7 @@ impl<'a> Utf16Destination<'a> {
}
}
};
+ // Safety: non-ascii returned here
CopyAsciiResult::GoOn((non_ascii_ret, Utf16AstralHandle::new(self)))
}
#[inline(always)]