diff options
Diffstat (limited to 'xpcom/rust/nsstring')
-rw-r--r-- | xpcom/rust/nsstring/Cargo.toml | 14 | ||||
-rw-r--r-- | xpcom/rust/nsstring/src/conversions.rs | 751 | ||||
-rw-r--r-- | xpcom/rust/nsstring/src/lib.rs | 1543 |
3 files changed, 2308 insertions, 0 deletions
diff --git a/xpcom/rust/nsstring/Cargo.toml b/xpcom/rust/nsstring/Cargo.toml new file mode 100644 index 0000000000..c2f8e34b45 --- /dev/null +++ b/xpcom/rust/nsstring/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "nsstring" +version = "0.1.0" +authors = ["nobody@mozilla.com"] +license = "MPL-2.0" +description = "Rust bindings to xpcom string types" +edition = "2018" + +[features] +gecko_debug = [] + +[dependencies] +bitflags = "1.0" +encoding_rs = "0.8.0" diff --git a/xpcom/rust/nsstring/src/conversions.rs b/xpcom/rust/nsstring/src/conversions.rs new file mode 100644 index 0000000000..c72c195c08 --- /dev/null +++ b/xpcom/rust/nsstring/src/conversions.rs @@ -0,0 +1,751 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +use crate::{ + nsACString, nsAString, nsCStringLike, BulkWriteOk, Gecko_FallibleAssignCString, + Latin1StringLike, +}; +use encoding_rs::mem::*; +use encoding_rs::Encoding; +use std::slice; + +/// Required math stated in the docs of +/// `convert_utf16_to_utf8()`. +#[inline(always)] +fn times_three(a: usize) -> Option<usize> { + a.checked_mul(3) +} + +#[inline(always)] +fn identity(a: usize) -> Option<usize> { + Some(a) +} + +#[inline(always)] +fn plus_one(a: usize) -> Option<usize> { + a.checked_add(1) +} + +/// Typical cache line size per +/// https://stackoverflow.com/questions/14707803/line-size-of-l1-and-l2-caches +/// +/// For consistent behavior, not trying to use 128 on aarch64 +/// or other fanciness like that. +const CACHE_LINE: usize = 64; + +const CACHE_LINE_MASK: usize = CACHE_LINE - 1; + +/// Returns true if the string is both longer than a cache line +/// and the first cache line is ASCII. +#[inline(always)] +fn long_string_starts_with_ascii(buffer: &[u8]) -> bool { + // We examine data only up to the end of the cache line + // to make this check minimally disruptive. + if buffer.len() <= CACHE_LINE { + return false; + } + let bound = CACHE_LINE - ((buffer.as_ptr() as usize) & CACHE_LINE_MASK); + is_ascii(&buffer[..bound]) +} + +/// Returns true if the string is both longer than two cache lines +/// and the first two cache lines are Basic Latin. +#[inline(always)] +fn long_string_stars_with_basic_latin(buffer: &[u16]) -> bool { + // We look at two cache lines with code unit size of two. There is need + // to look at more than one cache line in the UTF-16 case, because looking + // at just one cache line wouldn't catch non-ASCII Latin with high enough + // probability with Latin-script languages that have relatively infrequent + // non-ASCII characters. + if buffer.len() <= CACHE_LINE { + return false; + } + let bound = (CACHE_LINE * 2 - ((buffer.as_ptr() as usize) & CACHE_LINE_MASK)) / 2; + is_basic_latin(&buffer[..bound]) +} + +// Ignoring the copy avoidance complications of conversions between Latin1 and +// UTF-8, a conversion function has the outward form of +// `fn F(&mut self, other: &[T], old_len: usize) -> Result<BulkWriteOk, ()>`, +// where `T` is either `u8` or `u16`. `other` is the slice whose converted +// content are to be appended to `self` and `old_len` indicates how many +// code unit of `self` are to be preserved (0 for the assignment case and +// `self.len()` for the appending case). +// +// As implementation parameters a conversion function needs to know the +// math for computing the worst case conversion length in code units given +// the input length in code units. For a _constant conversion_ the number +// of code units the conversion produces equals the number of code units +// in the input. For a _shinking conversion_ the maximum number of code +// units the conversion can produce equals the number of code units in +// the input, but the conversion can produce fewer code units. Still, due +// to implementation details, the function might want _one_ unit more of +// output space. For an _expanding conversion_ (no need for macro), the +// minimum number of code units produced by the conversion is the number +// of code units in the input, but the conversion can produce more. +// +// Copy avoidance conversions avoid copying a refcounted buffer when it's +// ASCII-only. +// +// Internally, a conversion function needs to know the underlying +// encoding_rs conversion function, the math for computing the required +// output buffer size and, depending on the case, the underlying +// encoding_rs ASCII prefix handling function. + +/// A conversion where the number of code units in the output is potentially +/// smaller than the number of code units in the input. +/// +/// Takes the name of the method to be generated, the name of the conversion +/// function and the type of the input slice. +/// +/// `$name` is the name of the function to generate +/// `$convert` is the underlying `encoding_rs::mem` function to use +/// `$other_ty` is the type of the input slice +/// `$math` is the worst-case length math that `$convert` expects +macro_rules! shrinking_conversion { + (name = $name:ident, + convert = $convert:ident, + other_ty = $other_ty:ty, + math = $math:ident) => { + fn $name(&mut self, other: $other_ty, old_len: usize) -> Result<BulkWriteOk, ()> { + let needed = $math(other.len()).ok_or(())?; + let mut handle = + unsafe { self.bulk_write(old_len.checked_add(needed).ok_or(())?, old_len, false)? }; + let written = $convert(other, &mut handle.as_mut_slice()[old_len..]); + let new_len = old_len + written; + Ok(handle.finish(new_len, new_len > CACHE_LINE)) + } + }; +} + +/// A conversion where the number of code units in the output is always equal +/// to the number of code units in the input. +/// +/// Takes the name of the method to be generated, the name of the conversion +/// function and the type of the input slice. +/// +/// `$name` is the name of the function to generate +/// `$convert` is the underlying `encoding_rs::mem` function to use +/// `$other_ty` is the type of the input slice +macro_rules! constant_conversion { + (name = $name:ident, + convert = $convert:ident, + other_ty = $other_ty:ty) => { + fn $name( + &mut self, + other: $other_ty, + old_len: usize, + allow_shrinking: bool, + ) -> Result<BulkWriteOk, ()> { + let new_len = old_len.checked_add(other.len()).ok_or(())?; + let mut handle = unsafe { self.bulk_write(new_len, old_len, allow_shrinking)? }; + $convert(other, &mut handle.as_mut_slice()[old_len..]); + Ok(handle.finish(new_len, false)) + } + }; +} + +/// An intermediate check for avoiding a copy and having an `nsStringBuffer` +/// refcount increment instead when both `self` and `other` are `nsACString`s, +/// `other` is entirely ASCII and all old data in `self` is discarded. +/// +/// `$name` is the name of the function to generate +/// `$impl` is the underlying conversion that takes a slice and that is used +/// when we can't just adopt the incoming buffer as-is +/// `$string_like` is the kind of input taken +macro_rules! ascii_copy_avoidance { + (name = $name:ident, + implementation = $implementation:ident, + string_like = $string_like:ident) => { + fn $name<T: $string_like + ?Sized>( + &mut self, + other: &T, + old_len: usize, + ) -> Result<BulkWriteOk, ()> { + let adapter = other.adapt(); + let other_slice = adapter.as_ref(); + let num_ascii = if adapter.is_abstract() && old_len == 0 { + let up_to = Encoding::ascii_valid_up_to(other_slice); + if up_to == other_slice.len() { + // Calling something whose argument can be obtained from + // the adapter rather than an nsStringLike avoids a huge + // lifetime mess by keeping nsStringLike and + // Latin1StringLike free of lifetime interdependencies. + if unsafe { Gecko_FallibleAssignCString(self, other.adapt().as_ptr()) } { + return Ok(BulkWriteOk {}); + } else { + return Err(()); + } + } + Some(up_to) + } else { + None + }; + self.$implementation(other_slice, old_len, num_ascii) + } + }; +} + +impl nsAString { + // Valid UTF-8 to UTF-16 + + // Documentation says the destination buffer needs to have + // as many code units as the input. + shrinking_conversion!( + name = fallible_append_str_impl, + convert = convert_str_to_utf16, + other_ty = &str, + math = identity + ); + + /// Convert a valid UTF-8 string into valid UTF-16 and replace the content + /// of this string with the conversion result. + pub fn assign_str(&mut self, other: &str) { + self.fallible_append_str_impl(other, 0) + .expect("Out of memory"); + } + + /// Convert a valid UTF-8 string into valid UTF-16 and fallibly replace the + /// content of this string with the conversion result. + pub fn fallible_assign_str(&mut self, other: &str) -> Result<(), ()> { + self.fallible_append_str_impl(other, 0).map(|_| ()) + } + + /// Convert a valid UTF-8 string into valid UTF-16 and append the conversion + /// to this string. + pub fn append_str(&mut self, other: &str) { + let len = self.len(); + self.fallible_append_str_impl(other, len) + .expect("Out of memory"); + } + + /// Convert a valid UTF-8 string into valid UTF-16 and fallibly append the + /// conversion to this string. + pub fn fallible_append_str(&mut self, other: &str) -> Result<(), ()> { + let len = self.len(); + self.fallible_append_str_impl(other, len).map(|_| ()) + } + + // Potentially-invalid UTF-8 to UTF-16 + + // Documentation says the destination buffer needs to have + // one more code unit than the input. + shrinking_conversion!( + name = fallible_append_utf8_impl, + convert = convert_utf8_to_utf16, + other_ty = &[u8], + math = plus_one + ); + + /// Convert a potentially-invalid UTF-8 string into valid UTF-16 + /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and + /// replace the content of this string with the conversion result. + pub fn assign_utf8(&mut self, other: &[u8]) { + self.fallible_append_utf8_impl(other, 0) + .expect("Out of memory"); + } + + /// Convert a potentially-invalid UTF-8 string into valid UTF-16 + /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and + /// fallibly replace the content of this string with the conversion result. + pub fn fallible_assign_utf8(&mut self, other: &[u8]) -> Result<(), ()> { + self.fallible_append_utf8_impl(other, 0).map(|_| ()) + } + + /// Convert a potentially-invalid UTF-8 string into valid UTF-16 + /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and + /// append the conversion result to this string. + pub fn append_utf8(&mut self, other: &[u8]) { + let len = self.len(); + self.fallible_append_utf8_impl(other, len) + .expect("Out of memory"); + } + + /// Convert a potentially-invalid UTF-8 string into valid UTF-16 + /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and + /// fallibly append the conversion result to this string. + pub fn fallible_append_utf8(&mut self, other: &[u8]) -> Result<(), ()> { + let len = self.len(); + self.fallible_append_utf8_impl(other, len).map(|_| ()) + } + + // Latin1 to UTF-16 + + constant_conversion!( + name = fallible_append_latin1_impl, + convert = convert_latin1_to_utf16, + other_ty = &[u8] + ); + + /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) + /// into UTF-16 and replace the content of this string with the conversion result. + pub fn assign_latin1(&mut self, other: &[u8]) { + self.fallible_append_latin1_impl(other, 0, true) + .expect("Out of memory"); + } + + /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) + /// into UTF-16 and fallibly replace the content of this string with the + /// conversion result. + pub fn fallible_assign_latin1(&mut self, other: &[u8]) -> Result<(), ()> { + self.fallible_append_latin1_impl(other, 0, true).map(|_| ()) + } + + /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) + /// into UTF-16 and append the conversion result to this string. + pub fn append_latin1(&mut self, other: &[u8]) { + let len = self.len(); + self.fallible_append_latin1_impl(other, len, false) + .expect("Out of memory"); + } + + /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) + /// into UTF-16 and fallibly append the conversion result to this string. + pub fn fallible_append_latin1(&mut self, other: &[u8]) -> Result<(), ()> { + let len = self.len(); + self.fallible_append_latin1_impl(other, len, false) + .map(|_| ()) + } +} + +impl nsACString { + // UTF-16 to UTF-8 + + fn fallible_append_utf16_to_utf8_impl( + &mut self, + other: &[u16], + old_len: usize, + ) -> Result<BulkWriteOk, ()> { + // We first size the buffer for ASCII if the first two cache lines are ASCII. If that turns out + // not to be enough, we size for the worst case given the length of the remaining input at that + // point. BUT if the worst case fits inside the inline capacity of an autostring, we skip + // the ASCII stuff. + let worst_case_needed = if let Some(inline_capacity) = self.inline_capacity() { + let worst_case = times_three(other.len()).ok_or(())?; + if worst_case <= inline_capacity { + Some(worst_case) + } else { + None + } + } else { + None + }; + let (filled, read, mut handle) = + if worst_case_needed.is_none() && long_string_stars_with_basic_latin(other) { + let new_len_with_ascii = old_len.checked_add(other.len()).ok_or(())?; + let mut handle = unsafe { self.bulk_write(new_len_with_ascii, old_len, false)? }; + let (read, written) = + convert_utf16_to_utf8_partial(other, &mut handle.as_mut_slice()[old_len..]); + let left = other.len() - read; + if left == 0 { + return Ok(handle.finish(old_len + written, true)); + } + let filled = old_len + written; + let needed = times_three(left).ok_or(())?; + let new_len = filled.checked_add(needed).ok_or(())?; + unsafe { + handle.restart_bulk_write(new_len, filled, false)?; + } + (filled, read, handle) + } else { + // Started with non-ASCII. Compute worst case + let needed = if let Some(n) = worst_case_needed { + n + } else { + times_three(other.len()).ok_or(())? + }; + let new_len = old_len.checked_add(needed).ok_or(())?; + let handle = unsafe { self.bulk_write(new_len, old_len, false)? }; + (old_len, 0, handle) + }; + let written = convert_utf16_to_utf8(&other[read..], &mut handle.as_mut_slice()[filled..]); + Ok(handle.finish(filled + written, true)) + } + + /// Convert a potentially-invalid UTF-16 string into valid UTF-8 + /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and + /// replace the content of this string with the conversion result. + pub fn assign_utf16_to_utf8(&mut self, other: &[u16]) { + self.fallible_append_utf16_to_utf8_impl(other, 0) + .expect("Out of memory"); + } + + /// Convert a potentially-invalid UTF-16 string into valid UTF-8 + /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and + /// fallibly replace the content of this string with the conversion result. + pub fn fallible_assign_utf16_to_utf8(&mut self, other: &[u16]) -> Result<(), ()> { + self.fallible_append_utf16_to_utf8_impl(other, 0) + .map(|_| ()) + } + + /// Convert a potentially-invalid UTF-16 string into valid UTF-8 + /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and + /// append the conversion result to this string. + pub fn append_utf16_to_utf8(&mut self, other: &[u16]) { + let len = self.len(); + self.fallible_append_utf16_to_utf8_impl(other, len) + .expect("Out of memory"); + } + + /// Convert a potentially-invalid UTF-16 string into valid UTF-8 + /// (replacing invalid sequences with the REPLACEMENT CHARACTER) and + /// fallibly append the conversion result to this string. + pub fn fallible_append_utf16_to_utf8(&mut self, other: &[u16]) -> Result<(), ()> { + let len = self.len(); + self.fallible_append_utf16_to_utf8_impl(other, len) + .map(|_| ()) + } + + // UTF-16 to Latin1 + + constant_conversion!( + name = fallible_append_utf16_to_latin1_lossy_impl, + convert = convert_utf16_to_latin1_lossy, + other_ty = &[u16] + ); + + /// Convert a UTF-16 string whose all code points are below U+0100 into + /// a Latin1 (scalar value is byte value; not windows-1252!) string and + /// replace the content of this string with the conversion result. + /// + /// # Panics + /// + /// If the input contains code points above U+00FF or is not valid UTF-16, + /// panics in debug mode and produces garbage in a memory-safe way in + /// release builds. The nature of the garbage may differ based on CPU + /// architecture and must not be relied upon. + pub fn assign_utf16_to_latin1_lossy(&mut self, other: &[u16]) { + self.fallible_append_utf16_to_latin1_lossy_impl(other, 0, true) + .expect("Out of memory"); + } + + /// Convert a UTF-16 string whose all code points are below U+0100 into + /// a Latin1 (scalar value is byte value; not windows-1252!) string and + /// fallibly replace the content of this string with the conversion result. + /// + /// # Panics + /// + /// If the input contains code points above U+00FF or is not valid UTF-16, + /// panics in debug mode and produces garbage in a memory-safe way in + /// release builds. The nature of the garbage may differ based on CPU + /// architecture and must not be relied upon. + pub fn fallible_assign_utf16_to_latin1_lossy(&mut self, other: &[u16]) -> Result<(), ()> { + self.fallible_append_utf16_to_latin1_lossy_impl(other, 0, true) + .map(|_| ()) + } + + /// Convert a UTF-16 string whose all code points are below U+0100 into + /// a Latin1 (scalar value is byte value; not windows-1252!) string and + /// append the conversion result to this string. + /// + /// # Panics + /// + /// If the input contains code points above U+00FF or is not valid UTF-16, + /// panics in debug mode and produces garbage in a memory-safe way in + /// release builds. The nature of the garbage may differ based on CPU + /// architecture and must not be relied upon. + pub fn append_utf16_to_latin1_lossy(&mut self, other: &[u16]) { + let len = self.len(); + self.fallible_append_utf16_to_latin1_lossy_impl(other, len, false) + .expect("Out of memory"); + } + + /// Convert a UTF-16 string whose all code points are below U+0100 into + /// a Latin1 (scalar value is byte value; not windows-1252!) string and + /// fallibly append the conversion result to this string. + /// + /// # Panics + /// + /// If the input contains code points above U+00FF or is not valid UTF-16, + /// panics in debug mode and produces garbage in a memory-safe way in + /// release builds. The nature of the garbage may differ based on CPU + /// architecture and must not be relied upon. + pub fn fallible_append_utf16_to_latin1_lossy(&mut self, other: &[u16]) -> Result<(), ()> { + let len = self.len(); + self.fallible_append_utf16_to_latin1_lossy_impl(other, len, false) + .map(|_| ()) + } + + // UTF-8 to Latin1 + + ascii_copy_avoidance!( + name = fallible_append_utf8_to_latin1_lossy_check, + implementation = fallible_append_utf8_to_latin1_lossy_impl, + string_like = nsCStringLike + ); + + fn fallible_append_utf8_to_latin1_lossy_impl( + &mut self, + other: &[u8], + old_len: usize, + maybe_num_ascii: Option<usize>, + ) -> Result<BulkWriteOk, ()> { + let new_len = old_len.checked_add(other.len()).ok_or(())?; + let num_ascii = maybe_num_ascii.unwrap_or(0); + // Already checked for overflow above, so this can't overflow. + let old_len_plus_num_ascii = old_len + num_ascii; + let mut handle = unsafe { self.bulk_write(new_len, old_len, false)? }; + let written = { + let buffer = handle.as_mut_slice(); + if num_ascii != 0 { + (&mut buffer[old_len..old_len_plus_num_ascii]).copy_from_slice(&other[..num_ascii]); + } + convert_utf8_to_latin1_lossy(&other[num_ascii..], &mut buffer[old_len_plus_num_ascii..]) + }; + Ok(handle.finish(old_len_plus_num_ascii + written, true)) + } + + /// Convert a UTF-8 string whose all code points are below U+0100 into + /// a Latin1 (scalar value is byte value; not windows-1252!) string and + /// replace the content of this string with the conversion result. + /// + /// # Panics + /// + /// If the input contains code points above U+00FF or is not valid UTF-8, + /// panics in debug mode and produces garbage in a memory-safe way in + /// release builds. The nature of the garbage may differ based on CPU + /// architecture and must not be relied upon. + pub fn assign_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T) { + self.fallible_append_utf8_to_latin1_lossy_check(other, 0) + .expect("Out of memory"); + } + + /// Convert a UTF-8 string whose all code points are below U+0100 into + /// a Latin1 (scalar value is byte value; not windows-1252!) string and + /// fallibly replace the content of this string with the conversion result. + /// + /// # Panics + /// + /// If the input contains code points above U+00FF or is not valid UTF-8, + /// panics in debug mode and produces garbage in a memory-safe way in + /// release builds. The nature of the garbage may differ based on CPU + /// architecture and must not be relied upon. + pub fn fallible_assign_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>( + &mut self, + other: &T, + ) -> Result<(), ()> { + self.fallible_append_utf8_to_latin1_lossy_check(other, 0) + .map(|_| ()) + } + + /// Convert a UTF-8 string whose all code points are below U+0100 into + /// a Latin1 (scalar value is byte value; not windows-1252!) string and + /// append the conversion result to this string. + /// + /// # Panics + /// + /// If the input contains code points above U+00FF or is not valid UTF-8, + /// panics in debug mode and produces garbage in a memory-safe way in + /// release builds. The nature of the garbage may differ based on CPU + /// architecture and must not be relied upon. + pub fn append_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>(&mut self, other: &T) { + let len = self.len(); + self.fallible_append_utf8_to_latin1_lossy_check(other, len) + .expect("Out of memory"); + } + + /// Convert a UTF-8 string whose all code points are below U+0100 into + /// a Latin1 (scalar value is byte value; not windows-1252!) string and + /// fallibly append the conversion result to this string. + /// + /// # Panics + /// + /// If the input contains code points above U+00FF or is not valid UTF-8, + /// panics in debug mode and produces garbage in a memory-safe way in + /// release builds. The nature of the garbage may differ based on CPU + /// architecture and must not be relied upon. + pub fn fallible_append_utf8_to_latin1_lossy<T: nsCStringLike + ?Sized>( + &mut self, + other: &T, + ) -> Result<(), ()> { + let len = self.len(); + self.fallible_append_utf8_to_latin1_lossy_check(other, len) + .map(|_| ()) + } + + // Latin1 to UTF-8 CString + + ascii_copy_avoidance!( + name = fallible_append_latin1_to_utf8_check, + implementation = fallible_append_latin1_to_utf8_impl, + string_like = Latin1StringLike + ); + + fn fallible_append_latin1_to_utf8_impl( + &mut self, + other: &[u8], + old_len: usize, + maybe_num_ascii: Option<usize>, + ) -> Result<BulkWriteOk, ()> { + let (filled, read, mut handle) = if let Some(num_ascii) = maybe_num_ascii { + // Wrapper checked for ASCII + let left = other.len() - num_ascii; + let filled = old_len + num_ascii; + let needed = left.checked_mul(2).ok_or(())?; + let new_len = filled.checked_add(needed).ok_or(())?; + let mut handle = unsafe { self.bulk_write(new_len, old_len, false)? }; + if num_ascii != 0 { + (&mut handle.as_mut_slice()[old_len..filled]).copy_from_slice(&other[..num_ascii]); + } + (filled, num_ascii, handle) + } else { + let worst_case_needed = if let Some(inline_capacity) = self.inline_capacity() { + let worst_case = other.len().checked_mul(2).ok_or(())?; + if worst_case <= inline_capacity { + Some(worst_case) + } else { + None + } + } else { + None + }; + if worst_case_needed.is_none() && long_string_starts_with_ascii(other) { + // Wrapper didn't check for ASCII, so let's see if `other` starts with ASCII + // `other` starts with ASCII, so let's first size the buffer + // with optimism that it's ASCII-only. + let new_len_with_ascii = old_len.checked_add(other.len()).ok_or(())?; + let mut handle = unsafe { self.bulk_write(new_len_with_ascii, old_len, false)? }; + let (read, written) = + convert_latin1_to_utf8_partial(other, &mut handle.as_mut_slice()[old_len..]); + let left = other.len() - read; + let filled = old_len + written; + if left == 0 { + // `other` fit in the initial allocation + return Ok(handle.finish(filled, true)); + } + let needed = left.checked_mul(2).ok_or(())?; + let new_len = filled.checked_add(needed).ok_or(())?; + unsafe { + handle.restart_bulk_write(new_len, filled, false)?; + } + (filled, read, handle) + } else { + // Started with non-ASCII. Assume worst case. + let needed = if let Some(n) = worst_case_needed { + n + } else { + other.len().checked_mul(2).ok_or(())? + }; + let new_len = old_len.checked_add(needed).ok_or(())?; + let handle = unsafe { self.bulk_write(new_len, old_len, false)? }; + (old_len, 0, handle) + } + }; + let written = convert_latin1_to_utf8(&other[read..], &mut handle.as_mut_slice()[filled..]); + Ok(handle.finish(filled + written, true)) + } + + /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) + /// into UTF-8 and replace the content of this string with the conversion result. + pub fn assign_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T) { + self.fallible_append_latin1_to_utf8_check(other, 0) + .expect("Out of memory"); + } + + /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) + /// into UTF-8 and fallibly replace the content of this string with the + /// conversion result. + pub fn fallible_assign_latin1_to_utf8<T: Latin1StringLike + ?Sized>( + &mut self, + other: &T, + ) -> Result<(), ()> { + self.fallible_append_latin1_to_utf8_check(other, 0) + .map(|_| ()) + } + + /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) + /// into UTF-8 and append the conversion result to this string. + pub fn append_latin1_to_utf8<T: Latin1StringLike + ?Sized>(&mut self, other: &T) { + let len = self.len(); + self.fallible_append_latin1_to_utf8_check(other, len) + .expect("Out of memory"); + } + + /// Convert a Latin1 (i.e. byte value equals scalar value; not windows-1252!) + /// into UTF-8 and fallibly append the conversion result to this string. + pub fn fallible_append_latin1_to_utf8<T: Latin1StringLike + ?Sized>( + &mut self, + other: &T, + ) -> Result<(), ()> { + let len = self.len(); + self.fallible_append_latin1_to_utf8_check(other, len) + .map(|_| ()) + } +} + +#[no_mangle] +pub unsafe extern "C" fn nsstring_fallible_append_utf8_impl( + this: *mut nsAString, + other: *const u8, + other_len: usize, + old_len: usize, +) -> bool { + let other_slice = slice::from_raw_parts(other, other_len); + (*this) + .fallible_append_utf8_impl(other_slice, old_len) + .is_ok() +} + +#[no_mangle] +pub unsafe extern "C" fn nsstring_fallible_append_latin1_impl( + this: *mut nsAString, + other: *const u8, + other_len: usize, + old_len: usize, + allow_shrinking: bool, +) -> bool { + let other_slice = slice::from_raw_parts(other, other_len); + (*this) + .fallible_append_latin1_impl(other_slice, old_len, allow_shrinking) + .is_ok() +} + +#[no_mangle] +pub unsafe extern "C" fn nscstring_fallible_append_utf16_to_utf8_impl( + this: *mut nsACString, + other: *const u16, + other_len: usize, + old_len: usize, +) -> bool { + let other_slice = slice::from_raw_parts(other, other_len); + (*this) + .fallible_append_utf16_to_utf8_impl(other_slice, old_len) + .is_ok() +} + +#[no_mangle] +pub unsafe extern "C" fn nscstring_fallible_append_utf16_to_latin1_lossy_impl( + this: *mut nsACString, + other: *const u16, + other_len: usize, + old_len: usize, + allow_shrinking: bool, +) -> bool { + let other_slice = slice::from_raw_parts(other, other_len); + (*this) + .fallible_append_utf16_to_latin1_lossy_impl(other_slice, old_len, allow_shrinking) + .is_ok() +} + +#[no_mangle] +pub unsafe extern "C" fn nscstring_fallible_append_utf8_to_latin1_lossy_check( + this: *mut nsACString, + other: *const nsACString, + old_len: usize, +) -> bool { + (*this) + .fallible_append_utf8_to_latin1_lossy_check(&*other, old_len) + .is_ok() +} + +#[no_mangle] +pub unsafe extern "C" fn nscstring_fallible_append_latin1_to_utf8_check( + this: *mut nsACString, + other: *const nsACString, + old_len: usize, +) -> bool { + (*this) + .fallible_append_latin1_to_utf8_check(&*other, old_len) + .is_ok() +} diff --git a/xpcom/rust/nsstring/src/lib.rs b/xpcom/rust/nsstring/src/lib.rs new file mode 100644 index 0000000000..521c2c8c04 --- /dev/null +++ b/xpcom/rust/nsstring/src/lib.rs @@ -0,0 +1,1543 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +//! This module provides rust bindings for the XPCOM string types. +//! +//! # TL;DR (what types should I use) +//! +//! Use `&{mut,} nsA[C]String` for functions in rust which wish to take or +//! mutate XPCOM strings. The other string types `Deref` to this type. +//! +//! Use `ns[C]String` (`ns[C]String` in C++) for string struct members, and as +//! an intermediate between rust string data structures (such as `String` or +//! `Vec<u16>`) and `&{mut,} nsA[C]String` (using `ns[C]String::from(value)`). +//! These conversions will attempt to re-use the passed-in buffer, appending a +//! null. +//! +//! Use `ns[C]Str` (`nsDependent[C]String` in C++) as an intermediate between +//! borrowed rust data structures (such as `&str` and `&[u16]`) and `&{mut,} +//! nsA[C]String` (using `ns[C]Str::from(value)`). These conversions should not +//! perform any allocations. This type is not safe to share with `C++` as a +//! struct field, but passing the borrowed `&{mut,} nsA[C]String` over FFI is +//! safe. +//! +//! Use `*{const,mut} nsA[C]String` (`{const,} nsA[C]String*` in C++) for +//! function arguments passed across the rust/C++ language boundary. +//! +//! There is currently no Rust equivalent to `nsAuto[C]String`. Implementing a +//! type that contains a pointer to an inline buffer is difficult in Rust due +//! to its move semantics, which require that it be safe to move a value by +//! copying its bits. If such a type is genuinely needed at some point, +//! <https://bugzilla.mozilla.org/show_bug.cgi?id=1403506#c6> has a sketch of +//! how to emulate it via macros. +//! +//! # String Types +//! +//! ## `nsA[C]String` +//! +//! The core types in this module are `nsAString` and `nsACString`. These types +//! are zero-sized as far as rust is concerned, and are safe to pass around +//! behind both references (in rust code), and pointers (in C++ code). They +//! represent a handle to a XPCOM string which holds either `u16` or `u8` +//! characters respectively. The backing character buffer is guaranteed to live +//! as long as the reference to the `nsAString` or `nsACString`. +//! +//! These types in rust are simply used as dummy types. References to them +//! represent a pointer to the beginning of a variable-sized `#[repr(C)]` struct +//! which is common between both C++ and Rust implementations. In C++, their +//! corresponding types are also named `nsAString` or `nsACString`, and they are +//! defined within the `nsTSubstring.{cpp,h}` file. +//! +//! ### Valid Operations +//! +//! An `&nsA[C]String` acts like rust's `&str`, in that it is a borrowed +//! reference to the backing data. When used as an argument to other functions +//! on `&mut nsA[C]String`, optimizations can be performed to avoid copying +//! buffers, as information about the backing storage is preserved. +//! +//! An `&mut nsA[C]String` acts like rust's `&mut Cow<str>`, in that it is a +//! mutable reference to a potentially borrowed string, which when modified will +//! ensure that it owns its own backing storage. This type can be appended to +//! with the methods `.append`, `.append_utf{8,16}`, and with the `write!` +//! macro, and can be assigned to with `.assign`. +//! +//! ## `ns[C]Str<'a>` +//! +//! This type is an maybe-owned string type. It acts similarially to a +//! `Cow<[{u8,u16}]>`. This type provides `Deref` and `DerefMut` implementations +//! to `nsA[C]String`, which provides the methods for manipulating this type. +//! This type's lifetime parameter, `'a`, represents the lifetime of the backing +//! storage. When modified this type may re-allocate in order to ensure that it +//! does not mutate its backing storage. +//! +//! `ns[C]Str`s can be constructed either with `ns[C]Str::new()`, which creates +//! an empty `ns[C]Str<'static>`, or through one of the provided `From` +//! implementations. Only `nsCStr` can be constructed `From<'a str>`, as +//! constructing a `nsStr` would require transcoding. Use `ns[C]String` instead. +//! +//! When passing this type by reference, prefer passing a `&nsA[C]String` or +//! `&mut nsA[C]String`. to passing this type. +//! +//! When passing this type across the language boundary, pass it as `*const +//! nsA[C]String` for an immutable reference, or `*mut nsA[C]String` for a +//! mutable reference. +//! +//! ## `ns[C]String` +//! +//! This type is an owned, null-terminated string type. This type provides +//! `Deref` and `DerefMut` implementations to `nsA[C]String`, which provides the +//! methods for manipulating this type. +//! +//! `ns[C]String`s can be constructed either with `ns[C]String::new()`, which +//! creates an empty `ns[C]String`, or through one of the provided `From` +//! implementations, which will try to avoid reallocating when possible, +//! although a terminating `null` will be added. +//! +//! When passing this type by reference, prefer passing a `&nsA[C]String` or +//! `&mut nsA[C]String`. to passing this type. +//! +//! When passing this type across the language boundary, pass it as `*const +//! nsA[C]String` for an immutable reference, or `*mut nsA[C]String` for a +//! mutable reference. This struct may also be included in `#[repr(C)]` structs +//! shared with C++. +//! +//! ## `ns[C]StringRepr` +//! +//! This crate also provides the type `ns[C]StringRepr` which acts conceptually +//! similar to an `ns[C]String`, however, it does not have a `Drop` +//! implementation. +//! +//! If this type is dropped in rust, it will not free its backing storage. This +//! can be useful when implementing FFI types which contain `ns[C]String` members +//! which invoke their member's destructors through C++ code. + +#![allow(non_camel_case_types)] +#![allow(clippy::missing_safety_doc)] +#![allow(clippy::new_without_default)] +#![allow(clippy::result_unit_err)] + +use bitflags::bitflags; +use std::borrow; +use std::cmp; +use std::fmt; +use std::marker::PhantomData; +use std::mem; +use std::ops::{Deref, DerefMut}; +use std::os::raw::c_void; +use std::ptr; +use std::slice; +use std::str; + +mod conversions; + +pub use self::conversions::nscstring_fallible_append_latin1_to_utf8_check; +pub use self::conversions::nscstring_fallible_append_utf16_to_latin1_lossy_impl; +pub use self::conversions::nscstring_fallible_append_utf16_to_utf8_impl; +pub use self::conversions::nscstring_fallible_append_utf8_to_latin1_lossy_check; +pub use self::conversions::nsstring_fallible_append_latin1_impl; +pub use self::conversions::nsstring_fallible_append_utf8_impl; + +/// A type for showing that `finish()` was called on a `BulkWriteHandle`. +/// Instantiating this type from elsewhere is basically an assertion that +/// there is no `BulkWriteHandle` around, so be very careful with instantiating +/// this type! +pub struct BulkWriteOk; + +/// Semi-arbitrary threshold below which we don't care about shrinking +/// buffers to size. Currently matches `CACHE_LINE` in the `conversions` +/// module. +const SHRINKING_THRESHOLD: usize = 64; + +/////////////////////////////////// +// Internal Implementation Flags // +/////////////////////////////////// + +bitflags! { + // While this has the same layout as u16, it cannot be passed + // over FFI safely as a u16. + #[repr(C)] + struct DataFlags: u16 { + const TERMINATED = 1 << 0; // IsTerminated returns true + const VOIDED = 1 << 1; // IsVoid returns true + const REFCOUNTED = 1 << 2; // mData points to a heap-allocated, shareable, refcounted + // buffer + const OWNED = 1 << 3; // mData points to a heap-allocated, raw buffer + const INLINE = 1 << 4; // mData points to a writable, inline buffer + const LITERAL = 1 << 5; // mData points to a string literal; TERMINATED will also be set + } +} + +bitflags! { + // While this has the same layout as u16, it cannot be passed + // over FFI safely as a u16. + #[repr(C)] + struct ClassFlags: u16 { + const INLINE = 1 << 0; // |this|'s buffer is inline + const NULL_TERMINATED = 1 << 1; // |this| requires its buffer is null-terminated + } +} + +//////////////////////////////////// +// Generic String Bindings Macros // +//////////////////////////////////// + +macro_rules! string_like { + { + char_t = $char_t: ty; + + AString = $AString: ident; + String = $String: ident; + Str = $Str: ident; + + StringLike = $StringLike: ident; + StringAdapter = $StringAdapter: ident; + } => { + /// This trait is implemented on types which are `ns[C]String`-like, in + /// that they can at very low cost be converted to a borrowed + /// `&nsA[C]String`. Unfortunately, the intermediate type + /// `ns[C]StringAdapter` is required as well due to types like `&[u8]` + /// needing to be (cheaply) wrapped in a `nsCString` on the stack to + /// create the `&nsACString`. + /// + /// This trait is used to DWIM when calling the methods on + /// `nsA[C]String`. + pub trait $StringLike { + fn adapt(&self) -> $StringAdapter; + } + + impl<'a, T: $StringLike + ?Sized> $StringLike for &'a T { + fn adapt(&self) -> $StringAdapter { + <T as $StringLike>::adapt(*self) + } + } + + impl<'a, T> $StringLike for borrow::Cow<'a, T> + where T: $StringLike + borrow::ToOwned + ?Sized { + fn adapt(&self) -> $StringAdapter { + <T as $StringLike>::adapt(self.as_ref()) + } + } + + impl $StringLike for $AString { + fn adapt(&self) -> $StringAdapter { + $StringAdapter::Abstract(self) + } + } + + impl<'a> $StringLike for $Str<'a> { + fn adapt(&self) -> $StringAdapter { + $StringAdapter::Abstract(self) + } + } + + impl $StringLike for $String { + fn adapt(&self) -> $StringAdapter { + $StringAdapter::Abstract(self) + } + } + + impl $StringLike for [$char_t] { + fn adapt(&self) -> $StringAdapter { + $StringAdapter::Borrowed($Str::from(self)) + } + } + + impl $StringLike for Vec<$char_t> { + fn adapt(&self) -> $StringAdapter { + $StringAdapter::Borrowed($Str::from(&self[..])) + } + } + + impl $StringLike for Box<[$char_t]> { + fn adapt(&self) -> $StringAdapter { + $StringAdapter::Borrowed($Str::from(&self[..])) + } + } + } +} + +impl<'a> Drop for nsAStringBulkWriteHandle<'a> { + /// This only runs in error cases. In success cases, `finish()` + /// calls `forget(self)`. + fn drop(&mut self) { + if self.capacity == 0 { + // If capacity is 0, the string is a zero-length + // string, so we have nothing to do. + return; + } + // The old zero terminator may be gone by now, so we need + // to write a new one somewhere and make length match. + // We can use a length between 1 and self.capacity. + // Seems prudent to overwrite the uninitialized memory. + // Using the length 1 leaves the shortest memory to overwrite. + // U+FFFD is the safest placeholder. Merely truncating the + // string to a zero-length string might be dangerous in some + // scenarios. See + // https://www.unicode.org/reports/tr36/#Substituting_for_Ill_Formed_Subsequences + // for closely related scenario. + unsafe { + let mut this = self.string.as_repr_mut(); + this.as_mut().length = 1u32; + *(this.as_mut().data.as_mut()) = 0xFFFDu16; + *(this.as_mut().data.as_ptr().add(1)) = 0; + } + } +} + +impl<'a> Drop for nsACStringBulkWriteHandle<'a> { + /// This only runs in error cases. In success cases, `finish()` + /// calls `forget(self)`. + fn drop(&mut self) { + if self.capacity == 0 { + // If capacity is 0, the string is a zero-length + // string, so we have nothing to do. + return; + } + // The old zero terminator may be gone by now, so we need + // to write a new one somewhere and make length match. + // We can use a length between 1 and self.capacity. + // Seems prudent to overwrite the uninitialized memory. + // Using the length 1 leaves the shortest memory to overwrite. + // U+FFFD is the safest placeholder, but when it doesn't fit, + // let's use ASCII substitute. Merely truncating the + // string to a zero-length string might be dangerous in some + // scenarios. See + // https://www.unicode.org/reports/tr36/#Substituting_for_Ill_Formed_Subsequences + // for closely related scenario. + unsafe { + let mut this = self.string.as_repr_mut(); + if self.capacity >= 3 { + this.as_mut().length = 3u32; + *(this.as_mut().data.as_mut()) = 0xEFu8; + *(this.as_mut().data.as_ptr().add(1)) = 0xBFu8; + *(this.as_mut().data.as_ptr().add(2)) = 0xBDu8; + *(this.as_mut().data.as_ptr().add(3)) = 0; + } else { + this.as_mut().length = 1u32; + *(this.as_mut().data.as_mut()) = 0x1Au8; // U+FFFD doesn't fit + *(this.as_mut().data.as_ptr().add(1)) = 0; + } + } + } +} + +macro_rules! define_string_types { + { + char_t = $char_t: ty; + + AString = $AString: ident; + String = $String: ident; + Str = $Str: ident; + + StringLike = $StringLike: ident; + StringAdapter = $StringAdapter: ident; + + StringRepr = $StringRepr: ident; + AutoStringRepr = $AutoStringRepr: ident; + + BulkWriteHandle = $BulkWriteHandle: ident; + + drop = $drop: ident; + assign = $assign: ident, $fallible_assign: ident; + take_from = $take_from: ident, $fallible_take_from: ident; + append = $append: ident, $fallible_append: ident; + set_length = $set_length: ident, $fallible_set_length: ident; + begin_writing = $begin_writing: ident, $fallible_begin_writing: ident; + start_bulk_write = $start_bulk_write: ident; + } => { + /// The representation of a `ns[C]String` type in C++. This type is + /// used internally by our definition of `ns[C]String` to ensure layout + /// compatibility with the C++ `ns[C]String` type. + /// + /// This type may also be used in place of a C++ `ns[C]String` inside of + /// struct definitions which are shared with C++, as it has identical + /// layout to our `ns[C]String` type. + /// + /// This struct will leak its data if dropped from rust. See the module + /// documentation for more information on this type. + #[repr(C)] + #[derive(Debug)] + pub struct $StringRepr { + data: ptr::NonNull<$char_t>, + length: u32, + dataflags: DataFlags, + classflags: ClassFlags, + } + + impl $StringRepr { + fn new(classflags: ClassFlags) -> $StringRepr { + static NUL: $char_t = 0; + $StringRepr { + data: unsafe { ptr::NonNull::new_unchecked(&NUL as *const _ as *mut _) }, + length: 0, + dataflags: DataFlags::TERMINATED | DataFlags::LITERAL, + classflags, + } + } + } + + impl Deref for $StringRepr { + type Target = $AString; + fn deref(&self) -> &$AString { + unsafe { + &*(self as *const _ as *const $AString) + } + } + } + + impl DerefMut for $StringRepr { + fn deref_mut(&mut self) -> &mut $AString { + unsafe { + &mut *(self as *mut _ as *mut $AString) + } + } + } + + #[repr(C)] + #[derive(Debug)] + pub struct $AutoStringRepr { + super_repr: $StringRepr, + inline_capacity: u32, + } + + pub struct $BulkWriteHandle<'a> { + string: &'a mut $AString, + capacity: usize, + } + + impl<'a> $BulkWriteHandle<'a> { + fn new(string: &'a mut $AString, capacity: usize) -> Self { + $BulkWriteHandle{ string, capacity } + } + + pub unsafe fn restart_bulk_write(&mut self, + capacity: usize, + units_to_preserve: usize, + allow_shrinking: bool) -> Result<(), ()> { + self.capacity = + self.string.start_bulk_write_impl(capacity, + units_to_preserve, + allow_shrinking)?; + Ok(()) + } + + pub fn finish(mut self, length: usize, allow_shrinking: bool) -> BulkWriteOk { + // NOTE: Drop is implemented outside the macro earlier in this file, + // because it needs to deal with different code unit representations + // for the REPLACEMENT CHARACTER in the UTF-16 and UTF-8 cases and + // needs to deal with a REPLACEMENT CHARACTER not fitting in the + // buffer in the UTF-8 case. + assert!(length <= self.capacity); + if length == 0 { + // `truncate()` is OK even when the string + // is in invalid state. + self.string.truncate(); + mem::forget(self); // Don't run the failure path in drop() + return BulkWriteOk{}; + } + if allow_shrinking && length > SHRINKING_THRESHOLD { + unsafe { + let _ = self.restart_bulk_write(length, length, true); + } + } + unsafe { + let mut this = self.string.as_repr_mut(); + this.as_mut().length = length as u32; + *(this.as_mut().data.as_ptr().add(length)) = 0; + if cfg!(debug_assertions) { + // Overwrite the unused part in debug builds. Note + // that capacity doesn't include space for the zero + // terminator, so starting after the zero-terminator + // we wrote ends up overwriting the terminator space + // not reflected in the capacity number. + // write_bytes() takes care of multiplying the length + // by the size of T. + ptr::write_bytes(this.as_mut().data.as_ptr().add(length + 1), + 0xE4u8, + self.capacity - length); + } + // We don't have a Rust interface for mozilla/MemoryChecking.h, + // so let's just not communicate with MSan/Valgrind here. + } + mem::forget(self); // Don't run the failure path in drop() + BulkWriteOk{} + } + + pub fn as_mut_slice(&mut self) -> &mut [$char_t] { + unsafe { + let mut this = self.string.as_repr_mut(); + slice::from_raw_parts_mut(this.as_mut().data.as_ptr(), self.capacity) + } + } + } + + /// This type is the abstract type which is used for interacting with + /// strings in rust. Each string type can derefence to an instance of + /// this type, which provides the useful operations on strings. + /// + /// NOTE: Rust thinks this type has a size of 0, because the data + /// associated with it is not necessarially safe to move. It is not safe + /// to construct a nsAString yourself, unless it is received by + /// dereferencing one of these types. + /// + /// NOTE: The `[u8; 0]` member is zero sized, and only exists to prevent + /// the construction by code outside of this module. It is used instead + /// of a private `()` member because the `improper_ctypes` lint complains + /// about some ZST members in `extern "C"` function declarations. + #[repr(C)] + pub struct $AString { + _prohibit_constructor: [u8; 0], + } + + impl $AString { + /// Assign the value of `other` into self, overwriting any value + /// currently stored. Performs an optimized assignment when possible + /// if `other` is a `nsA[C]String`. + pub fn assign<T: $StringLike + ?Sized>(&mut self, other: &T) { + unsafe { $assign(self, other.adapt().as_ptr()) }; + } + + /// Assign the value of `other` into self, overwriting any value + /// currently stored. Performs an optimized assignment when possible + /// if `other` is a `nsA[C]String`. + /// + /// Returns Ok(()) on success, and Err(()) if the allocation failed. + pub fn fallible_assign<T: $StringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> { + if unsafe { $fallible_assign(self, other.adapt().as_ptr()) } { + Ok(()) + } else { + Err(()) + } + } + + /// Take the value of `other` and set `self`, overwriting any value + /// currently stored. The passed-in string will be truncated. + pub fn take_from(&mut self, other: &mut $AString) { + unsafe { $take_from(self, other) }; + } + + /// Take the value of `other` and set `self`, overwriting any value + /// currently stored. If this function fails, the source string will + /// be left untouched, otherwise it will be truncated. + /// + /// Returns Ok(()) on success, and Err(()) if the allocation failed. + pub fn fallible_take_from(&mut self, other: &mut $AString) -> Result<(), ()> { + if unsafe { $fallible_take_from(self, other) } { + Ok(()) + } else { + Err(()) + } + } + + /// Append the value of `other` into self. + pub fn append<T: $StringLike + ?Sized>(&mut self, other: &T) { + unsafe { $append(self, other.adapt().as_ptr()) }; + } + + /// Append the value of `other` into self. + /// + /// Returns Ok(()) on success, and Err(()) if the allocation failed. + pub fn fallible_append<T: $StringLike + ?Sized>(&mut self, other: &T) -> Result<(), ()> { + if unsafe { $fallible_append(self, other.adapt().as_ptr()) } { + Ok(()) + } else { + Err(()) + } + } + + /// Mark the string's data as void. If `true`, the string will be truncated. + /// + /// A void string is generally converted to a `null` JS value by bindings code. + pub fn set_is_void(&mut self, is_void: bool) { + if is_void { + self.truncate(); + } + unsafe { + self.as_repr_mut().as_mut().dataflags.set(DataFlags::VOIDED, is_void); + } + } + + /// Returns whether the string's data is voided. + pub fn is_void(&self) -> bool { + self.as_repr().dataflags.contains(DataFlags::VOIDED) + } + + /// Set the length of the string to the passed-in length, and expand + /// the backing capacity to match. This method is unsafe as it can + /// expose uninitialized memory when len is greater than the current + /// length of the string. + pub unsafe fn set_length(&mut self, len: u32) { + $set_length(self, len); + } + + /// Set the length of the string to the passed-in length, and expand + /// the backing capacity to match. This method is unsafe as it can + /// expose uninitialized memory when len is greater than the current + /// length of the string. + /// + /// Returns Ok(()) on success, and Err(()) if the allocation failed. + pub unsafe fn fallible_set_length(&mut self, len: u32) -> Result<(), ()> { + if $fallible_set_length(self, len) { + Ok(()) + } else { + Err(()) + } + } + + pub fn truncate(&mut self) { + unsafe { + self.set_length(0); + } + } + + /// Get a `&mut` reference to the backing data for this string. + /// This method will allocate and copy if the current backing buffer + /// is immutable or shared. + pub fn to_mut(&mut self) -> &mut [$char_t] { + unsafe { + let len = self.len(); + if len == 0 { + // Use an arbitrary but aligned non-null value as the pointer + slice::from_raw_parts_mut(ptr::NonNull::<$char_t>::dangling().as_ptr(), 0) + } else { + slice::from_raw_parts_mut($begin_writing(self), len) + } + } + } + + /// Get a `&mut` reference to the backing data for this string. + /// This method will allocate and copy if the current backing buffer + /// is immutable or shared. + /// + /// Returns `Ok(&mut [T])` on success, and `Err(())` if the + /// allocation failed. + pub fn fallible_to_mut(&mut self) -> Result<&mut [$char_t], ()> { + unsafe { + let len = self.len(); + if len == 0 { + // Use an arbitrary but aligned non-null value as the pointer + Ok(slice::from_raw_parts_mut( + ptr::NonNull::<$char_t>::dangling().as_ptr() as *mut $char_t, 0)) + } else { + let ptr = $fallible_begin_writing(self); + if ptr.is_null() { + Err(()) + } else { + Ok(slice::from_raw_parts_mut(ptr, len)) + } + } + } + } + + /// Unshares the buffer of the string and returns a handle + /// from which a writable slice whose length is the rounded-up + /// capacity can be obtained. + /// + /// Fails also if the new length doesn't fit in 32 bits. + /// + /// # Safety + /// + /// Unsafe because of exposure of uninitialized memory. + pub unsafe fn bulk_write(&mut self, + capacity: usize, + units_to_preserve: usize, + allow_shrinking: bool) -> Result<$BulkWriteHandle, ()> { + let capacity = + self.start_bulk_write_impl(capacity, units_to_preserve, allow_shrinking)?; + Ok($BulkWriteHandle::new(self, capacity)) + } + + unsafe fn start_bulk_write_impl(&mut self, + capacity: usize, + units_to_preserve: usize, + allow_shrinking: bool) -> Result<usize, ()> { + if capacity > u32::MAX as usize { + Err(()) + } else { + let capacity32 = capacity as u32; + let rounded = $start_bulk_write(self, + capacity32, + units_to_preserve as u32, + allow_shrinking && capacity > SHRINKING_THRESHOLD); + if rounded == u32::MAX { + return Err(()) + } + Ok(rounded as usize) + } + } + + fn as_repr(&self) -> &$StringRepr { + // All $AString values point to a struct prefix which is + // identical to $StringRepr, thus we can cast `self` + // into *const $StringRepr to get the reference to the + // underlying data. + unsafe { + &*(self as *const _ as *const $StringRepr) + } + } + + fn as_repr_mut(&mut self) -> ptr::NonNull<$StringRepr> { + unsafe { ptr::NonNull::new_unchecked(self as *mut _ as *mut $StringRepr)} + } + + fn as_auto_string_repr(&self) -> Option<&$AutoStringRepr> { + if !self.as_repr().classflags.contains(ClassFlags::INLINE) { + return None; + } + + unsafe { + Some(&*(self as *const _ as *const $AutoStringRepr)) + } + } + + /// If this is an autostring, returns the capacity (excluding the + /// zero terminator) of the inline buffer within `Some()`. Otherwise + /// returns `None`. + pub fn inline_capacity(&self) -> Option<usize> { + Some(self.as_auto_string_repr()?.inline_capacity as usize) + } + } + + impl Deref for $AString { + type Target = [$char_t]; + fn deref(&self) -> &[$char_t] { + unsafe { + // All $AString values point to a struct prefix which is + // identical to $StringRepr, thus we can cast `self` + // into *const $StringRepr to get the reference to the + // underlying data. + let this = &*(self as *const _ as *const $StringRepr); + slice::from_raw_parts(this.data.as_ptr(), this.length as usize) + } + } + } + + impl AsRef<[$char_t]> for $AString { + fn as_ref(&self) -> &[$char_t] { + self + } + } + + impl cmp::PartialEq for $AString { + fn eq(&self, other: &$AString) -> bool { + &self[..] == &other[..] + } + } + + impl cmp::PartialEq<[$char_t]> for $AString { + fn eq(&self, other: &[$char_t]) -> bool { + &self[..] == other + } + } + + impl cmp::PartialEq<$String> for $AString { + fn eq(&self, other: &$String) -> bool { + self.eq(&**other) + } + } + + impl<'a> cmp::PartialEq<$Str<'a>> for $AString { + fn eq(&self, other: &$Str<'a>) -> bool { + self.eq(&**other) + } + } + + #[repr(C)] + pub struct $Str<'a> { + hdr: $StringRepr, + _marker: PhantomData<&'a [$char_t]>, + } + + impl $Str<'static> { + pub fn new() -> $Str<'static> { + $Str { + hdr: $StringRepr::new(ClassFlags::empty()), + _marker: PhantomData, + } + } + } + + impl<'a> Drop for $Str<'a> { + fn drop(&mut self) { + unsafe { + $drop(&mut **self); + } + } + } + + impl<'a> Deref for $Str<'a> { + type Target = $AString; + fn deref(&self) -> &$AString { + &self.hdr + } + } + + impl<'a> DerefMut for $Str<'a> { + fn deref_mut(&mut self) -> &mut $AString { + &mut self.hdr + } + } + + impl<'a> AsRef<[$char_t]> for $Str<'a> { + fn as_ref(&self) -> &[$char_t] { + &self + } + } + + impl<'a> From<&'a [$char_t]> for $Str<'a> { + fn from(s: &'a [$char_t]) -> $Str<'a> { + assert!(s.len() < (u32::MAX as usize)); + if s.is_empty() { + return $Str::new(); + } + $Str { + hdr: $StringRepr { + data: unsafe { ptr::NonNull::new_unchecked(s.as_ptr() as *mut _) }, + length: s.len() as u32, + dataflags: DataFlags::empty(), + classflags: ClassFlags::empty(), + }, + _marker: PhantomData, + } + } + } + + impl<'a> From<&'a Vec<$char_t>> for $Str<'a> { + fn from(s: &'a Vec<$char_t>) -> $Str<'a> { + $Str::from(&s[..]) + } + } + + impl<'a> From<&'a $AString> for $Str<'a> { + fn from(s: &'a $AString) -> $Str<'a> { + $Str::from(&s[..]) + } + } + + impl<'a> fmt::Write for $Str<'a> { + fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> { + $AString::write_str(self, s) + } + } + + impl<'a> fmt::Display for $Str<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + <$AString as fmt::Display>::fmt(self, f) + } + } + + impl<'a> fmt::Debug for $Str<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + <$AString as fmt::Debug>::fmt(self, f) + } + } + + impl<'a> cmp::PartialEq for $Str<'a> { + fn eq(&self, other: &$Str<'a>) -> bool { + $AString::eq(self, other) + } + } + + impl<'a> cmp::PartialEq<[$char_t]> for $Str<'a> { + fn eq(&self, other: &[$char_t]) -> bool { + $AString::eq(self, other) + } + } + + impl<'a, 'b> cmp::PartialEq<&'b [$char_t]> for $Str<'a> { + fn eq(&self, other: &&'b [$char_t]) -> bool { + $AString::eq(self, *other) + } + } + + impl<'a> cmp::PartialEq<str> for $Str<'a> { + fn eq(&self, other: &str) -> bool { + $AString::eq(self, other) + } + } + + impl<'a, 'b> cmp::PartialEq<&'b str> for $Str<'a> { + fn eq(&self, other: &&'b str) -> bool { + $AString::eq(self, *other) + } + } + + #[repr(C)] + pub struct $String { + hdr: $StringRepr, + } + + unsafe impl Send for $String {} + unsafe impl Sync for $String {} + + impl $String { + pub fn new() -> $String { + $String { + hdr: $StringRepr::new(ClassFlags::NULL_TERMINATED), + } + } + + /// Converts this String into a StringRepr, which will leak if the + /// repr is not passed to something that knows how to free it. + pub fn into_repr(mut self) -> $StringRepr { + mem::replace(&mut self.hdr, $StringRepr::new(ClassFlags::NULL_TERMINATED)) + } + } + + impl Drop for $String { + fn drop(&mut self) { + unsafe { + $drop(&mut **self); + } + } + } + + impl Deref for $String { + type Target = $AString; + fn deref(&self) -> &$AString { + &self.hdr + } + } + + impl DerefMut for $String { + fn deref_mut(&mut self) -> &mut $AString { + &mut self.hdr + } + } + + impl Clone for $String { + fn clone(&self) -> Self { + let mut copy = $String::new(); + copy.assign(self); + copy + } + } + + impl AsRef<[$char_t]> for $String { + fn as_ref(&self) -> &[$char_t] { + &self + } + } + + impl<'a> From<&'a [$char_t]> for $String { + fn from(s: &'a [$char_t]) -> $String { + let mut res = $String::new(); + res.assign(&$Str::from(&s[..])); + res + } + } + + impl<'a> From<&'a Vec<$char_t>> for $String { + fn from(s: &'a Vec<$char_t>) -> $String { + $String::from(&s[..]) + } + } + + impl<'a> From<&'a $AString> for $String { + fn from(s: &'a $AString) -> $String { + $String::from(&s[..]) + } + } + + impl From<Box<[$char_t]>> for $String { + fn from(s: Box<[$char_t]>) -> $String { + s.into_vec().into() + } + } + + impl From<Vec<$char_t>> for $String { + fn from(mut s: Vec<$char_t>) -> $String { + assert!(s.len() < (u32::MAX as usize)); + if s.is_empty() { + return $String::new(); + } + + let length = s.len() as u32; + s.push(0); // null terminator + + // SAFETY NOTE: This method produces an data_flags::OWNED + // ns[C]String from a Box<[$char_t]>. this is only safe + // because in the Gecko tree, we use the same allocator for + // Rust code as for C++ code, meaning that our box can be + // legally freed with libc::free(). + let ptr = s.as_mut_ptr(); + mem::forget(s); + unsafe { + Gecko_IncrementStringAdoptCount(ptr as *mut _); + } + $String { + hdr: $StringRepr { + data: unsafe { ptr::NonNull::new_unchecked(ptr) }, + length, + dataflags: DataFlags::OWNED | DataFlags::TERMINATED, + classflags: ClassFlags::NULL_TERMINATED, + } + } + } + } + + impl fmt::Write for $String { + fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> { + $AString::write_str(self, s) + } + } + + impl fmt::Display for $String { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + <$AString as fmt::Display>::fmt(self, f) + } + } + + impl fmt::Debug for $String { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + <$AString as fmt::Debug>::fmt(self, f) + } + } + + impl cmp::PartialEq for $String { + fn eq(&self, other: &$String) -> bool { + $AString::eq(self, other) + } + } + + impl cmp::PartialEq<[$char_t]> for $String { + fn eq(&self, other: &[$char_t]) -> bool { + $AString::eq(self, other) + } + } + + impl<'a> cmp::PartialEq<&'a [$char_t]> for $String { + fn eq(&self, other: &&'a [$char_t]) -> bool { + $AString::eq(self, *other) + } + } + + impl cmp::PartialEq<str> for $String { + fn eq(&self, other: &str) -> bool { + $AString::eq(self, other) + } + } + + impl<'a> cmp::PartialEq<&'a str> for $String { + fn eq(&self, other: &&'a str) -> bool { + $AString::eq(self, *other) + } + } + + /// An adapter type to allow for passing both types which coerce to + /// &[$char_type], and &$AString to a function, while still performing + /// optimized operations when passed the $AString. + pub enum $StringAdapter<'a> { + Borrowed($Str<'a>), + Abstract(&'a $AString), + } + + impl<'a> $StringAdapter<'a> { + fn as_ptr(&self) -> *const $AString { + &**self + } + } + + impl<'a> Deref for $StringAdapter<'a> { + type Target = $AString; + + fn deref(&self) -> &$AString { + match *self { + $StringAdapter::Borrowed(ref s) => s, + $StringAdapter::Abstract(ref s) => s, + } + } + } + + impl<'a> $StringAdapter<'a> { + #[allow(dead_code)] + fn is_abstract(&self) -> bool { + match *self { + $StringAdapter::Borrowed(_) => false, + $StringAdapter::Abstract(_) => true, + } + } + } + + string_like! { + char_t = $char_t; + + AString = $AString; + String = $String; + Str = $Str; + + StringLike = $StringLike; + StringAdapter = $StringAdapter; + } + } +} + +/////////////////////////////////////////// +// Bindings for nsCString (u8 char type) // +/////////////////////////////////////////// + +define_string_types! { + char_t = u8; + + AString = nsACString; + String = nsCString; + Str = nsCStr; + + StringLike = nsCStringLike; + StringAdapter = nsCStringAdapter; + + StringRepr = nsCStringRepr; + AutoStringRepr = nsAutoCStringRepr; + + BulkWriteHandle = nsACStringBulkWriteHandle; + + drop = Gecko_FinalizeCString; + assign = Gecko_AssignCString, Gecko_FallibleAssignCString; + take_from = Gecko_TakeFromCString, Gecko_FallibleTakeFromCString; + append = Gecko_AppendCString, Gecko_FallibleAppendCString; + set_length = Gecko_SetLengthCString, Gecko_FallibleSetLengthCString; + begin_writing = Gecko_BeginWritingCString, Gecko_FallibleBeginWritingCString; + start_bulk_write = Gecko_StartBulkWriteCString; +} + +impl nsACString { + /// Gets a CString as an utf-8 str or a String, trying to avoid copies, and + /// replacing invalid unicode sequences with replacement characters. + #[inline] + pub fn to_utf8(&self) -> borrow::Cow<str> { + String::from_utf8_lossy(&self[..]) + } + + #[inline] + pub unsafe fn as_str_unchecked(&self) -> &str { + if cfg!(debug_assertions) { + str::from_utf8(self).expect("Should be utf-8") + } else { + str::from_utf8_unchecked(self) + } + } +} + +impl<'a> From<&'a str> for nsCStr<'a> { + fn from(s: &'a str) -> nsCStr<'a> { + s.as_bytes().into() + } +} + +impl<'a> From<&'a String> for nsCStr<'a> { + fn from(s: &'a String) -> nsCStr<'a> { + nsCStr::from(&s[..]) + } +} + +impl<'a> From<&'a str> for nsCString { + fn from(s: &'a str) -> nsCString { + s.as_bytes().into() + } +} + +impl<'a> From<&'a String> for nsCString { + fn from(s: &'a String) -> nsCString { + nsCString::from(&s[..]) + } +} + +impl From<Box<str>> for nsCString { + fn from(s: Box<str>) -> nsCString { + s.into_string().into() + } +} + +impl From<String> for nsCString { + fn from(s: String) -> nsCString { + s.into_bytes().into() + } +} + +// Support for the write!() macro for appending to nsACStrings +impl fmt::Write for nsACString { + fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> { + self.append(s); + Ok(()) + } +} + +impl fmt::Display for nsACString { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + fmt::Display::fmt(&self.to_utf8(), f) + } +} + +impl fmt::Debug for nsACString { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + fmt::Debug::fmt(&self.to_utf8(), f) + } +} + +impl cmp::PartialEq<str> for nsACString { + fn eq(&self, other: &str) -> bool { + &self[..] == other.as_bytes() + } +} + +impl nsCStringLike for str { + fn adapt(&self) -> nsCStringAdapter { + nsCStringAdapter::Borrowed(nsCStr::from(self)) + } +} + +impl nsCStringLike for String { + fn adapt(&self) -> nsCStringAdapter { + nsCStringAdapter::Borrowed(nsCStr::from(&self[..])) + } +} + +impl nsCStringLike for Box<str> { + fn adapt(&self) -> nsCStringAdapter { + nsCStringAdapter::Borrowed(nsCStr::from(&self[..])) + } +} + +// This trait is implemented on types which are Latin1 `nsCString`-like, +// in that they can at very low cost be converted to a borrowed +// `&nsACString` and do not denote UTF-8ness in the Rust type system. +// +// This trait is used to DWIM when calling the methods on +// `nsACString`. +string_like! { + char_t = u8; + + AString = nsACString; + String = nsCString; + Str = nsCStr; + + StringLike = Latin1StringLike; + StringAdapter = nsCStringAdapter; +} + +/////////////////////////////////////////// +// Bindings for nsString (u16 char type) // +/////////////////////////////////////////// + +define_string_types! { + char_t = u16; + + AString = nsAString; + String = nsString; + Str = nsStr; + + StringLike = nsStringLike; + StringAdapter = nsStringAdapter; + + StringRepr = nsStringRepr; + AutoStringRepr = nsAutoStringRepr; + + BulkWriteHandle = nsAStringBulkWriteHandle; + + drop = Gecko_FinalizeString; + assign = Gecko_AssignString, Gecko_FallibleAssignString; + take_from = Gecko_TakeFromString, Gecko_FallibleTakeFromString; + append = Gecko_AppendString, Gecko_FallibleAppendString; + set_length = Gecko_SetLengthString, Gecko_FallibleSetLengthString; + begin_writing = Gecko_BeginWritingString, Gecko_FallibleBeginWritingString; + start_bulk_write = Gecko_StartBulkWriteString; +} + +// NOTE: The From impl for a string slice for nsString produces a <'static> +// lifetime, as it allocates. +impl<'a> From<&'a str> for nsString { + fn from(s: &'a str) -> nsString { + s.encode_utf16().collect::<Vec<u16>>().into() + } +} + +impl<'a> From<&'a String> for nsString { + fn from(s: &'a String) -> nsString { + nsString::from(&s[..]) + } +} + +// Support for the write!() macro for writing to nsStrings +impl fmt::Write for nsAString { + fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> { + // Directly invoke gecko's routines for appending utf8 strings to + // nsAString values, to avoid as much overhead as possible + self.append_str(s); + Ok(()) + } +} + +impl nsAString { + /// Turns this utf-16 string into a string, replacing invalid unicode + /// sequences with replacement characters. + /// + /// This is needed because the default ToString implementation goes through + /// fmt::Display, and thus allocates the string twice. + #[allow(clippy::inherent_to_string_shadow_display)] + pub fn to_string(&self) -> String { + String::from_utf16_lossy(&self[..]) + } +} + +impl fmt::Display for nsAString { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + fmt::Display::fmt(&self.to_string(), f) + } +} + +impl fmt::Debug for nsAString { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + fmt::Debug::fmt(&self.to_string(), f) + } +} + +impl cmp::PartialEq<str> for nsAString { + fn eq(&self, other: &str) -> bool { + other.encode_utf16().eq(self.iter().cloned()) + } +} + +#[cfg(not(feature = "gecko_debug"))] +#[allow(non_snake_case)] +unsafe fn Gecko_IncrementStringAdoptCount(_: *mut c_void) {} + +extern "C" { + #[cfg(feature = "gecko_debug")] + fn Gecko_IncrementStringAdoptCount(data: *mut c_void); + + // Gecko implementation in nsSubstring.cpp + fn Gecko_FinalizeCString(this: *mut nsACString); + + fn Gecko_AssignCString(this: *mut nsACString, other: *const nsACString); + fn Gecko_TakeFromCString(this: *mut nsACString, other: *mut nsACString); + fn Gecko_AppendCString(this: *mut nsACString, other: *const nsACString); + fn Gecko_SetLengthCString(this: *mut nsACString, length: u32); + fn Gecko_BeginWritingCString(this: *mut nsACString) -> *mut u8; + fn Gecko_FallibleAssignCString(this: *mut nsACString, other: *const nsACString) -> bool; + fn Gecko_FallibleTakeFromCString(this: *mut nsACString, other: *mut nsACString) -> bool; + fn Gecko_FallibleAppendCString(this: *mut nsACString, other: *const nsACString) -> bool; + fn Gecko_FallibleSetLengthCString(this: *mut nsACString, length: u32) -> bool; + fn Gecko_FallibleBeginWritingCString(this: *mut nsACString) -> *mut u8; + fn Gecko_StartBulkWriteCString( + this: *mut nsACString, + capacity: u32, + units_to_preserve: u32, + allow_shrinking: bool, + ) -> u32; + + fn Gecko_FinalizeString(this: *mut nsAString); + + fn Gecko_AssignString(this: *mut nsAString, other: *const nsAString); + fn Gecko_TakeFromString(this: *mut nsAString, other: *mut nsAString); + fn Gecko_AppendString(this: *mut nsAString, other: *const nsAString); + fn Gecko_SetLengthString(this: *mut nsAString, length: u32); + fn Gecko_BeginWritingString(this: *mut nsAString) -> *mut u16; + fn Gecko_FallibleAssignString(this: *mut nsAString, other: *const nsAString) -> bool; + fn Gecko_FallibleTakeFromString(this: *mut nsAString, other: *mut nsAString) -> bool; + fn Gecko_FallibleAppendString(this: *mut nsAString, other: *const nsAString) -> bool; + fn Gecko_FallibleSetLengthString(this: *mut nsAString, length: u32) -> bool; + fn Gecko_FallibleBeginWritingString(this: *mut nsAString) -> *mut u16; + fn Gecko_StartBulkWriteString( + this: *mut nsAString, + capacity: u32, + units_to_preserve: u32, + allow_shrinking: bool, + ) -> u32; +} + +////////////////////////////////////// +// Repr Validation Helper Functions // +////////////////////////////////////// + +pub mod test_helpers { + //! This module only exists to help with ensuring that the layout of the + //! structs inside of rust and C++ are identical. + //! + //! It is public to ensure that these testing functions are avaliable to + //! gtest code. + + use super::{nsACString, nsAString}; + use super::{nsCStr, nsCString, nsCStringRepr}; + use super::{nsStr, nsString, nsStringRepr}; + use super::{ClassFlags, DataFlags}; + use std::mem; + + /// Generates an #[no_mangle] extern "C" function which returns the size and + /// alignment of the given type with the given name. + macro_rules! size_align_check { + ($T:ty, $fname:ident) => { + #[no_mangle] + #[allow(non_snake_case)] + pub unsafe extern "C" fn $fname(size: *mut usize, align: *mut usize) { + *size = mem::size_of::<$T>(); + *align = mem::align_of::<$T>(); + } + }; + ($T:ty, $U:ty, $V:ty, $fname:ident) => { + #[no_mangle] + #[allow(non_snake_case)] + pub unsafe extern "C" fn $fname(size: *mut usize, align: *mut usize) { + *size = mem::size_of::<$T>(); + *align = mem::align_of::<$T>(); + + assert_eq!(*size, mem::size_of::<$U>()); + assert_eq!(*align, mem::align_of::<$U>()); + assert_eq!(*size, mem::size_of::<$V>()); + assert_eq!(*align, mem::align_of::<$V>()); + } + }; + } + + size_align_check!( + nsStringRepr, + nsString, + nsStr<'static>, + Rust_Test_ReprSizeAlign_nsString + ); + size_align_check!( + nsCStringRepr, + nsCString, + nsCStr<'static>, + Rust_Test_ReprSizeAlign_nsCString + ); + + /// Generates a $[no_mangle] extern "C" function which returns the size, + /// alignment and offset in the parent struct of a given member, with the + /// given name. + /// + /// This method can trigger Undefined Behavior if the accessing the member + /// $member on a given type would use that type's `Deref` implementation. + macro_rules! member_check { + ($T:ty, $U:ty, $V:ty, $member:ident, $method:ident) => { + #[no_mangle] + #[allow(non_snake_case)] + pub unsafe extern "C" fn $method( + size: *mut usize, + align: *mut usize, + offset: *mut usize, + ) { + // Create a temporary value of type T to get offsets, sizes + // and alignments from. + let tmp: mem::MaybeUninit<$T> = mem::MaybeUninit::uninit(); + // FIXME: This should use &raw references when available, + // this is technically UB as it creates a reference to + // uninitialized memory, but there's no better way to do + // this right now. + let tmp = &*tmp.as_ptr(); + *size = mem::size_of_val(&tmp.$member); + *align = mem::align_of_val(&tmp.$member); + *offset = (&tmp.$member as *const _ as usize) - (tmp as *const $T as usize); + + let tmp: mem::MaybeUninit<$U> = mem::MaybeUninit::uninit(); + let tmp = &*tmp.as_ptr(); + assert_eq!(*size, mem::size_of_val(&tmp.hdr.$member)); + assert_eq!(*align, mem::align_of_val(&tmp.hdr.$member)); + assert_eq!( + *offset, + (&tmp.hdr.$member as *const _ as usize) - (tmp as *const $U as usize) + ); + + let tmp: mem::MaybeUninit<$V> = mem::MaybeUninit::uninit(); + let tmp = &*tmp.as_ptr(); + assert_eq!(*size, mem::size_of_val(&tmp.hdr.$member)); + assert_eq!(*align, mem::align_of_val(&tmp.hdr.$member)); + assert_eq!( + *offset, + (&tmp.hdr.$member as *const _ as usize) - (tmp as *const $V as usize) + ); + } + }; + } + + member_check!( + nsStringRepr, + nsString, + nsStr<'static>, + data, + Rust_Test_Member_nsString_mData + ); + member_check!( + nsStringRepr, + nsString, + nsStr<'static>, + length, + Rust_Test_Member_nsString_mLength + ); + member_check!( + nsStringRepr, + nsString, + nsStr<'static>, + dataflags, + Rust_Test_Member_nsString_mDataFlags + ); + member_check!( + nsStringRepr, + nsString, + nsStr<'static>, + classflags, + Rust_Test_Member_nsString_mClassFlags + ); + member_check!( + nsCStringRepr, + nsCString, + nsCStr<'static>, + data, + Rust_Test_Member_nsCString_mData + ); + member_check!( + nsCStringRepr, + nsCString, + nsCStr<'static>, + length, + Rust_Test_Member_nsCString_mLength + ); + member_check!( + nsCStringRepr, + nsCString, + nsCStr<'static>, + dataflags, + Rust_Test_Member_nsCString_mDataFlags + ); + member_check!( + nsCStringRepr, + nsCString, + nsCStr<'static>, + classflags, + Rust_Test_Member_nsCString_mClassFlags + ); + + #[no_mangle] + #[allow(non_snake_case)] + pub unsafe extern "C" fn Rust_Test_NsStringFlags( + f_terminated: *mut u16, + f_voided: *mut u16, + f_refcounted: *mut u16, + f_owned: *mut u16, + f_inline: *mut u16, + f_literal: *mut u16, + f_class_inline: *mut u16, + f_class_null_terminated: *mut u16, + ) { + *f_terminated = DataFlags::TERMINATED.bits(); + *f_voided = DataFlags::VOIDED.bits(); + *f_refcounted = DataFlags::REFCOUNTED.bits(); + *f_owned = DataFlags::OWNED.bits(); + *f_inline = DataFlags::INLINE.bits(); + *f_literal = DataFlags::LITERAL.bits(); + *f_class_inline = ClassFlags::INLINE.bits(); + *f_class_null_terminated = ClassFlags::NULL_TERMINATED.bits(); + } + + #[no_mangle] + #[allow(non_snake_case)] + pub unsafe extern "C" fn Rust_InlineCapacityFromRust( + cstring: *const nsACString, + string: *const nsAString, + cstring_capacity: *mut usize, + string_capacity: *mut usize, + ) { + *cstring_capacity = (*cstring).inline_capacity().unwrap(); + *string_capacity = (*string).inline_capacity().unwrap(); + } +} |