diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /servo/components/style/gecko_string_cache/mod.rs | |
parent | Initial commit. (diff) | |
download | firefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip |
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'servo/components/style/gecko_string_cache/mod.rs')
-rw-r--r-- | servo/components/style/gecko_string_cache/mod.rs | 532 |
1 files changed, 532 insertions, 0 deletions
diff --git a/servo/components/style/gecko_string_cache/mod.rs b/servo/components/style/gecko_string_cache/mod.rs new file mode 100644 index 0000000000..cb040390cf --- /dev/null +++ b/servo/components/style/gecko_string_cache/mod.rs @@ -0,0 +1,532 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +#![allow(unsafe_code)] +// This is needed for the constants in atom_macro.rs, because we have some +// atoms whose names differ only by case, e.g. datetime and dateTime. +#![allow(non_upper_case_globals)] + +//! A drop-in replacement for string_cache, but backed by Gecko `nsAtom`s. + +use crate::gecko_bindings::bindings::Gecko_AddRefAtom; +use crate::gecko_bindings::bindings::Gecko_Atomize; +use crate::gecko_bindings::bindings::Gecko_Atomize16; +use crate::gecko_bindings::bindings::Gecko_ReleaseAtom; +use crate::gecko_bindings::structs::root::mozilla::detail::gGkAtoms; +use crate::gecko_bindings::structs::root::mozilla::detail::kGkAtomsArrayOffset; +use crate::gecko_bindings::structs::root::mozilla::detail::GkAtoms_Atoms_AtomsCount; +use crate::gecko_bindings::structs::{nsAtom, nsDynamicAtom, nsStaticAtom}; +use nsstring::{nsAString, nsStr}; +use precomputed_hash::PrecomputedHash; +use std::borrow::{Borrow, Cow}; +use std::char::{self, DecodeUtf16}; +use std::fmt::{self, Write}; +use std::hash::{Hash, Hasher}; +use std::iter::Cloned; +use std::mem::{self, ManuallyDrop}; +use std::num::NonZeroUsize; +use std::ops::Deref; +use std::{slice, str}; +use style_traits::SpecifiedValueInfo; +use to_shmem::{self, SharedMemoryBuilder, ToShmem}; + +#[macro_use] +#[allow(improper_ctypes, non_camel_case_types, missing_docs)] +pub mod atom_macro { + include!(concat!(env!("OUT_DIR"), "/gecko/atom_macro.rs")); +} + +#[macro_use] +pub mod namespace; + +pub use self::namespace::{Namespace, WeakNamespace}; + +/// A handle to a Gecko atom. This is a type that can represent either: +/// +/// * A strong reference to a dynamic atom (an `nsAtom` pointer), in which case +/// the `usize` just holds the pointer value. +/// +/// * A byte offset from `gGkAtoms` to the `nsStaticAtom` object (shifted to +/// the left one bit, and with the lower bit set to `1` to differentiate it +/// from the above), so `(offset << 1 | 1)`. +/// +#[derive(Eq, PartialEq)] +#[repr(C)] +pub struct Atom(NonZeroUsize); + +/// An atom *without* a strong reference. +/// +/// Only usable as `&'a WeakAtom`, +/// where `'a` is the lifetime of something that holds a strong reference to that atom. +pub struct WeakAtom(nsAtom); + +/// The number of static atoms we have. +const STATIC_ATOM_COUNT: usize = GkAtoms_Atoms_AtomsCount as usize; + +/// Returns the Gecko static atom array. +/// +/// We have this rather than use rust-bindgen to generate +/// mozilla::detail::gGkAtoms and then just reference gGkAtoms.mAtoms, so we +/// avoid a problem with lld-link.exe on Windows. +/// +/// https://bugzilla.mozilla.org/show_bug.cgi?id=1517685 +#[inline] +fn static_atoms() -> &'static [nsStaticAtom; STATIC_ATOM_COUNT] { + unsafe { + let addr = &gGkAtoms as *const _ as usize + kGkAtomsArrayOffset as usize; + &*(addr as *const _) + } +} + +/// Returns whether the specified address points to one of the nsStaticAtom +/// objects in the Gecko static atom array. +#[inline] +fn valid_static_atom_addr(addr: usize) -> bool { + unsafe { + let atoms = static_atoms(); + let start = atoms.as_ptr(); + let end = atoms.get_unchecked(STATIC_ATOM_COUNT) as *const _; + let in_range = addr >= start as usize && addr < end as usize; + let aligned = addr % mem::align_of::<nsStaticAtom>() == 0; + in_range && aligned + } +} + +impl Deref for Atom { + type Target = WeakAtom; + + #[inline] + fn deref(&self) -> &WeakAtom { + unsafe { + let addr = if self.is_static() { + (&gGkAtoms as *const _ as usize) + (self.0.get() >> 1) + } else { + self.0.get() + }; + debug_assert!(!self.is_static() || valid_static_atom_addr(addr)); + WeakAtom::new(addr as *const nsAtom) + } + } +} + +impl PrecomputedHash for Atom { + #[inline] + fn precomputed_hash(&self) -> u32 { + self.get_hash() + } +} + +impl Borrow<WeakAtom> for Atom { + #[inline] + fn borrow(&self) -> &WeakAtom { + self + } +} + +impl ToShmem for Atom { + fn to_shmem(&self, _builder: &mut SharedMemoryBuilder) -> to_shmem::Result<Self> { + if !self.is_static() { + return Err(format!( + "ToShmem failed for Atom: must be a static atom: {}", + self + )); + } + + Ok(ManuallyDrop::new(Atom(self.0))) + } +} + +impl Eq for WeakAtom {} +impl PartialEq for WeakAtom { + #[inline] + fn eq(&self, other: &Self) -> bool { + let weak: *const WeakAtom = self; + let other: *const WeakAtom = other; + weak == other + } +} + +impl PartialEq<Atom> for WeakAtom { + #[inline] + fn eq(&self, other: &Atom) -> bool { + self == &**other + } +} + +unsafe impl Send for Atom {} +unsafe impl Sync for Atom {} +unsafe impl Sync for WeakAtom {} + +impl WeakAtom { + /// Construct a `WeakAtom` from a raw `nsAtom`. + #[inline] + pub unsafe fn new<'a>(atom: *const nsAtom) -> &'a mut Self { + &mut *(atom as *mut WeakAtom) + } + + /// Clone this atom, bumping the refcount if the atom is not static. + #[inline] + pub fn clone(&self) -> Atom { + unsafe { Atom::from_raw(self.as_ptr()) } + } + + /// Get the atom hash. + #[inline] + pub fn get_hash(&self) -> u32 { + self.0.mHash + } + + /// Get the atom as a slice of utf-16 chars. + #[inline] + pub fn as_slice(&self) -> &[u16] { + let string = if self.is_static() { + let atom_ptr = self.as_ptr() as *const nsStaticAtom; + let string_offset = unsafe { (*atom_ptr).mStringOffset }; + let string_offset = -(string_offset as isize); + let u8_ptr = atom_ptr as *const u8; + // It is safe to use offset() here because both addresses are within + // the same struct, e.g. mozilla::detail::gGkAtoms. + unsafe { u8_ptr.offset(string_offset) as *const u16 } + } else { + let atom_ptr = self.as_ptr() as *const nsDynamicAtom; + // Dynamic atom chars are stored at the end of the object. + unsafe { atom_ptr.offset(1) as *const u16 } + }; + unsafe { slice::from_raw_parts(string, self.len() as usize) } + } + + // NOTE: don't expose this, since it's slow, and easy to be misused. + fn chars(&self) -> DecodeUtf16<Cloned<slice::Iter<u16>>> { + char::decode_utf16(self.as_slice().iter().cloned()) + } + + /// Execute `cb` with the string that this atom represents. + /// + /// Find alternatives to this function when possible, please, since it's + /// pretty slow. + pub fn with_str<F, Output>(&self, cb: F) -> Output + where + F: FnOnce(&str) -> Output, + { + let mut buffer = mem::MaybeUninit::<[u8; 64]>::uninit(); + let buffer = unsafe { &mut *buffer.as_mut_ptr() }; + + // The total string length in utf16 is going to be less than or equal + // the slice length (each utf16 character is going to take at least one + // and at most 2 items in the utf16 slice). + // + // Each of those characters will take at most four bytes in the utf8 + // one. Thus if the slice is less than 64 / 4 (16) we can guarantee that + // we'll decode it in place. + let owned_string; + let len = self.len(); + let utf8_slice = if len <= 16 { + let mut total_len = 0; + + for c in self.chars() { + let c = c.unwrap_or(char::REPLACEMENT_CHARACTER); + let utf8_len = c.encode_utf8(&mut buffer[total_len..]).len(); + total_len += utf8_len; + } + + let slice = unsafe { str::from_utf8_unchecked(&buffer[..total_len]) }; + debug_assert_eq!(slice, String::from_utf16_lossy(self.as_slice())); + slice + } else { + owned_string = String::from_utf16_lossy(self.as_slice()); + &*owned_string + }; + + cb(utf8_slice) + } + + /// Returns whether this atom is static. + #[inline] + pub fn is_static(&self) -> bool { + self.0.mIsStatic() != 0 + } + + /// Returns whether this atom is ascii lowercase. + #[inline] + fn is_ascii_lowercase(&self) -> bool { + self.0.mIsAsciiLowercase() != 0 + } + + /// Returns the length of the atom string. + #[inline] + pub fn len(&self) -> u32 { + self.0.mLength() + } + + /// Returns whether this atom is the empty string. + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns the atom as a mutable pointer. + #[inline] + pub fn as_ptr(&self) -> *mut nsAtom { + let const_ptr: *const nsAtom = &self.0; + const_ptr as *mut nsAtom + } + + /// Convert this atom to ASCII lower-case + pub fn to_ascii_lowercase(&self) -> Atom { + if self.is_ascii_lowercase() { + return self.clone(); + } + + let slice = self.as_slice(); + let mut buffer = mem::MaybeUninit::<[u16; 64]>::uninit(); + let buffer = unsafe { &mut *buffer.as_mut_ptr() }; + let mut vec; + let mutable_slice = if let Some(buffer_prefix) = buffer.get_mut(..slice.len()) { + buffer_prefix.copy_from_slice(slice); + buffer_prefix + } else { + vec = slice.to_vec(); + &mut vec + }; + for char16 in &mut *mutable_slice { + if *char16 <= 0x7F { + *char16 = (*char16 as u8).to_ascii_lowercase() as u16 + } + } + Atom::from(&*mutable_slice) + } + + /// Return whether two atoms are ASCII-case-insensitive matches + #[inline] + pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { + if self == other { + return true; + } + + // If we know both atoms are ascii-lowercase, then we can stick with + // pointer equality. + if self.is_ascii_lowercase() && other.is_ascii_lowercase() { + debug_assert!(!self.eq_ignore_ascii_case_slow(other)); + return false; + } + + self.eq_ignore_ascii_case_slow(other) + } + + fn eq_ignore_ascii_case_slow(&self, other: &Self) -> bool { + let a = self.as_slice(); + let b = other.as_slice(); + + if a.len() != b.len() { + return false; + } + + a.iter().zip(b).all(|(&a16, &b16)| { + if a16 <= 0x7F && b16 <= 0x7F { + (a16 as u8).eq_ignore_ascii_case(&(b16 as u8)) + } else { + a16 == b16 + } + }) + } +} + +impl fmt::Debug for WeakAtom { + fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { + write!(w, "Gecko WeakAtom({:p}, {})", self, self) + } +} + +impl fmt::Display for WeakAtom { + fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { + for c in self.chars() { + w.write_char(c.unwrap_or(char::REPLACEMENT_CHARACTER))? + } + Ok(()) + } +} + +#[inline] +unsafe fn make_handle(ptr: *const nsAtom) -> NonZeroUsize { + debug_assert!(!ptr.is_null()); + if !WeakAtom::new(ptr).is_static() { + NonZeroUsize::new_unchecked(ptr as usize) + } else { + make_static_handle(ptr as *mut nsStaticAtom) + } +} + +#[inline] +unsafe fn make_static_handle(ptr: *const nsStaticAtom) -> NonZeroUsize { + // FIXME(heycam): Use offset_from once it's stabilized. + // https://github.com/rust-lang/rust/issues/41079 + debug_assert!(valid_static_atom_addr(ptr as usize)); + let base = &gGkAtoms as *const _; + let offset = ptr as usize - base as usize; + NonZeroUsize::new_unchecked((offset << 1) | 1) +} + +impl Atom { + #[inline] + fn is_static(&self) -> bool { + self.0.get() & 1 == 1 + } + + /// Execute a callback with the atom represented by `ptr`. + pub unsafe fn with<F, R>(ptr: *const nsAtom, callback: F) -> R + where + F: FnOnce(&Atom) -> R, + { + let atom = Atom(make_handle(ptr as *mut nsAtom)); + let ret = callback(&atom); + mem::forget(atom); + ret + } + + /// Creates a static atom from its index in the static atom table, without + /// checking. + #[inline] + pub const unsafe fn from_index_unchecked(index: u16) -> Self { + // FIXME(emilio): No support for debug_assert! in const fn for now. Note + // that violating this invariant will debug-assert in the `Deref` impl + // though. + // + // debug_assert!((index as usize) < STATIC_ATOM_COUNT); + let offset = + (index as usize) * std::mem::size_of::<nsStaticAtom>() + kGkAtomsArrayOffset as usize; + Atom(NonZeroUsize::new_unchecked((offset << 1) | 1)) + } + + /// Creates an atom from an atom pointer. + #[inline(always)] + pub unsafe fn from_raw(ptr: *mut nsAtom) -> Self { + let atom = Atom(make_handle(ptr)); + if !atom.is_static() { + Gecko_AddRefAtom(ptr); + } + atom + } + + /// Creates an atom from an atom pointer that has already had AddRef + /// called on it. This may be a static or dynamic atom. + #[inline] + pub unsafe fn from_addrefed(ptr: *mut nsAtom) -> Self { + assert!(!ptr.is_null()); + Atom(make_handle(ptr)) + } + + /// Convert this atom into an addrefed nsAtom pointer. + #[inline] + pub fn into_addrefed(self) -> *mut nsAtom { + let ptr = self.as_ptr(); + mem::forget(self); + ptr + } +} + +impl Hash for Atom { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + state.write_u32(self.get_hash()); + } +} + +impl Hash for WeakAtom { + fn hash<H>(&self, state: &mut H) + where + H: Hasher, + { + state.write_u32(self.get_hash()); + } +} + +impl Clone for Atom { + #[inline(always)] + fn clone(&self) -> Atom { + unsafe { + let atom = Atom(self.0); + if !atom.is_static() { + Gecko_AddRefAtom(atom.as_ptr()); + } + atom + } + } +} + +impl Drop for Atom { + #[inline] + fn drop(&mut self) { + if !self.is_static() { + unsafe { + Gecko_ReleaseAtom(self.as_ptr()); + } + } + } +} + +impl Default for Atom { + #[inline] + fn default() -> Self { + atom!("") + } +} + +impl fmt::Debug for Atom { + fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { + write!(w, "Atom(0x{:08x}, {})", self.0, self) + } +} + +impl fmt::Display for Atom { + fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { + self.deref().fmt(w) + } +} + +impl<'a> From<&'a str> for Atom { + #[inline] + fn from(string: &str) -> Atom { + debug_assert!(string.len() <= u32::max_value() as usize); + unsafe { + Atom::from_addrefed(Gecko_Atomize( + string.as_ptr() as *const _, + string.len() as u32, + )) + } + } +} + +impl<'a> From<&'a [u16]> for Atom { + #[inline] + fn from(slice: &[u16]) -> Atom { + Atom::from(&*nsStr::from(slice)) + } +} + +impl<'a> From<&'a nsAString> for Atom { + #[inline] + fn from(string: &nsAString) -> Atom { + unsafe { Atom::from_addrefed(Gecko_Atomize16(string)) } + } +} + +impl<'a> From<Cow<'a, str>> for Atom { + #[inline] + fn from(string: Cow<'a, str>) -> Atom { + Atom::from(&*string) + } +} + +impl From<String> for Atom { + #[inline] + fn from(string: String) -> Atom { + Atom::from(&*string) + } +} + +malloc_size_of_is_0!(Atom); + +impl SpecifiedValueInfo for Atom {} |