diff options
Diffstat (limited to 'servo/components/style/selector_map.rs')
-rw-r--r-- | servo/components/style/selector_map.rs | 675 |
1 files changed, 675 insertions, 0 deletions
diff --git a/servo/components/style/selector_map.rs b/servo/components/style/selector_map.rs new file mode 100644 index 0000000000..4ea8cc1a01 --- /dev/null +++ b/servo/components/style/selector_map.rs @@ -0,0 +1,675 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +//! A data structure to efficiently index structs containing selectors by local +//! name, ids and hash. + +use crate::applicable_declarations::ApplicableDeclarationList; +use crate::context::QuirksMode; +use crate::dom::TElement; +use crate::hash::map as hash_map; +use crate::hash::{HashMap, HashSet}; +use crate::rule_tree::CascadeLevel; +use crate::selector_parser::SelectorImpl; +use crate::stylist::Rule; +use crate::{Atom, LocalName, Namespace, WeakAtom}; +use fallible::FallibleVec; +use hashglobe::FailedAllocationError; +use precomputed_hash::PrecomputedHash; +use selectors::matching::{matches_selector, ElementSelectorFlags, MatchingContext}; +use selectors::parser::{Combinator, Component, SelectorIter}; +use smallvec::SmallVec; +use std::hash::{BuildHasherDefault, Hash, Hasher}; + +/// A hasher implementation that doesn't hash anything, because it expects its +/// input to be a suitable u32 hash. +pub struct PrecomputedHasher { + hash: Option<u32>, +} + +impl Default for PrecomputedHasher { + fn default() -> Self { + Self { hash: None } + } +} + +/// A simple alias for a hashmap using PrecomputedHasher. +pub type PrecomputedHashMap<K, V> = HashMap<K, V, BuildHasherDefault<PrecomputedHasher>>; + +/// A simple alias for a hashset using PrecomputedHasher. +pub type PrecomputedHashSet<K> = HashSet<K, BuildHasherDefault<PrecomputedHasher>>; + +impl Hasher for PrecomputedHasher { + #[inline] + fn write(&mut self, _: &[u8]) { + unreachable!( + "Called into PrecomputedHasher with something that isn't \ + a u32" + ) + } + + #[inline] + fn write_u32(&mut self, i: u32) { + debug_assert!(self.hash.is_none()); + self.hash = Some(i); + } + + #[inline] + fn finish(&self) -> u64 { + self.hash.expect("PrecomputedHasher wasn't fed?") as u64 + } +} + +/// A trait to abstract over a given selector map entry. +pub trait SelectorMapEntry: Sized + Clone { + /// Gets the selector we should use to index in the selector map. + fn selector(&self) -> SelectorIter<SelectorImpl>; +} + +/// Map element data to selector-providing objects for which the last simple +/// selector starts with them. +/// +/// e.g., +/// "p > img" would go into the set of selectors corresponding to the +/// element "img" +/// "a .foo .bar.baz" would go into the set of selectors corresponding to +/// the class "bar" +/// +/// Because we match selectors right-to-left (i.e., moving up the tree +/// from an element), we need to compare the last simple selector in the +/// selector with the element. +/// +/// So, if an element has ID "id1" and classes "foo" and "bar", then all +/// the rules it matches will have their last simple selector starting +/// either with "#id1" or with ".foo" or with ".bar". +/// +/// Hence, the union of the rules keyed on each of element's classes, ID, +/// element name, etc. will contain the Selectors that actually match that +/// element. +/// +/// We use a 1-entry SmallVec to avoid a separate heap allocation in the case +/// where we only have one entry, which is quite common. See measurements in: +/// * https://bugzilla.mozilla.org/show_bug.cgi?id=1363789#c5 +/// * https://bugzilla.mozilla.org/show_bug.cgi?id=681755 +/// +/// TODO: Tune the initial capacity of the HashMap +#[derive(Debug, MallocSizeOf)] +pub struct SelectorMap<T: 'static> { + /// Rules that have `:root` selectors. + pub root: SmallVec<[T; 1]>, + /// A hash from an ID to rules which contain that ID selector. + pub id_hash: MaybeCaseInsensitiveHashMap<Atom, SmallVec<[T; 1]>>, + /// A hash from a class name to rules which contain that class selector. + pub class_hash: MaybeCaseInsensitiveHashMap<Atom, SmallVec<[T; 1]>>, + /// A hash from local name to rules which contain that local name selector. + pub local_name_hash: PrecomputedHashMap<LocalName, SmallVec<[T; 1]>>, + /// A hash from namespace to rules which contain that namespace selector. + pub namespace_hash: PrecomputedHashMap<Namespace, SmallVec<[T; 1]>>, + /// All other rules. + pub other: SmallVec<[T; 1]>, + /// The number of entries in this map. + pub count: usize, +} + +impl<T: 'static> Default for SelectorMap<T> { + #[inline] + fn default() -> Self { + Self::new() + } +} + +// FIXME(Manishearth) the 'static bound can be removed when +// our HashMap fork (hashglobe) is able to use NonZero, +// or when stdlib gets fallible collections +impl<T: 'static> SelectorMap<T> { + /// Trivially constructs an empty `SelectorMap`. + pub fn new() -> Self { + SelectorMap { + root: SmallVec::new(), + id_hash: MaybeCaseInsensitiveHashMap::new(), + class_hash: MaybeCaseInsensitiveHashMap::new(), + local_name_hash: HashMap::default(), + namespace_hash: HashMap::default(), + other: SmallVec::new(), + count: 0, + } + } + + /// Clears the hashmap retaining storage. + pub fn clear(&mut self) { + self.root.clear(); + self.id_hash.clear(); + self.class_hash.clear(); + self.local_name_hash.clear(); + self.namespace_hash.clear(); + self.other.clear(); + self.count = 0; + } + + /// Returns whether there are any entries in the map. + pub fn is_empty(&self) -> bool { + self.count == 0 + } + + /// Returns the number of entries. + pub fn len(&self) -> usize { + self.count + } +} + +impl SelectorMap<Rule> { + /// Append to `rule_list` all Rules in `self` that match element. + /// + /// Extract matching rules as per element's ID, classes, tag name, etc.. + /// Sort the Rules at the end to maintain cascading order. + pub fn get_all_matching_rules<E, F>( + &self, + element: E, + rule_hash_target: E, + matching_rules_list: &mut ApplicableDeclarationList, + context: &mut MatchingContext<E::Impl>, + flags_setter: &mut F, + cascade_level: CascadeLevel, + ) where + E: TElement, + F: FnMut(&E, ElementSelectorFlags), + { + if self.is_empty() { + return; + } + + let quirks_mode = context.quirks_mode(); + + if rule_hash_target.is_root() { + SelectorMap::get_matching_rules( + element, + &self.root, + matching_rules_list, + context, + flags_setter, + cascade_level, + ); + } + + if let Some(id) = rule_hash_target.id() { + if let Some(rules) = self.id_hash.get(id, quirks_mode) { + SelectorMap::get_matching_rules( + element, + rules, + matching_rules_list, + context, + flags_setter, + cascade_level, + ) + } + } + + rule_hash_target.each_class(|class| { + if let Some(rules) = self.class_hash.get(&class, quirks_mode) { + SelectorMap::get_matching_rules( + element, + rules, + matching_rules_list, + context, + flags_setter, + cascade_level, + ) + } + }); + + if let Some(rules) = self.local_name_hash.get(rule_hash_target.local_name()) { + SelectorMap::get_matching_rules( + element, + rules, + matching_rules_list, + context, + flags_setter, + cascade_level, + ) + } + + if let Some(rules) = self.namespace_hash.get(rule_hash_target.namespace()) { + SelectorMap::get_matching_rules( + element, + rules, + matching_rules_list, + context, + flags_setter, + cascade_level, + ) + } + + SelectorMap::get_matching_rules( + element, + &self.other, + matching_rules_list, + context, + flags_setter, + cascade_level, + ); + } + + /// Adds rules in `rules` that match `element` to the `matching_rules` list. + pub(crate) fn get_matching_rules<E, F>( + element: E, + rules: &[Rule], + matching_rules: &mut ApplicableDeclarationList, + context: &mut MatchingContext<E::Impl>, + flags_setter: &mut F, + cascade_level: CascadeLevel, + ) where + E: TElement, + F: FnMut(&E, ElementSelectorFlags), + { + for rule in rules { + if matches_selector( + &rule.selector, + 0, + Some(&rule.hashes), + &element, + context, + flags_setter, + ) { + matching_rules.push(rule.to_applicable_declaration_block(cascade_level)); + } + } + } +} + +impl<T: SelectorMapEntry> SelectorMap<T> { + /// Inserts an entry into the correct bucket(s). + pub fn insert( + &mut self, + entry: T, + quirks_mode: QuirksMode, + ) -> Result<(), FailedAllocationError> { + self.count += 1; + + // NOTE(emilio): It'd be nice for this to be a separate function, but + // then the compiler can't reason about the lifetime dependency between + // `entry` and `bucket`, and would force us to clone the rule in the + // common path. + macro_rules! insert_into_bucket { + ($entry:ident, $bucket:expr) => {{ + match $bucket { + Bucket::Root => &mut self.root, + Bucket::ID(id) => self + .id_hash + .try_entry(id.clone(), quirks_mode)? + .or_insert_with(SmallVec::new), + Bucket::Class(class) => self + .class_hash + .try_entry(class.clone(), quirks_mode)? + .or_insert_with(SmallVec::new), + Bucket::LocalName { name, lower_name } => { + // If the local name in the selector isn't lowercase, + // insert it into the rule hash twice. This means that, + // during lookup, we can always find the rules based on + // the local name of the element, regardless of whether + // it's an html element in an html document (in which + // case we match against lower_name) or not (in which + // case we match against name). + // + // In the case of a non-html-element-in-html-document + // with a lowercase localname and a non-lowercase + // selector, the rulehash lookup may produce superfluous + // selectors, but the subsequent selector matching work + // will filter them out. + if name != lower_name { + self.local_name_hash + .try_entry(lower_name.clone())? + .or_insert_with(SmallVec::new) + .try_push($entry.clone())?; + } + self.local_name_hash + .try_entry(name.clone())? + .or_insert_with(SmallVec::new) + }, + Bucket::Namespace(url) => self + .namespace_hash + .try_entry(url.clone())? + .or_insert_with(SmallVec::new), + Bucket::Universal => &mut self.other, + } + .try_push($entry)?; + }}; + } + + let bucket = { + let mut disjoint_buckets = SmallVec::new(); + let bucket = find_bucket(entry.selector(), &mut disjoint_buckets); + + // See if inserting this selector in multiple entries in the + // selector map would be worth it. Consider a case like: + // + // .foo:where(div, #bar) + // + // There, `bucket` would be `Class(foo)`, and disjoint_buckets would + // be `[LocalName { div }, ID(bar)]`. + // + // Here we choose to insert the selector in the `.foo` bucket in + // such a case, as it's likely more worth it than inserting it in + // both `div` and `#bar`. + // + // This is specially true if there's any universal selector in the + // `disjoint_selectors` set, at which point we'd just be doing + // wasted work. + if !disjoint_buckets.is_empty() && + disjoint_buckets + .iter() + .all(|b| b.more_specific_than(&bucket)) + { + for bucket in &disjoint_buckets { + let entry = entry.clone(); + insert_into_bucket!(entry, *bucket); + } + return Ok(()); + } + bucket + }; + + insert_into_bucket!(entry, bucket); + Ok(()) + } + + /// Looks up entries by id, class, local name, namespace, and other (in + /// order). + /// + /// Each entry is passed to the callback, which returns true to continue + /// iterating entries, or false to terminate the lookup. + /// + /// Returns false if the callback ever returns false. + /// + /// FIXME(bholley) This overlaps with SelectorMap<Rule>::get_all_matching_rules, + /// but that function is extremely hot and I'd rather not rearrange it. + #[inline] + pub fn lookup<'a, E, F>(&'a self, element: E, quirks_mode: QuirksMode, mut f: F) -> bool + where + E: TElement, + F: FnMut(&'a T) -> bool, + { + if element.is_root() { + for entry in self.root.iter() { + if !f(&entry) { + return false; + } + } + } + + if let Some(id) = element.id() { + if let Some(v) = self.id_hash.get(id, quirks_mode) { + for entry in v.iter() { + if !f(&entry) { + return false; + } + } + } + } + + let mut done = false; + element.each_class(|class| { + if !done { + if let Some(v) = self.class_hash.get(class, quirks_mode) { + for entry in v.iter() { + if !f(&entry) { + done = true; + return; + } + } + } + } + }); + if done { + return false; + } + + if let Some(v) = self.local_name_hash.get(element.local_name()) { + for entry in v.iter() { + if !f(&entry) { + return false; + } + } + } + + if let Some(v) = self.namespace_hash.get(element.namespace()) { + for entry in v.iter() { + if !f(&entry) { + return false; + } + } + } + + for entry in self.other.iter() { + if !f(&entry) { + return false; + } + } + + true + } + + /// Performs a normal lookup, and also looks up entries for the passed-in + /// id and classes. + /// + /// Each entry is passed to the callback, which returns true to continue + /// iterating entries, or false to terminate the lookup. + /// + /// Returns false if the callback ever returns false. + #[inline] + pub fn lookup_with_additional<'a, E, F>( + &'a self, + element: E, + quirks_mode: QuirksMode, + additional_id: Option<&WeakAtom>, + additional_classes: &[Atom], + mut f: F, + ) -> bool + where + E: TElement, + F: FnMut(&'a T) -> bool, + { + // Do the normal lookup. + if !self.lookup(element, quirks_mode, |entry| f(entry)) { + return false; + } + + // Check the additional id. + if let Some(id) = additional_id { + if let Some(v) = self.id_hash.get(id, quirks_mode) { + for entry in v.iter() { + if !f(&entry) { + return false; + } + } + } + } + + // Check the additional classes. + for class in additional_classes { + if let Some(v) = self.class_hash.get(class, quirks_mode) { + for entry in v.iter() { + if !f(&entry) { + return false; + } + } + } + } + + true + } +} + +enum Bucket<'a> { + Universal, + Namespace(&'a Namespace), + LocalName { + name: &'a LocalName, + lower_name: &'a LocalName, + }, + Class(&'a Atom), + ID(&'a Atom), + Root, +} + +impl<'a> Bucket<'a> { + /// root > id > class > local name > namespace > universal. + #[inline] + fn specificity(&self) -> usize { + match *self { + Bucket::Universal => 0, + Bucket::Namespace(..) => 1, + Bucket::LocalName { .. } => 2, + Bucket::Class(..) => 3, + Bucket::ID(..) => 4, + Bucket::Root => 5, + } + } + + #[inline] + fn more_specific_than(&self, other: &Self) -> bool { + self.specificity() > other.specificity() + } +} + +type DisjointBuckets<'a> = SmallVec<[Bucket<'a>; 5]>; + +fn specific_bucket_for<'a>( + component: &'a Component<SelectorImpl>, + disjoint_buckets: &mut DisjointBuckets<'a>, +) -> Bucket<'a> { + match *component { + Component::Root => Bucket::Root, + Component::ID(ref id) => Bucket::ID(id), + Component::Class(ref class) => Bucket::Class(class), + Component::LocalName(ref selector) => Bucket::LocalName { + name: &selector.name, + lower_name: &selector.lower_name, + }, + Component::Namespace(_, ref url) | Component::DefaultNamespace(ref url) => { + Bucket::Namespace(url) + }, + // ::slotted(..) isn't a normal pseudo-element, so we can insert it on + // the rule hash normally without much problem. For example, in a + // selector like: + // + // div::slotted(span)::before + // + // It looks like: + // + // [ + // LocalName(div), + // Combinator(SlotAssignment), + // Slotted(span), + // Combinator::PseudoElement, + // PseudoElement(::before), + // ] + // + // So inserting `span` in the rule hash makes sense since we want to + // match the slotted <span>. + Component::Slotted(ref selector) => find_bucket(selector.iter(), disjoint_buckets), + Component::Host(Some(ref selector)) => find_bucket(selector.iter(), disjoint_buckets), + Component::Is(ref list) | Component::Where(ref list) => { + if list.len() == 1 { + find_bucket(list[0].iter(), disjoint_buckets) + } else { + for selector in &**list { + let bucket = find_bucket(selector.iter(), disjoint_buckets); + disjoint_buckets.push(bucket); + } + Bucket::Universal + } + }, + _ => Bucket::Universal, + } +} + +/// Searches a compound selector from left to right, and returns the appropriate +/// bucket for it. +/// +/// It also populates disjoint_buckets with dependencies from nested selectors +/// with any semantics like :is() and :where(). +#[inline(always)] +fn find_bucket<'a>( + mut iter: SelectorIter<'a, SelectorImpl>, + disjoint_buckets: &mut DisjointBuckets<'a>, +) -> Bucket<'a> { + let mut current_bucket = Bucket::Universal; + + loop { + for ss in &mut iter { + let new_bucket = specific_bucket_for(ss, disjoint_buckets); + if new_bucket.more_specific_than(¤t_bucket) { + current_bucket = new_bucket; + } + } + + // Effectively, pseudo-elements are ignored, given only state + // pseudo-classes may appear before them. + if iter.next_sequence() != Some(Combinator::PseudoElement) { + break; + } + } + + current_bucket +} + +/// Wrapper for PrecomputedHashMap that does ASCII-case-insensitive lookup in quirks mode. +#[derive(Debug, MallocSizeOf)] +pub struct MaybeCaseInsensitiveHashMap<K: PrecomputedHash + Hash + Eq, V: 'static>( + PrecomputedHashMap<K, V>, +); + +impl<V: 'static> Default for MaybeCaseInsensitiveHashMap<Atom, V> { + #[inline] + fn default() -> Self { + MaybeCaseInsensitiveHashMap(PrecomputedHashMap::default()) + } +} + +// FIXME(Manishearth) the 'static bound can be removed when +// our HashMap fork (hashglobe) is able to use NonZero, +// or when stdlib gets fallible collections +impl<V: 'static> MaybeCaseInsensitiveHashMap<Atom, V> { + /// Empty map + pub fn new() -> Self { + Self::default() + } + + /// HashMap::try_entry + pub fn try_entry( + &mut self, + mut key: Atom, + quirks_mode: QuirksMode, + ) -> Result<hash_map::Entry<Atom, V>, FailedAllocationError> { + if quirks_mode == QuirksMode::Quirks { + key = key.to_ascii_lowercase() + } + self.0.try_entry(key) + } + + /// HashMap::is_empty + #[inline] + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// HashMap::iter + pub fn iter(&self) -> hash_map::Iter<Atom, V> { + self.0.iter() + } + + /// HashMap::clear + pub fn clear(&mut self) { + self.0.clear() + } + + /// HashMap::get + pub fn get(&self, key: &WeakAtom, quirks_mode: QuirksMode) -> Option<&V> { + if quirks_mode == QuirksMode::Quirks { + self.0.get(&key.to_ascii_lowercase()) + } else { + self.0.get(key) + } + } +} |