diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /servo/components/selectors | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'servo/components/selectors')
-rw-r--r-- | servo/components/selectors/CHANGES.md | 1 | ||||
-rw-r--r-- | servo/components/selectors/Cargo.toml | 35 | ||||
-rw-r--r-- | servo/components/selectors/README.md | 25 | ||||
-rw-r--r-- | servo/components/selectors/attr.rs | 183 | ||||
-rw-r--r-- | servo/components/selectors/bloom.rs | 422 | ||||
-rw-r--r-- | servo/components/selectors/build.rs | 77 | ||||
-rw-r--r-- | servo/components/selectors/builder.rs | 391 | ||||
-rw-r--r-- | servo/components/selectors/context.rs | 418 | ||||
-rw-r--r-- | servo/components/selectors/lib.rs | 41 | ||||
-rw-r--r-- | servo/components/selectors/matching.rs | 1370 | ||||
-rw-r--r-- | servo/components/selectors/nth_index_cache.rs | 102 | ||||
-rw-r--r-- | servo/components/selectors/parser.rs | 4483 | ||||
-rw-r--r-- | servo/components/selectors/relative_selector/cache.rs | 81 | ||||
-rw-r--r-- | servo/components/selectors/relative_selector/filter.rs | 159 | ||||
-rw-r--r-- | servo/components/selectors/relative_selector/mod.rs | 6 | ||||
-rw-r--r-- | servo/components/selectors/sink.rs | 31 | ||||
-rw-r--r-- | servo/components/selectors/tree.rs | 168 | ||||
-rw-r--r-- | servo/components/selectors/visitor.rs | 136 |
18 files changed, 8129 insertions, 0 deletions
diff --git a/servo/components/selectors/CHANGES.md b/servo/components/selectors/CHANGES.md new file mode 100644 index 0000000000..b1e9511dca --- /dev/null +++ b/servo/components/selectors/CHANGES.md @@ -0,0 +1 @@ +- `parser.rs` no longer wraps values in quotes (`"..."`) but expects their `to_css` impl to already wrap it ([Gecko Bug 1854809](https://bugzilla.mozilla.org/show_bug.cgi?id=1854809)) diff --git a/servo/components/selectors/Cargo.toml b/servo/components/selectors/Cargo.toml new file mode 100644 index 0000000000..88360d7dd2 --- /dev/null +++ b/servo/components/selectors/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "selectors" +version = "0.22.0" +authors = ["The Servo Project Developers"] +documentation = "https://docs.rs/selectors/" +description = "CSS Selectors matching for Rust" +repository = "https://github.com/servo/servo" +readme = "README.md" +keywords = ["css", "selectors"] +license = "MPL-2.0" +build = "build.rs" + +[lib] +name = "selectors" +path = "lib.rs" + +[features] +bench = [] + +[dependencies] +bitflags = "2" +cssparser = "0.33" +derive_more = { version = "0.99", default-features = false, features = ["add", "add_assign"] } +fxhash = "0.2" +log = "0.4" +phf = "0.11" +precomputed-hash = "0.1" +servo_arc = { version = "0.1", path = "../servo_arc" } +smallvec = "1.0" +to_shmem = { path = "../to_shmem" } +to_shmem_derive = { path = "../to_shmem_derive" } +new_debug_unreachable = "1" + +[build-dependencies] +phf_codegen = "0.11" diff --git a/servo/components/selectors/README.md b/servo/components/selectors/README.md new file mode 100644 index 0000000000..dac4a7ff91 --- /dev/null +++ b/servo/components/selectors/README.md @@ -0,0 +1,25 @@ +rust-selectors +============== + +* [![Build Status](https://travis-ci.com/servo/rust-selectors.svg?branch=master)]( + https://travis-ci.com/servo/rust-selectors) +* [Documentation](https://docs.rs/selectors/) +* [crates.io](https://crates.io/crates/selectors) + +CSS Selectors library for Rust. +Includes parsing and serilization of selectors, +as well as matching against a generic tree of elements. +Pseudo-elements and most pseudo-classes are generic as well. + +**Warning:** breaking changes are made to this library fairly frequently +(13 times in 2016, for example). +However you can use this crate without updating it that often, +old versions stay available on crates.io and Cargo will only automatically update +to versions that are numbered as compatible. + +To see how to use this library with your own tree representation, +see [Kuchiki’s `src/select.rs`](https://github.com/kuchiki-rs/kuchiki/blob/master/src/select.rs). +(Note however that Kuchiki is not always up to date with the latest rust-selectors version, +so that code may need to be tweaked.) +If you don’t already have a tree data structure, +consider using [Kuchiki](https://github.com/kuchiki-rs/kuchiki) itself. diff --git a/servo/components/selectors/attr.rs b/servo/components/selectors/attr.rs new file mode 100644 index 0000000000..fee2962237 --- /dev/null +++ b/servo/components/selectors/attr.rs @@ -0,0 +1,183 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +use crate::parser::SelectorImpl; +use cssparser::ToCss; +use std::fmt; + +#[derive(Clone, Eq, PartialEq, ToShmem)] +#[shmem(no_bounds)] +pub struct AttrSelectorWithOptionalNamespace<Impl: SelectorImpl> { + #[shmem(field_bound)] + pub namespace: Option<NamespaceConstraint<(Impl::NamespacePrefix, Impl::NamespaceUrl)>>, + #[shmem(field_bound)] + pub local_name: Impl::LocalName, + pub local_name_lower: Impl::LocalName, + #[shmem(field_bound)] + pub operation: ParsedAttrSelectorOperation<Impl::AttrValue>, +} + +impl<Impl: SelectorImpl> AttrSelectorWithOptionalNamespace<Impl> { + pub fn namespace(&self) -> Option<NamespaceConstraint<&Impl::NamespaceUrl>> { + self.namespace.as_ref().map(|ns| match ns { + NamespaceConstraint::Any => NamespaceConstraint::Any, + NamespaceConstraint::Specific((_, ref url)) => NamespaceConstraint::Specific(url), + }) + } +} + +#[derive(Clone, Eq, PartialEq, ToShmem)] +pub enum NamespaceConstraint<NamespaceUrl> { + Any, + + /// Empty string for no namespace + Specific(NamespaceUrl), +} + +#[derive(Clone, Eq, PartialEq, ToShmem)] +pub enum ParsedAttrSelectorOperation<AttrValue> { + Exists, + WithValue { + operator: AttrSelectorOperator, + case_sensitivity: ParsedCaseSensitivity, + value: AttrValue, + }, +} + +#[derive(Clone, Eq, PartialEq)] +pub enum AttrSelectorOperation<AttrValue> { + Exists, + WithValue { + operator: AttrSelectorOperator, + case_sensitivity: CaseSensitivity, + value: AttrValue, + }, +} + +impl<AttrValue> AttrSelectorOperation<AttrValue> { + pub fn eval_str(&self, element_attr_value: &str) -> bool + where + AttrValue: AsRef<str>, + { + match *self { + AttrSelectorOperation::Exists => true, + AttrSelectorOperation::WithValue { + operator, + case_sensitivity, + ref value, + } => operator.eval_str(element_attr_value, value.as_ref(), case_sensitivity), + } + } +} + +#[derive(Clone, Copy, Eq, PartialEq, ToShmem)] +pub enum AttrSelectorOperator { + Equal, + Includes, + DashMatch, + Prefix, + Substring, + Suffix, +} + +impl ToCss for AttrSelectorOperator { + fn to_css<W>(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + // https://drafts.csswg.org/cssom/#serializing-selectors + // See "attribute selector". + dest.write_str(match *self { + AttrSelectorOperator::Equal => "=", + AttrSelectorOperator::Includes => "~=", + AttrSelectorOperator::DashMatch => "|=", + AttrSelectorOperator::Prefix => "^=", + AttrSelectorOperator::Substring => "*=", + AttrSelectorOperator::Suffix => "$=", + }) + } +} + +impl AttrSelectorOperator { + pub fn eval_str( + self, + element_attr_value: &str, + attr_selector_value: &str, + case_sensitivity: CaseSensitivity, + ) -> bool { + let e = element_attr_value.as_bytes(); + let s = attr_selector_value.as_bytes(); + let case = case_sensitivity; + match self { + AttrSelectorOperator::Equal => case.eq(e, s), + AttrSelectorOperator::Prefix => e.len() >= s.len() && case.eq(&e[..s.len()], s), + AttrSelectorOperator::Suffix => { + e.len() >= s.len() && case.eq(&e[(e.len() - s.len())..], s) + }, + AttrSelectorOperator::Substring => { + case.contains(element_attr_value, attr_selector_value) + }, + AttrSelectorOperator::Includes => element_attr_value + .split(SELECTOR_WHITESPACE) + .any(|part| case.eq(part.as_bytes(), s)), + AttrSelectorOperator::DashMatch => { + case.eq(e, s) || (e.get(s.len()) == Some(&b'-') && case.eq(&e[..s.len()], s)) + }, + } + } +} + +/// The definition of whitespace per CSS Selectors Level 3 § 4. +pub static SELECTOR_WHITESPACE: &[char] = &[' ', '\t', '\n', '\r', '\x0C']; + +#[derive(Clone, Copy, Debug, Eq, PartialEq, ToShmem)] +pub enum ParsedCaseSensitivity { + /// 's' was specified. + ExplicitCaseSensitive, + /// 'i' was specified. + AsciiCaseInsensitive, + /// No flags were specified and HTML says this is a case-sensitive attribute. + CaseSensitive, + /// No flags were specified and HTML says this is a case-insensitive attribute. + AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum CaseSensitivity { + CaseSensitive, + AsciiCaseInsensitive, +} + +impl CaseSensitivity { + pub fn eq(self, a: &[u8], b: &[u8]) -> bool { + match self { + CaseSensitivity::CaseSensitive => a == b, + CaseSensitivity::AsciiCaseInsensitive => a.eq_ignore_ascii_case(b), + } + } + + pub fn contains(self, haystack: &str, needle: &str) -> bool { + match self { + CaseSensitivity::CaseSensitive => haystack.contains(needle), + CaseSensitivity::AsciiCaseInsensitive => { + if let Some((&n_first_byte, n_rest)) = needle.as_bytes().split_first() { + haystack.bytes().enumerate().any(|(i, byte)| { + if !byte.eq_ignore_ascii_case(&n_first_byte) { + return false; + } + let after_this_byte = &haystack.as_bytes()[i + 1..]; + match after_this_byte.get(..n_rest.len()) { + None => false, + Some(haystack_slice) => haystack_slice.eq_ignore_ascii_case(n_rest), + } + }) + } else { + // any_str.contains("") == true, + // though these cases should be handled with *NeverMatches and never go here. + true + } + }, + } + } +} diff --git a/servo/components/selectors/bloom.rs b/servo/components/selectors/bloom.rs new file mode 100644 index 0000000000..98461d1ba2 --- /dev/null +++ b/servo/components/selectors/bloom.rs @@ -0,0 +1,422 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +//! Counting and non-counting Bloom filters tuned for use as ancestor filters +//! for selector matching. + +use std::fmt::{self, Debug}; + +// The top 8 bits of the 32-bit hash value are not used by the bloom filter. +// Consumers may rely on this to pack hashes more efficiently. +pub const BLOOM_HASH_MASK: u32 = 0x00ffffff; +const KEY_SIZE: usize = 12; + +const ARRAY_SIZE: usize = 1 << KEY_SIZE; +const KEY_MASK: u32 = (1 << KEY_SIZE) - 1; + +/// A counting Bloom filter with 8-bit counters. +pub type BloomFilter = CountingBloomFilter<BloomStorageU8>; + +/// A counting Bloom filter with parameterized storage to handle +/// counters of different sizes. For now we assume that having two hash +/// functions is enough, but we may revisit that decision later. +/// +/// The filter uses an array with 2**KeySize entries. +/// +/// Assuming a well-distributed hash function, a Bloom filter with +/// array size M containing N elements and +/// using k hash function has expected false positive rate exactly +/// +/// $ (1 - (1 - 1/M)^{kN})^k $ +/// +/// because each array slot has a +/// +/// $ (1 - 1/M)^{kN} $ +/// +/// chance of being 0, and the expected false positive rate is the +/// probability that all of the k hash functions will hit a nonzero +/// slot. +/// +/// For reasonable assumptions (M large, kN large, which should both +/// hold if we're worried about false positives) about M and kN this +/// becomes approximately +/// +/// $$ (1 - \exp(-kN/M))^k $$ +/// +/// For our special case of k == 2, that's $(1 - \exp(-2N/M))^2$, +/// or in other words +/// +/// $$ N/M = -0.5 * \ln(1 - \sqrt(r)) $$ +/// +/// where r is the false positive rate. This can be used to compute +/// the desired KeySize for a given load N and false positive rate r. +/// +/// If N/M is assumed small, then the false positive rate can +/// further be approximated as 4*N^2/M^2. So increasing KeySize by +/// 1, which doubles M, reduces the false positive rate by about a +/// factor of 4, and a false positive rate of 1% corresponds to +/// about M/N == 20. +/// +/// What this means in practice is that for a few hundred keys using a +/// KeySize of 12 gives false positive rates on the order of 0.25-4%. +/// +/// Similarly, using a KeySize of 10 would lead to a 4% false +/// positive rate for N == 100 and to quite bad false positive +/// rates for larger N. +#[derive(Clone, Default)] +pub struct CountingBloomFilter<S> +where + S: BloomStorage, +{ + storage: S, +} + +impl<S> CountingBloomFilter<S> +where + S: BloomStorage, +{ + /// Creates a new bloom filter. + #[inline] + pub fn new() -> Self { + Default::default() + } + + #[inline] + pub fn clear(&mut self) { + self.storage = Default::default(); + } + + // Slow linear accessor to make sure the bloom filter is zeroed. This should + // never be used in release builds. + #[cfg(debug_assertions)] + pub fn is_zeroed(&self) -> bool { + self.storage.is_zeroed() + } + + #[cfg(not(debug_assertions))] + pub fn is_zeroed(&self) -> bool { + unreachable!() + } + + /// Inserts an item with a particular hash into the bloom filter. + #[inline] + pub fn insert_hash(&mut self, hash: u32) { + self.storage.adjust_first_slot(hash, true); + self.storage.adjust_second_slot(hash, true); + } + + /// Removes an item with a particular hash from the bloom filter. + #[inline] + pub fn remove_hash(&mut self, hash: u32) { + self.storage.adjust_first_slot(hash, false); + self.storage.adjust_second_slot(hash, false); + } + + /// Check whether the filter might contain an item with the given hash. + /// This can sometimes return true even if the item is not in the filter, + /// but will never return false for items that are actually in the filter. + #[inline] + pub fn might_contain_hash(&self, hash: u32) -> bool { + !self.storage.first_slot_is_empty(hash) && !self.storage.second_slot_is_empty(hash) + } +} + +impl<S> Debug for CountingBloomFilter<S> +where + S: BloomStorage, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut slots_used = 0; + for i in 0..ARRAY_SIZE { + if !self.storage.slot_is_empty(i) { + slots_used += 1; + } + } + write!(f, "BloomFilter({}/{})", slots_used, ARRAY_SIZE) + } +} + +pub trait BloomStorage: Clone + Default { + fn slot_is_empty(&self, index: usize) -> bool; + fn adjust_slot(&mut self, index: usize, increment: bool); + fn is_zeroed(&self) -> bool; + + #[inline] + fn first_slot_is_empty(&self, hash: u32) -> bool { + self.slot_is_empty(Self::first_slot_index(hash)) + } + + #[inline] + fn second_slot_is_empty(&self, hash: u32) -> bool { + self.slot_is_empty(Self::second_slot_index(hash)) + } + + #[inline] + fn adjust_first_slot(&mut self, hash: u32, increment: bool) { + self.adjust_slot(Self::first_slot_index(hash), increment) + } + + #[inline] + fn adjust_second_slot(&mut self, hash: u32, increment: bool) { + self.adjust_slot(Self::second_slot_index(hash), increment) + } + + #[inline] + fn first_slot_index(hash: u32) -> usize { + hash1(hash) as usize + } + + #[inline] + fn second_slot_index(hash: u32) -> usize { + hash2(hash) as usize + } +} + +/// Storage class for a CountingBloomFilter that has 8-bit counters. +pub struct BloomStorageU8 { + counters: [u8; ARRAY_SIZE], +} + +impl BloomStorage for BloomStorageU8 { + #[inline] + fn adjust_slot(&mut self, index: usize, increment: bool) { + let slot = &mut self.counters[index]; + if *slot != 0xff { + // full + if increment { + *slot += 1; + } else { + *slot -= 1; + } + } + } + + #[inline] + fn slot_is_empty(&self, index: usize) -> bool { + self.counters[index] == 0 + } + + #[inline] + fn is_zeroed(&self) -> bool { + self.counters.iter().all(|x| *x == 0) + } +} + +impl Default for BloomStorageU8 { + fn default() -> Self { + BloomStorageU8 { + counters: [0; ARRAY_SIZE], + } + } +} + +impl Clone for BloomStorageU8 { + fn clone(&self) -> Self { + BloomStorageU8 { + counters: self.counters, + } + } +} + +/// Storage class for a CountingBloomFilter that has 1-bit counters. +pub struct BloomStorageBool { + counters: [u8; ARRAY_SIZE / 8], +} + +impl BloomStorage for BloomStorageBool { + #[inline] + fn adjust_slot(&mut self, index: usize, increment: bool) { + let bit = 1 << (index % 8); + let byte = &mut self.counters[index / 8]; + + // Since we have only one bit for storage, decrementing it + // should never do anything. Assert against an accidental + // decrementing of a bit that was never set. + assert!( + increment || (*byte & bit) != 0, + "should not decrement if slot is already false" + ); + + if increment { + *byte |= bit; + } + } + + #[inline] + fn slot_is_empty(&self, index: usize) -> bool { + let bit = 1 << (index % 8); + (self.counters[index / 8] & bit) == 0 + } + + #[inline] + fn is_zeroed(&self) -> bool { + self.counters.iter().all(|x| *x == 0) + } +} + +impl Default for BloomStorageBool { + fn default() -> Self { + BloomStorageBool { + counters: [0; ARRAY_SIZE / 8], + } + } +} + +impl Clone for BloomStorageBool { + fn clone(&self) -> Self { + BloomStorageBool { + counters: self.counters, + } + } +} + +#[inline] +fn hash1(hash: u32) -> u32 { + hash & KEY_MASK +} + +#[inline] +fn hash2(hash: u32) -> u32 { + (hash >> KEY_SIZE) & KEY_MASK +} + +#[test] +fn create_and_insert_some_stuff() { + use fxhash::FxHasher; + use std::hash::{Hash, Hasher}; + use std::mem::transmute; + + fn hash_as_str(i: usize) -> u32 { + let mut hasher = FxHasher::default(); + let s = i.to_string(); + s.hash(&mut hasher); + let hash: u64 = hasher.finish(); + (hash >> 32) as u32 ^ (hash as u32) + } + + let mut bf = BloomFilter::new(); + + // Statically assert that ARRAY_SIZE is a multiple of 8, which + // BloomStorageBool relies on. + unsafe { + transmute::<[u8; ARRAY_SIZE % 8], [u8; 0]>([]); + } + + for i in 0_usize..1000 { + bf.insert_hash(hash_as_str(i)); + } + + for i in 0_usize..1000 { + assert!(bf.might_contain_hash(hash_as_str(i))); + } + + let false_positives = (1001_usize..2000) + .filter(|i| bf.might_contain_hash(hash_as_str(*i))) + .count(); + + assert!(false_positives < 190, "{} is not < 190", false_positives); // 19%. + + for i in 0_usize..100 { + bf.remove_hash(hash_as_str(i)); + } + + for i in 100_usize..1000 { + assert!(bf.might_contain_hash(hash_as_str(i))); + } + + let false_positives = (0_usize..100) + .filter(|i| bf.might_contain_hash(hash_as_str(*i))) + .count(); + + assert!(false_positives < 20, "{} is not < 20", false_positives); // 20%. + + bf.clear(); + + for i in 0_usize..2000 { + assert!(!bf.might_contain_hash(hash_as_str(i))); + } +} + +#[cfg(feature = "bench")] +#[cfg(test)] +mod bench { + extern crate test; + use super::BloomFilter; + + #[derive(Default)] + struct HashGenerator(u32); + + impl HashGenerator { + fn next(&mut self) -> u32 { + // Each hash is split into two twelve-bit segments, which are used + // as an index into an array. We increment each by 64 so that we + // hit the next cache line, and then another 1 so that our wrapping + // behavior leads us to different entries each time. + // + // Trying to simulate cold caches is rather difficult with the cargo + // benchmarking setup, so it may all be moot depending on the number + // of iterations that end up being run. But we might as well. + self.0 += (65) + (65 << super::KEY_SIZE); + self.0 + } + } + + #[bench] + fn create_insert_1000_remove_100_lookup_100(b: &mut test::Bencher) { + b.iter(|| { + let mut gen1 = HashGenerator::default(); + let mut gen2 = HashGenerator::default(); + let mut bf = BloomFilter::new(); + for _ in 0_usize..1000 { + bf.insert_hash(gen1.next()); + } + for _ in 0_usize..100 { + bf.remove_hash(gen2.next()); + } + for _ in 100_usize..200 { + test::black_box(bf.might_contain_hash(gen2.next())); + } + }); + } + + #[bench] + fn might_contain_10(b: &mut test::Bencher) { + let bf = BloomFilter::new(); + let mut gen = HashGenerator::default(); + b.iter(|| { + for _ in 0..10 { + test::black_box(bf.might_contain_hash(gen.next())); + } + }); + } + + #[bench] + fn clear(b: &mut test::Bencher) { + let mut bf = Box::new(BloomFilter::new()); + b.iter(|| test::black_box(&mut bf).clear()); + } + + #[bench] + fn insert_10(b: &mut test::Bencher) { + let mut bf = BloomFilter::new(); + let mut gen = HashGenerator::default(); + b.iter(|| { + for _ in 0..10 { + test::black_box(bf.insert_hash(gen.next())); + } + }); + } + + #[bench] + fn remove_10(b: &mut test::Bencher) { + let mut bf = BloomFilter::new(); + let mut gen = HashGenerator::default(); + // Note: this will underflow, and that's ok. + b.iter(|| { + for _ in 0..10 { + bf.remove_hash(gen.next()) + } + }); + } +} diff --git a/servo/components/selectors/build.rs b/servo/components/selectors/build.rs new file mode 100644 index 0000000000..c5c3803991 --- /dev/null +++ b/servo/components/selectors/build.rs @@ -0,0 +1,77 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +extern crate phf_codegen; + +use std::env; +use std::fs::File; +use std::io::{BufWriter, Write}; +use std::path::Path; + +fn main() { + let path = Path::new(&env::var_os("OUT_DIR").unwrap()) + .join("ascii_case_insensitive_html_attributes.rs"); + let mut file = BufWriter::new(File::create(&path).unwrap()); + + let mut set = phf_codegen::Set::new(); + for name in ASCII_CASE_INSENSITIVE_HTML_ATTRIBUTES.split_whitespace() { + set.entry(name); + } + write!( + &mut file, + "{{ static SET: ::phf::Set<&'static str> = {}; &SET }}", + set.build(), + ) + .unwrap(); +} + +/// <https://html.spec.whatwg.org/multipage/#selectors> +static ASCII_CASE_INSENSITIVE_HTML_ATTRIBUTES: &str = r#" + accept + accept-charset + align + alink + axis + bgcolor + charset + checked + clear + codetype + color + compact + declare + defer + dir + direction + disabled + enctype + face + frame + hreflang + http-equiv + lang + language + link + media + method + multiple + nohref + noresize + noshade + nowrap + readonly + rel + rev + rules + scope + scrolling + selected + shape + target + text + type + valign + valuetype + vlink +"#; diff --git a/servo/components/selectors/builder.rs b/servo/components/selectors/builder.rs new file mode 100644 index 0000000000..2406cd84f6 --- /dev/null +++ b/servo/components/selectors/builder.rs @@ -0,0 +1,391 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +//! Helper module to build up a selector safely and efficiently. +//! +//! Our selector representation is designed to optimize matching, and has +//! several requirements: +//! * All simple selectors and combinators are stored inline in the same buffer +//! as Component instances. +//! * We store the top-level compound selectors from right to left, i.e. in +//! matching order. +//! * We store the simple selectors for each combinator from left to right, so +//! that we match the cheaper simple selectors first. +//! +//! Meeting all these constraints without extra memmove traffic during parsing +//! is non-trivial. This module encapsulates those details and presents an +//! easy-to-use API for the parser. + +use crate::parser::{Combinator, Component, RelativeSelector, Selector, SelectorImpl}; +use crate::sink::Push; +use servo_arc::{Arc, ThinArc}; +use smallvec::{self, SmallVec}; +use std::cmp; +use std::iter; +use std::ptr; +use std::slice; + +/// Top-level SelectorBuilder struct. This should be stack-allocated by the +/// consumer and never moved (because it contains a lot of inline data that +/// would be slow to memmov). +/// +/// After instantation, callers may call the push_simple_selector() and +/// push_combinator() methods to append selector data as it is encountered +/// (from left to right). Once the process is complete, callers should invoke +/// build(), which transforms the contents of the SelectorBuilder into a heap- +/// allocated Selector and leaves the builder in a drained state. +#[derive(Debug)] +pub struct SelectorBuilder<Impl: SelectorImpl> { + /// The entire sequence of simple selectors, from left to right, without combinators. + /// + /// We make this large because the result of parsing a selector is fed into a new + /// Arc-ed allocation, so any spilled vec would be a wasted allocation. Also, + /// Components are large enough that we don't have much cache locality benefit + /// from reserving stack space for fewer of them. + simple_selectors: SmallVec<[Component<Impl>; 32]>, + /// The combinators, and the length of the compound selector to their left. + combinators: SmallVec<[(Combinator, usize); 16]>, + /// The length of the current compount selector. + current_len: usize, +} + +impl<Impl: SelectorImpl> Default for SelectorBuilder<Impl> { + #[inline(always)] + fn default() -> Self { + SelectorBuilder { + simple_selectors: SmallVec::new(), + combinators: SmallVec::new(), + current_len: 0, + } + } +} + +impl<Impl: SelectorImpl> Push<Component<Impl>> for SelectorBuilder<Impl> { + fn push(&mut self, value: Component<Impl>) { + self.push_simple_selector(value); + } +} + +impl<Impl: SelectorImpl> SelectorBuilder<Impl> { + /// Pushes a simple selector onto the current compound selector. + #[inline(always)] + pub fn push_simple_selector(&mut self, ss: Component<Impl>) { + assert!(!ss.is_combinator()); + self.simple_selectors.push(ss); + self.current_len += 1; + } + + /// Completes the current compound selector and starts a new one, delimited + /// by the given combinator. + #[inline(always)] + pub fn push_combinator(&mut self, c: Combinator) { + self.combinators.push((c, self.current_len)); + self.current_len = 0; + } + + /// Returns true if combinators have ever been pushed to this builder. + #[inline(always)] + pub fn has_combinators(&self) -> bool { + !self.combinators.is_empty() + } + + /// Consumes the builder, producing a Selector. + #[inline(always)] + pub fn build(&mut self) -> ThinArc<SpecificityAndFlags, Component<Impl>> { + // Compute the specificity and flags. + let sf = specificity_and_flags(self.simple_selectors.iter()); + self.build_with_specificity_and_flags(sf) + } + + /// Builds with an explicit SpecificityAndFlags. This is separated from build() so + /// that unit tests can pass an explicit specificity. + #[inline(always)] + pub(crate) fn build_with_specificity_and_flags( + &mut self, + spec: SpecificityAndFlags, + ) -> ThinArc<SpecificityAndFlags, Component<Impl>> { + // Create the Arc using an iterator that drains our buffers. + // Panic-safety: if SelectorBuilderIter is not iterated to the end, some simple selectors + // will safely leak. + let raw_simple_selectors = unsafe { + let simple_selectors_len = self.simple_selectors.len(); + self.simple_selectors.set_len(0); + std::slice::from_raw_parts(self.simple_selectors.as_ptr(), simple_selectors_len) + }; + let (rest, current) = split_from_end(raw_simple_selectors, self.current_len); + let iter = SelectorBuilderIter { + current_simple_selectors: current.iter(), + rest_of_simple_selectors: rest, + combinators: self.combinators.drain(..).rev(), + }; + + Arc::from_header_and_iter(spec, iter) + } +} + +struct SelectorBuilderIter<'a, Impl: SelectorImpl> { + current_simple_selectors: slice::Iter<'a, Component<Impl>>, + rest_of_simple_selectors: &'a [Component<Impl>], + combinators: iter::Rev<smallvec::Drain<'a, [(Combinator, usize); 16]>>, +} + +impl<'a, Impl: SelectorImpl> ExactSizeIterator for SelectorBuilderIter<'a, Impl> { + fn len(&self) -> usize { + self.current_simple_selectors.len() + + self.rest_of_simple_selectors.len() + + self.combinators.len() + } +} + +impl<'a, Impl: SelectorImpl> Iterator for SelectorBuilderIter<'a, Impl> { + type Item = Component<Impl>; + #[inline(always)] + fn next(&mut self) -> Option<Self::Item> { + if let Some(simple_selector_ref) = self.current_simple_selectors.next() { + // Move a simple selector out of this slice iterator. + // This is safe because we’ve called SmallVec::set_len(0) above, + // so SmallVec::drop won’t drop this simple selector. + unsafe { Some(ptr::read(simple_selector_ref)) } + } else { + self.combinators.next().map(|(combinator, len)| { + let (rest, current) = split_from_end(self.rest_of_simple_selectors, len); + self.rest_of_simple_selectors = rest; + self.current_simple_selectors = current.iter(); + Component::Combinator(combinator) + }) + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.len(), Some(self.len())) + } +} + +fn split_from_end<T>(s: &[T], at: usize) -> (&[T], &[T]) { + s.split_at(s.len() - at) +} + +/// Flags that indicate at which point of parsing a selector are we. +#[derive(Clone, Copy, Default, Eq, PartialEq, ToShmem)] +pub(crate) struct SelectorFlags(u8); + +bitflags! { + impl SelectorFlags: u8 { + const HAS_PSEUDO = 1 << 0; + const HAS_SLOTTED = 1 << 1; + const HAS_PART = 1 << 2; + const HAS_PARENT = 1 << 3; + const HAS_NON_FEATURELESS_COMPONENT = 1 << 4; + const HAS_HOST = 1 << 5; + } +} + +impl core::fmt::Debug for SelectorFlags { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + if self.is_empty() { + write!(f, "{:#x}", Self::empty().bits()) + } else { + bitflags::parser::to_writer(self, f) + } + } +} + +impl SelectorFlags { + /// When you nest a pseudo-element with something like: + /// + /// ::before { & { .. } } + /// + /// It is not supposed to work, because :is(::before) is invalid. We can't propagate the + /// pseudo-flags from inner to outer selectors, to avoid breaking our invariants. + pub(crate) fn for_nesting() -> Self { + Self::all() - (Self::HAS_PSEUDO | Self::HAS_SLOTTED | Self::HAS_PART) + } +} + +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, ToShmem)] +pub struct SpecificityAndFlags { + /// There are two free bits here, since we use ten bits for each specificity + /// kind (id, class, element). + pub(crate) specificity: u32, + /// There's padding after this field due to the size of the flags. + pub(crate) flags: SelectorFlags, +} + +const MAX_10BIT: u32 = (1u32 << 10) - 1; + +#[derive(Add, AddAssign, Clone, Copy, Default, Eq, Ord, PartialEq, PartialOrd)] +pub(crate) struct Specificity { + id_selectors: u32, + class_like_selectors: u32, + element_selectors: u32, +} + +impl From<u32> for Specificity { + #[inline] + fn from(value: u32) -> Specificity { + assert!(value <= MAX_10BIT << 20 | MAX_10BIT << 10 | MAX_10BIT); + Specificity { + id_selectors: value >> 20, + class_like_selectors: (value >> 10) & MAX_10BIT, + element_selectors: value & MAX_10BIT, + } + } +} + +impl From<Specificity> for u32 { + #[inline] + fn from(specificity: Specificity) -> u32 { + cmp::min(specificity.id_selectors, MAX_10BIT) << 20 | + cmp::min(specificity.class_like_selectors, MAX_10BIT) << 10 | + cmp::min(specificity.element_selectors, MAX_10BIT) + } +} + +pub(crate) fn specificity_and_flags<Impl>(iter: slice::Iter<Component<Impl>>) -> SpecificityAndFlags +where + Impl: SelectorImpl, +{ + complex_selector_specificity_and_flags(iter).into() +} + +fn complex_selector_specificity_and_flags<Impl>( + iter: slice::Iter<Component<Impl>>, +) -> SpecificityAndFlags +where + Impl: SelectorImpl, +{ + fn component_specificity<Impl>( + simple_selector: &Component<Impl>, + specificity: &mut Specificity, + flags: &mut SelectorFlags, + ) where + Impl: SelectorImpl, + { + match *simple_selector { + Component::Combinator(..) => {}, + Component::ParentSelector => flags.insert(SelectorFlags::HAS_PARENT), + Component::Part(..) => { + flags.insert(SelectorFlags::HAS_PART); + specificity.element_selectors += 1 + }, + Component::PseudoElement(..) => { + flags.insert(SelectorFlags::HAS_PSEUDO); + specificity.element_selectors += 1 + }, + Component::LocalName(..) => { + flags.insert(SelectorFlags::HAS_NON_FEATURELESS_COMPONENT); + specificity.element_selectors += 1 + }, + Component::Slotted(ref selector) => { + flags.insert( + SelectorFlags::HAS_SLOTTED | SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + ); + specificity.element_selectors += 1; + // Note that due to the way ::slotted works we only compete with + // other ::slotted rules, so the above rule doesn't really + // matter, but we do it still for consistency with other + // pseudo-elements. + // + // See: https://github.com/w3c/csswg-drafts/issues/1915 + *specificity += Specificity::from(selector.specificity()); + flags.insert(selector.flags()); + }, + Component::Host(ref selector) => { + flags.insert(SelectorFlags::HAS_HOST); + specificity.class_like_selectors += 1; + if let Some(ref selector) = *selector { + // See: https://github.com/w3c/csswg-drafts/issues/1915 + *specificity += Specificity::from(selector.specificity()); + flags.insert(selector.flags() - SelectorFlags::HAS_NON_FEATURELESS_COMPONENT); + } + }, + Component::ID(..) => { + flags.insert(SelectorFlags::HAS_NON_FEATURELESS_COMPONENT); + specificity.id_selectors += 1; + }, + Component::Class(..) | + Component::AttributeInNoNamespace { .. } | + Component::AttributeInNoNamespaceExists { .. } | + Component::AttributeOther(..) | + Component::Root | + Component::Empty | + Component::Scope | + Component::Nth(..) | + Component::NonTSPseudoClass(..) => { + flags.insert(SelectorFlags::HAS_NON_FEATURELESS_COMPONENT); + specificity.class_like_selectors += 1; + }, + Component::NthOf(ref nth_of_data) => { + // https://drafts.csswg.org/selectors/#specificity-rules: + // + // The specificity of the :nth-last-child() pseudo-class, + // like the :nth-child() pseudo-class, combines the + // specificity of a regular pseudo-class with that of its + // selector argument S. + specificity.class_like_selectors += 1; + let sf = selector_list_specificity_and_flags(nth_of_data.selectors().iter()); + *specificity += Specificity::from(sf.specificity); + flags.insert(sf.flags | SelectorFlags::HAS_NON_FEATURELESS_COMPONENT); + }, + // https://drafts.csswg.org/selectors/#specificity-rules: + // + // The specificity of an :is(), :not(), or :has() pseudo-class + // is replaced by the specificity of the most specific complex + // selector in its selector list argument. + Component::Where(ref list) | + Component::Negation(ref list) | + Component::Is(ref list) => { + let sf = selector_list_specificity_and_flags(list.slice().iter()); + if !matches!(*simple_selector, Component::Where(..)) { + *specificity += Specificity::from(sf.specificity); + } + flags.insert(sf.flags); + }, + Component::Has(ref relative_selectors) => { + let sf = relative_selector_list_specificity_and_flags(relative_selectors); + *specificity += Specificity::from(sf.specificity); + flags.insert(sf.flags | SelectorFlags::HAS_NON_FEATURELESS_COMPONENT); + }, + Component::ExplicitUniversalType | + Component::ExplicitAnyNamespace | + Component::ExplicitNoNamespace | + Component::DefaultNamespace(..) | + Component::Namespace(..) | + Component::RelativeSelectorAnchor | + Component::Invalid(..) => { + // Does not affect specificity + flags.insert(SelectorFlags::HAS_NON_FEATURELESS_COMPONENT); + }, + } + } + + let mut specificity = Default::default(); + let mut flags = Default::default(); + for simple_selector in iter { + component_specificity(&simple_selector, &mut specificity, &mut flags); + } + SpecificityAndFlags { + specificity: specificity.into(), + flags, + } +} + +/// Finds the maximum specificity of elements in the list and returns it. +pub(crate) fn selector_list_specificity_and_flags<'a, Impl: SelectorImpl>( + itr: impl Iterator<Item = &'a Selector<Impl>>, +) -> SpecificityAndFlags { + let mut specificity = 0; + let mut flags = SelectorFlags::empty(); + for selector in itr { + specificity = std::cmp::max(specificity, selector.specificity()); + flags.insert(selector.flags()); + } + SpecificityAndFlags { specificity, flags } +} + +pub(crate) fn relative_selector_list_specificity_and_flags<Impl: SelectorImpl>( + list: &[RelativeSelector<Impl>], +) -> SpecificityAndFlags { + selector_list_specificity_and_flags(list.iter().map(|rel| &rel.selector)) +} diff --git a/servo/components/selectors/context.rs b/servo/components/selectors/context.rs new file mode 100644 index 0000000000..84ee262dfe --- /dev/null +++ b/servo/components/selectors/context.rs @@ -0,0 +1,418 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +use crate::attr::CaseSensitivity; +use crate::bloom::BloomFilter; +use crate::nth_index_cache::{NthIndexCache, NthIndexCacheInner}; +use crate::parser::{Selector, SelectorImpl}; +use crate::relative_selector::cache::RelativeSelectorCache; +use crate::relative_selector::filter::RelativeSelectorFilterMap; +use crate::tree::{Element, OpaqueElement}; + +/// What kind of selector matching mode we should use. +/// +/// There are two modes of selector matching. The difference is only noticeable +/// in presence of pseudo-elements. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum MatchingMode { + /// Don't ignore any pseudo-element selectors. + Normal, + + /// Ignores any stateless pseudo-element selectors in the rightmost sequence + /// of simple selectors. + /// + /// This is useful, for example, to match against ::before when you aren't a + /// pseudo-element yourself. + /// + /// For example, in presence of `::before:hover`, it would never match, but + /// `::before` would be ignored as in "matching". + /// + /// It's required for all the selectors you match using this mode to have a + /// pseudo-element. + ForStatelessPseudoElement, +} + +/// The mode to use when matching unvisited and visited links. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum VisitedHandlingMode { + /// All links are matched as if they are unvisted. + AllLinksUnvisited, + /// All links are matched as if they are visited and unvisited (both :link + /// and :visited match). + /// + /// This is intended to be used from invalidation code, to be conservative + /// about whether we need to restyle a link. + AllLinksVisitedAndUnvisited, + /// A element's "relevant link" is the element being matched if it is a link + /// or the nearest ancestor link. The relevant link is matched as though it + /// is visited, and all other links are matched as if they are unvisited. + RelevantLinkVisited, +} + +impl VisitedHandlingMode { + #[inline] + pub fn matches_visited(&self) -> bool { + matches!( + *self, + VisitedHandlingMode::RelevantLinkVisited | + VisitedHandlingMode::AllLinksVisitedAndUnvisited + ) + } + + #[inline] + pub fn matches_unvisited(&self) -> bool { + matches!( + *self, + VisitedHandlingMode::AllLinksUnvisited | + VisitedHandlingMode::AllLinksVisitedAndUnvisited + ) + } +} + +/// Whether we need to set selector invalidation flags on elements for this +/// match request. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum NeedsSelectorFlags { + No, + Yes, +} + +/// Whether we're matching in the contect of invalidation. +#[derive(PartialEq)] +pub enum MatchingForInvalidation { + No, + Yes, +} + +/// Which quirks mode is this document in. +/// +/// See: https://quirks.spec.whatwg.org/ +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub enum QuirksMode { + /// Quirks mode. + Quirks, + /// Limited quirks mode. + LimitedQuirks, + /// No quirks mode. + NoQuirks, +} + +impl QuirksMode { + #[inline] + pub fn classes_and_ids_case_sensitivity(self) -> CaseSensitivity { + match self { + QuirksMode::NoQuirks | QuirksMode::LimitedQuirks => CaseSensitivity::CaseSensitive, + QuirksMode::Quirks => CaseSensitivity::AsciiCaseInsensitive, + } + } +} + +/// Whether or not this matching considered relative selector. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum RelativeSelectorMatchingState { + /// Was not considered for any relative selector. + None, + /// Relative selector was considered for a match, but the element to be + /// under matching would not anchor the relative selector. i.e. The + /// relative selector was not part of the first compound selector (in match + /// order). + Considered, + /// Same as above, but the relative selector was part of the first compound + /// selector (in match order). + ConsideredAnchor, +} + +impl RelativeSelectorMatchingState { + /// Update the matching state to indicate that the relative selector matching + /// happened in the subject position. + pub fn considered_anchor(&mut self) { + *self = Self::ConsideredAnchor; + } + + /// Update the matching state to indicate that the relative selector matching + /// happened in a non-subject position. + pub fn considered(&mut self) { + // Being considered an anchor is stronger (e.g. `:has(.a):is(:has(.b) .c)`). + if *self == Self::ConsideredAnchor { + *self = Self::ConsideredAnchor; + } else { + *self = Self::Considered; + } + } +} + +/// Set of caches (And cache-likes) that speed up expensive selector matches. +#[derive(Default)] +pub struct SelectorCaches { + /// A cache to speed up nth-index-like selectors. + pub nth_index: NthIndexCache, + /// A cache to speed up relative selector matches. See module documentation. + pub relative_selector: RelativeSelectorCache, + /// A map of bloom filters to fast-reject relative selector matches. + pub relative_selector_filter_map: RelativeSelectorFilterMap, +} + +/// Data associated with the matching process for a element. This context is +/// used across many selectors for an element, so it's not appropriate for +/// transient data that applies to only a single selector. +pub struct MatchingContext<'a, Impl> +where + Impl: SelectorImpl, +{ + /// Input with the matching mode we should use when matching selectors. + matching_mode: MatchingMode, + /// Input with the bloom filter used to fast-reject selectors. + pub bloom_filter: Option<&'a BloomFilter>, + /// The element which is going to match :scope pseudo-class. It can be + /// either one :scope element, or the scoping element. + /// + /// Note that, although in theory there can be multiple :scope elements, + /// in current specs, at most one is specified, and when there is one, + /// scoping element is not relevant anymore, so we use a single field for + /// them. + /// + /// When this is None, :scope will match the root element. + /// + /// See https://drafts.csswg.org/selectors-4/#scope-pseudo + pub scope_element: Option<OpaqueElement>, + + /// The current shadow host we're collecting :host rules for. + pub current_host: Option<OpaqueElement>, + + /// Controls how matching for links is handled. + visited_handling: VisitedHandlingMode, + + /// The current nesting level of selectors that we're matching. + nesting_level: usize, + + /// Whether we're inside a negation or not. + in_negation: bool, + + /// An optional hook function for checking whether a pseudo-element + /// should match when matching_mode is ForStatelessPseudoElement. + pub pseudo_element_matching_fn: Option<&'a dyn Fn(&Impl::PseudoElement) -> bool>, + + /// Extra implementation-dependent matching data. + pub extra_data: Impl::ExtraMatchingData<'a>, + + /// The current element we're anchoring on for evaluating the relative selector. + current_relative_selector_anchor: Option<OpaqueElement>, + pub considered_relative_selector: RelativeSelectorMatchingState, + + quirks_mode: QuirksMode, + needs_selector_flags: NeedsSelectorFlags, + + /// Whether we're matching in the contect of invalidation. + matching_for_invalidation: MatchingForInvalidation, + + /// Caches to speed up expensive selector matches. + pub selector_caches: &'a mut SelectorCaches, + + classes_and_ids_case_sensitivity: CaseSensitivity, + _impl: ::std::marker::PhantomData<Impl>, +} + +impl<'a, Impl> MatchingContext<'a, Impl> +where + Impl: SelectorImpl, +{ + /// Constructs a new `MatchingContext`. + pub fn new( + matching_mode: MatchingMode, + bloom_filter: Option<&'a BloomFilter>, + selector_caches: &'a mut SelectorCaches, + quirks_mode: QuirksMode, + needs_selector_flags: NeedsSelectorFlags, + matching_for_invalidation: MatchingForInvalidation, + ) -> Self { + Self::new_for_visited( + matching_mode, + bloom_filter, + selector_caches, + VisitedHandlingMode::AllLinksUnvisited, + quirks_mode, + needs_selector_flags, + matching_for_invalidation, + ) + } + + /// Constructs a new `MatchingContext` for use in visited matching. + pub fn new_for_visited( + matching_mode: MatchingMode, + bloom_filter: Option<&'a BloomFilter>, + selector_caches: &'a mut SelectorCaches, + visited_handling: VisitedHandlingMode, + quirks_mode: QuirksMode, + needs_selector_flags: NeedsSelectorFlags, + matching_for_invalidation: MatchingForInvalidation, + ) -> Self { + Self { + matching_mode, + bloom_filter, + visited_handling, + quirks_mode, + classes_and_ids_case_sensitivity: quirks_mode.classes_and_ids_case_sensitivity(), + needs_selector_flags, + matching_for_invalidation, + scope_element: None, + current_host: None, + nesting_level: 0, + in_negation: false, + pseudo_element_matching_fn: None, + extra_data: Default::default(), + current_relative_selector_anchor: None, + considered_relative_selector: RelativeSelectorMatchingState::None, + selector_caches, + _impl: ::std::marker::PhantomData, + } + } + + // Grab a reference to the appropriate cache. + #[inline] + pub fn nth_index_cache( + &mut self, + is_of_type: bool, + is_from_end: bool, + selectors: &[Selector<Impl>], + ) -> &mut NthIndexCacheInner { + self.selector_caches + .nth_index + .get(is_of_type, is_from_end, selectors) + } + + /// Whether we're matching a nested selector. + #[inline] + pub fn is_nested(&self) -> bool { + self.nesting_level != 0 + } + + /// Whether we're matching inside a :not(..) selector. + #[inline] + pub fn in_negation(&self) -> bool { + self.in_negation + } + + /// The quirks mode of the document. + #[inline] + pub fn quirks_mode(&self) -> QuirksMode { + self.quirks_mode + } + + /// The matching-mode for this selector-matching operation. + #[inline] + pub fn matching_mode(&self) -> MatchingMode { + self.matching_mode + } + + /// Whether we need to set selector flags. + #[inline] + pub fn needs_selector_flags(&self) -> bool { + self.needs_selector_flags == NeedsSelectorFlags::Yes + } + + /// Whether or not we're matching to invalidate. + #[inline] + pub fn matching_for_invalidation(&self) -> bool { + self.matching_for_invalidation == MatchingForInvalidation::Yes + } + + /// The case-sensitivity for class and ID selectors + #[inline] + pub fn classes_and_ids_case_sensitivity(&self) -> CaseSensitivity { + self.classes_and_ids_case_sensitivity + } + + /// Runs F with a deeper nesting level. + #[inline] + pub fn nest<F, R>(&mut self, f: F) -> R + where + F: FnOnce(&mut Self) -> R, + { + self.nesting_level += 1; + let result = f(self); + self.nesting_level -= 1; + result + } + + /// Runs F with a deeper nesting level, and marking ourselves in a negation, + /// for a :not(..) selector, for example. + #[inline] + pub fn nest_for_negation<F, R>(&mut self, f: F) -> R + where + F: FnOnce(&mut Self) -> R, + { + let old_in_negation = self.in_negation; + self.in_negation = true; + let result = self.nest(f); + self.in_negation = old_in_negation; + result + } + + #[inline] + pub fn visited_handling(&self) -> VisitedHandlingMode { + self.visited_handling + } + + /// Runs F with a different VisitedHandlingMode. + #[inline] + pub fn with_visited_handling_mode<F, R>( + &mut self, + handling_mode: VisitedHandlingMode, + f: F, + ) -> R + where + F: FnOnce(&mut Self) -> R, + { + let original_handling_mode = self.visited_handling; + self.visited_handling = handling_mode; + let result = f(self); + self.visited_handling = original_handling_mode; + result + } + + /// Runs F with a given shadow host which is the root of the tree whose + /// rules we're matching. + #[inline] + pub fn with_shadow_host<F, E, R>(&mut self, host: Option<E>, f: F) -> R + where + E: Element, + F: FnOnce(&mut Self) -> R, + { + let original_host = self.current_host.take(); + self.current_host = host.map(|h| h.opaque()); + let result = f(self); + self.current_host = original_host; + result + } + + /// Returns the current shadow host whose shadow root we're matching rules + /// against. + #[inline] + pub fn shadow_host(&self) -> Option<OpaqueElement> { + self.current_host + } + + /// Runs F with a deeper nesting level, with the given element as the anchor, + /// for a :has(...) selector, for example. + #[inline] + pub fn nest_for_relative_selector<F, R>(&mut self, anchor: OpaqueElement, f: F) -> R + where + F: FnOnce(&mut Self) -> R, + { + debug_assert!( + self.current_relative_selector_anchor.is_none(), + "Nesting should've been rejected at parse time" + ); + self.current_relative_selector_anchor = Some(anchor); + let result = self.nest(f); + self.current_relative_selector_anchor = None; + result + } + + /// Returns the current anchor element to evaluate the relative selector against. + #[inline] + pub fn relative_selector_anchor(&self) -> Option<OpaqueElement> { + self.current_relative_selector_anchor + } +} diff --git a/servo/components/selectors/lib.rs b/servo/components/selectors/lib.rs new file mode 100644 index 0000000000..d909059ccf --- /dev/null +++ b/servo/components/selectors/lib.rs @@ -0,0 +1,41 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +// Make |cargo bench| work. +#![cfg_attr(feature = "bench", feature(test))] + +#[macro_use] +extern crate bitflags; +#[macro_use] +extern crate cssparser; +#[macro_use] +extern crate debug_unreachable; +#[macro_use] +extern crate derive_more; +extern crate fxhash; +#[macro_use] +extern crate log; +extern crate phf; +extern crate precomputed_hash; +extern crate servo_arc; +extern crate smallvec; +extern crate to_shmem; +#[macro_use] +extern crate to_shmem_derive; + +pub mod attr; +pub mod bloom; +mod builder; +pub mod context; +pub mod matching; +mod nth_index_cache; +pub mod parser; +pub mod relative_selector; +pub mod sink; +mod tree; +pub mod visitor; + +pub use crate::nth_index_cache::NthIndexCache; +pub use crate::parser::{Parser, SelectorImpl, SelectorList}; +pub use crate::tree::{Element, OpaqueElement}; diff --git a/servo/components/selectors/matching.rs b/servo/components/selectors/matching.rs new file mode 100644 index 0000000000..763f65d547 --- /dev/null +++ b/servo/components/selectors/matching.rs @@ -0,0 +1,1370 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +use crate::attr::{ + AttrSelectorOperation, AttrSelectorWithOptionalNamespace, CaseSensitivity, NamespaceConstraint, + ParsedAttrSelectorOperation, ParsedCaseSensitivity, +}; +use crate::bloom::{BloomFilter, BLOOM_HASH_MASK}; +use crate::parser::{ + AncestorHashes, Combinator, Component, LocalName, NthSelectorData, RelativeSelectorMatchHint, +}; +use crate::parser::{ + NonTSPseudoClass, RelativeSelector, Selector, SelectorImpl, SelectorIter, SelectorList, +}; +use crate::relative_selector::cache::RelativeSelectorCachedMatch; +use crate::tree::Element; +use smallvec::SmallVec; +use std::borrow::Borrow; + +pub use crate::context::*; + +// The bloom filter for descendant CSS selectors will have a <1% false +// positive rate until it has this many selectors in it, then it will +// rapidly increase. +pub static RECOMMENDED_SELECTOR_BLOOM_FILTER_SIZE: usize = 4096; + +bitflags! { + /// Set of flags that are set on either the element or its parent (depending + /// on the flag) if the element could potentially match a selector. + #[derive(Clone, Copy)] + pub struct ElementSelectorFlags: usize { + /// When a child is added or removed from the parent, all the children + /// must be restyled, because they may match :nth-last-child, + /// :last-of-type, :nth-last-of-type, or :only-of-type. + const HAS_SLOW_SELECTOR = 1 << 0; + + /// When a child is added or removed from the parent, any later + /// children must be restyled, because they may match :nth-child, + /// :first-of-type, or :nth-of-type. + const HAS_SLOW_SELECTOR_LATER_SIBLINGS = 1 << 1; + + /// HAS_SLOW_SELECTOR* was set by the presence of :nth (But not of). + const HAS_SLOW_SELECTOR_NTH = 1 << 2; + + /// When a DOM mutation occurs on a child that might be matched by + /// :nth-last-child(.. of <selector list>), earlier children must be + /// restyled, and HAS_SLOW_SELECTOR will be set (which normally + /// indicates that all children will be restyled). + /// + /// Similarly, when a DOM mutation occurs on a child that might be + /// matched by :nth-child(.. of <selector list>), later children must be + /// restyled, and HAS_SLOW_SELECTOR_LATER_SIBLINGS will be set. + const HAS_SLOW_SELECTOR_NTH_OF = 1 << 3; + + /// When a child is added or removed from the parent, the first and + /// last children must be restyled, because they may match :first-child, + /// :last-child, or :only-child. + const HAS_EDGE_CHILD_SELECTOR = 1 << 4; + + /// The element has an empty selector, so when a child is appended we + /// might need to restyle the parent completely. + const HAS_EMPTY_SELECTOR = 1 << 5; + + /// The element may anchor a relative selector. + const ANCHORS_RELATIVE_SELECTOR = 1 << 6; + + /// The element may anchor a relative selector that is not the subject + /// of the whole selector. + const ANCHORS_RELATIVE_SELECTOR_NON_SUBJECT = 1 << 7; + + /// The element is reached by a relative selector search in the sibling direction. + const RELATIVE_SELECTOR_SEARCH_DIRECTION_SIBLING = 1 << 8; + + /// The element is reached by a relative selector search in the ancestor direction. + const RELATIVE_SELECTOR_SEARCH_DIRECTION_ANCESTOR = 1 << 9; + + // The element is reached by a relative selector search in both sibling and ancestor directions. + const RELATIVE_SELECTOR_SEARCH_DIRECTION_ANCESTOR_SIBLING = + Self::RELATIVE_SELECTOR_SEARCH_DIRECTION_SIBLING.bits() | + Self::RELATIVE_SELECTOR_SEARCH_DIRECTION_ANCESTOR.bits(); + } +} + +impl ElementSelectorFlags { + /// Returns the subset of flags that apply to the element. + pub fn for_self(self) -> ElementSelectorFlags { + self & (ElementSelectorFlags::HAS_EMPTY_SELECTOR | + ElementSelectorFlags::ANCHORS_RELATIVE_SELECTOR | + ElementSelectorFlags::ANCHORS_RELATIVE_SELECTOR_NON_SUBJECT | + ElementSelectorFlags::RELATIVE_SELECTOR_SEARCH_DIRECTION_SIBLING | + ElementSelectorFlags::RELATIVE_SELECTOR_SEARCH_DIRECTION_ANCESTOR) + } + + /// Returns the subset of flags that apply to the parent. + pub fn for_parent(self) -> ElementSelectorFlags { + self & (ElementSelectorFlags::HAS_SLOW_SELECTOR | + ElementSelectorFlags::HAS_SLOW_SELECTOR_LATER_SIBLINGS | + ElementSelectorFlags::HAS_SLOW_SELECTOR_NTH | + ElementSelectorFlags::HAS_SLOW_SELECTOR_NTH_OF | + ElementSelectorFlags::HAS_EDGE_CHILD_SELECTOR) + } +} + +/// Holds per-compound-selector data. +struct LocalMatchingContext<'a, 'b: 'a, Impl: SelectorImpl> { + shared: &'a mut MatchingContext<'b, Impl>, + rightmost: SubjectOrPseudoElement, + quirks_data: Option<SelectorIter<'a, Impl>>, +} + +#[inline(always)] +pub fn matches_selector_list<E>( + selector_list: &SelectorList<E::Impl>, + element: &E, + context: &mut MatchingContext<E::Impl>, +) -> bool +where + E: Element, +{ + // This is pretty much any(..) but manually inlined because the compiler + // refuses to do so from querySelector / querySelectorAll. + for selector in selector_list.slice() { + let matches = matches_selector(selector, 0, None, element, context); + if matches { + return true; + } + } + + false +} + +#[inline(always)] +fn may_match(hashes: &AncestorHashes, bf: &BloomFilter) -> bool { + // Check the first three hashes. Note that we can check for zero before + // masking off the high bits, since if any of the first three hashes is + // zero the fourth will be as well. We also take care to avoid the + // special-case complexity of the fourth hash until we actually reach it, + // because we usually don't. + // + // To be clear: this is all extremely hot. + for i in 0..3 { + let packed = hashes.packed_hashes[i]; + if packed == 0 { + // No more hashes left - unable to fast-reject. + return true; + } + + if !bf.might_contain_hash(packed & BLOOM_HASH_MASK) { + // Hooray! We fast-rejected on this hash. + return false; + } + } + + // Now do the slighty-more-complex work of synthesizing the fourth hash, + // and check it against the filter if it exists. + let fourth = hashes.fourth_hash(); + fourth == 0 || bf.might_contain_hash(fourth) +} + +/// A result of selector matching, includes 3 failure types, +/// +/// NotMatchedAndRestartFromClosestLaterSibling +/// NotMatchedAndRestartFromClosestDescendant +/// NotMatchedGlobally +/// +/// When NotMatchedGlobally appears, stop selector matching completely since +/// the succeeding selectors never matches. +/// It is raised when +/// Child combinator cannot find the candidate element. +/// Descendant combinator cannot find the candidate element. +/// +/// When NotMatchedAndRestartFromClosestDescendant appears, the selector +/// matching does backtracking and restarts from the closest Descendant +/// combinator. +/// It is raised when +/// NextSibling combinator cannot find the candidate element. +/// LaterSibling combinator cannot find the candidate element. +/// Child combinator doesn't match on the found element. +/// +/// When NotMatchedAndRestartFromClosestLaterSibling appears, the selector +/// matching does backtracking and restarts from the closest LaterSibling +/// combinator. +/// It is raised when +/// NextSibling combinator doesn't match on the found element. +/// +/// For example, when the selector "d1 d2 a" is provided and we cannot *find* +/// an appropriate ancestor element for "d1", this selector matching raises +/// NotMatchedGlobally since even if "d2" is moved to more upper element, the +/// candidates for "d1" becomes less than before and d1 . +/// +/// The next example is siblings. When the selector "b1 + b2 ~ d1 a" is +/// provided and we cannot *find* an appropriate brother element for b1, +/// the selector matching raises NotMatchedAndRestartFromClosestDescendant. +/// The selectors ("b1 + b2 ~") doesn't match and matching restart from "d1". +/// +/// The additional example is child and sibling. When the selector +/// "b1 + c1 > b2 ~ d1 a" is provided and the selector "b1" doesn't match on +/// the element, this "b1" raises NotMatchedAndRestartFromClosestLaterSibling. +/// However since the selector "c1" raises +/// NotMatchedAndRestartFromClosestDescendant. So the selector +/// "b1 + c1 > b2 ~ " doesn't match and restart matching from "d1". +#[derive(Clone, Copy, Eq, PartialEq)] +enum SelectorMatchingResult { + Matched, + NotMatchedAndRestartFromClosestLaterSibling, + NotMatchedAndRestartFromClosestDescendant, + NotMatchedGlobally, +} + +/// Matches a selector, fast-rejecting against a bloom filter. +/// +/// We accept an offset to allow consumers to represent and match against +/// partial selectors (indexed from the right). We use this API design, rather +/// than having the callers pass a SelectorIter, because creating a SelectorIter +/// requires dereferencing the selector to get the length, which adds an +/// unncessary cache miss for cases when we can fast-reject with AncestorHashes +/// (which the caller can store inline with the selector pointer). +#[inline(always)] +pub fn matches_selector<E>( + selector: &Selector<E::Impl>, + offset: usize, + hashes: Option<&AncestorHashes>, + element: &E, + context: &mut MatchingContext<E::Impl>, +) -> bool +where + E: Element, +{ + // Use the bloom filter to fast-reject. + if let Some(hashes) = hashes { + if let Some(filter) = context.bloom_filter { + if !may_match(hashes, filter) { + return false; + } + } + } + matches_complex_selector( + selector.iter_from(offset), + element, + context, + if selector.is_rightmost(offset) { + SubjectOrPseudoElement::Yes + } else { + SubjectOrPseudoElement::No + }, + ) +} + +/// Whether a compound selector matched, and whether it was the rightmost +/// selector inside the complex selector. +pub enum CompoundSelectorMatchingResult { + /// The selector was fully matched. + FullyMatched, + /// The compound selector matched, and the next combinator offset is + /// `next_combinator_offset`. + Matched { next_combinator_offset: usize }, + /// The selector didn't match. + NotMatched, +} + +/// Matches a compound selector belonging to `selector`, starting at offset +/// `from_offset`, matching left to right. +/// +/// Requires that `from_offset` points to a `Combinator`. +/// +/// NOTE(emilio): This doesn't allow to match in the leftmost sequence of the +/// complex selector, but it happens to be the case we don't need it. +pub fn matches_compound_selector_from<E>( + selector: &Selector<E::Impl>, + mut from_offset: usize, + context: &mut MatchingContext<E::Impl>, + element: &E, +) -> CompoundSelectorMatchingResult +where + E: Element, +{ + if cfg!(debug_assertions) && from_offset != 0 { + selector.combinator_at_parse_order(from_offset - 1); // This asserts. + } + + let mut local_context = LocalMatchingContext { + shared: context, + // We have no info if this is an outer selector. This function is called in + // an invalidation context, which only calls this for non-subject (i.e. + // Non-rightmost) positions. + rightmost: SubjectOrPseudoElement::No, + quirks_data: None, + }; + + // Find the end of the selector or the next combinator, then match + // backwards, so that we match in the same order as + // matches_complex_selector, which is usually faster. + let start_offset = from_offset; + for component in selector.iter_raw_parse_order_from(from_offset) { + if matches!(*component, Component::Combinator(..)) { + debug_assert_ne!(from_offset, 0, "Selector started with a combinator?"); + break; + } + + from_offset += 1; + } + + debug_assert!(from_offset >= 1); + debug_assert!(from_offset <= selector.len()); + + let iter = selector.iter_from(selector.len() - from_offset); + debug_assert!( + iter.clone().next().is_some() || + (from_offset != selector.len() && + matches!( + selector.combinator_at_parse_order(from_offset), + Combinator::SlotAssignment | Combinator::PseudoElement + )), + "Got the math wrong: {:?} | {:?} | {} {}", + selector, + selector.iter_raw_match_order().as_slice(), + from_offset, + start_offset + ); + + for component in iter { + if !matches_simple_selector(component, element, &mut local_context) { + return CompoundSelectorMatchingResult::NotMatched; + } + } + + if from_offset != selector.len() { + return CompoundSelectorMatchingResult::Matched { + next_combinator_offset: from_offset, + }; + } + + CompoundSelectorMatchingResult::FullyMatched +} + +/// Matches a complex selector. +#[inline(always)] +fn matches_complex_selector<E>( + mut iter: SelectorIter<E::Impl>, + element: &E, + context: &mut MatchingContext<E::Impl>, + rightmost: SubjectOrPseudoElement, +) -> bool +where + E: Element, +{ + // If this is the special pseudo-element mode, consume the ::pseudo-element + // before proceeding, since the caller has already handled that part. + if context.matching_mode() == MatchingMode::ForStatelessPseudoElement && !context.is_nested() { + // Consume the pseudo. + match *iter.next().unwrap() { + Component::PseudoElement(ref pseudo) => { + if let Some(ref f) = context.pseudo_element_matching_fn { + if !f(pseudo) { + return false; + } + } + }, + ref other => { + debug_assert!( + false, + "Used MatchingMode::ForStatelessPseudoElement \ + in a non-pseudo selector {:?}", + other + ); + return false; + }, + } + + if !iter.matches_for_stateless_pseudo_element() { + return false; + } + + // Advance to the non-pseudo-element part of the selector. + let next_sequence = iter.next_sequence().unwrap(); + debug_assert_eq!(next_sequence, Combinator::PseudoElement); + } + + let result = matches_complex_selector_internal(iter, element, context, rightmost); + + matches!(result, SelectorMatchingResult::Matched) +} + +/// Matches each selector of a list as a complex selector +fn matches_complex_selector_list<E: Element>( + list: &[Selector<E::Impl>], + element: &E, + context: &mut MatchingContext<E::Impl>, + rightmost: SubjectOrPseudoElement, +) -> bool { + for selector in list { + if matches_complex_selector(selector.iter(), element, context, rightmost) { + return true; + } + } + false +} + +fn matches_relative_selector<E: Element>( + relative_selector: &RelativeSelector<E::Impl>, + element: &E, + context: &mut MatchingContext<E::Impl>, + rightmost: SubjectOrPseudoElement, +) -> bool { + // Overall, we want to mark the path that we've traversed so that when an element + // is invalidated, we early-reject unnecessary relative selector invalidations. + if relative_selector.match_hint.is_descendant_direction() { + if context.needs_selector_flags() { + element.apply_selector_flags( + ElementSelectorFlags::RELATIVE_SELECTOR_SEARCH_DIRECTION_ANCESTOR, + ); + } + let mut next_element = element.first_element_child(); + while let Some(el) = next_element { + if context.needs_selector_flags() { + el.apply_selector_flags( + ElementSelectorFlags::RELATIVE_SELECTOR_SEARCH_DIRECTION_ANCESTOR, + ); + } + let mut matched = matches_complex_selector( + relative_selector.selector.iter(), + &el, + context, + rightmost, + ); + if !matched && relative_selector.match_hint.is_subtree() { + matched = matches_relative_selector_subtree( + &relative_selector.selector, + &el, + context, + rightmost, + ); + } + if matched { + return true; + } + next_element = el.next_sibling_element(); + } + } else { + debug_assert!( + matches!( + relative_selector.match_hint, + RelativeSelectorMatchHint::InNextSibling | + RelativeSelectorMatchHint::InNextSiblingSubtree | + RelativeSelectorMatchHint::InSibling | + RelativeSelectorMatchHint::InSiblingSubtree + ), + "Not descendant direction, but also not sibling direction?" + ); + if context.needs_selector_flags() { + element.apply_selector_flags( + ElementSelectorFlags::RELATIVE_SELECTOR_SEARCH_DIRECTION_SIBLING, + ); + } + let sibling_flag = if relative_selector.match_hint.is_subtree() { + ElementSelectorFlags::RELATIVE_SELECTOR_SEARCH_DIRECTION_ANCESTOR_SIBLING + } else { + ElementSelectorFlags::RELATIVE_SELECTOR_SEARCH_DIRECTION_SIBLING + }; + let mut next_element = element.next_sibling_element(); + while let Some(el) = next_element { + if context.needs_selector_flags() { + el.apply_selector_flags(sibling_flag); + } + let matched = if relative_selector.match_hint.is_subtree() { + matches_relative_selector_subtree( + &relative_selector.selector, + &el, + context, + rightmost, + ) + } else { + matches_complex_selector(relative_selector.selector.iter(), &el, context, rightmost) + }; + if matched { + return true; + } + if relative_selector.match_hint.is_next_sibling() { + break; + } + next_element = el.next_sibling_element(); + } + } + return false; +} + +fn relative_selector_match_early<E: Element>( + selector: &RelativeSelector<E::Impl>, + element: &E, + context: &mut MatchingContext<E::Impl>, +) -> Option<bool> { + if context.matching_for_invalidation() { + // In the context of invalidation, we can't use caching/filtering due to + // now/then matches. DOM structure also may have changed, so just pretend + // that we always match. + return Some(!context.in_negation()); + } + // See if we can return a cached result. + if let Some(cached) = context + .selector_caches + .relative_selector + .lookup(element.opaque(), selector) + { + return Some(cached.matched()); + } + // See if we can fast-reject. + if context + .selector_caches + .relative_selector_filter_map + .fast_reject(element, selector, context.quirks_mode()) + { + // Alright, add as unmatched to cache. + context.selector_caches.relative_selector.add( + element.opaque(), + selector, + RelativeSelectorCachedMatch::NotMatched, + ); + return Some(false); + } + None +} + +fn match_relative_selectors<E: Element>( + selectors: &[RelativeSelector<E::Impl>], + element: &E, + context: &mut MatchingContext<E::Impl>, + rightmost: SubjectOrPseudoElement, +) -> bool { + if context.relative_selector_anchor().is_some() { + // FIXME(emilio): This currently can happen with nesting, and it's not fully + // correct, arguably. But the ideal solution isn't super-clear either. For now, + // cope with it and explicitly reject it at match time. See [1] for discussion. + // + // [1]: https://github.com/w3c/csswg-drafts/issues/9600 + return false; + } + context.nest_for_relative_selector(element.opaque(), |context| { + do_match_relative_selectors(selectors, element, context, rightmost) + }) +} + +/// Matches a relative selector in a list of relative selectors. +fn do_match_relative_selectors<E: Element>( + selectors: &[RelativeSelector<E::Impl>], + element: &E, + context: &mut MatchingContext<E::Impl>, + rightmost: SubjectOrPseudoElement, +) -> bool { + // Due to style sharing implications (See style sharing code), we mark the current styling context + // to mark elements considered for :has matching. Additionally, we want to mark the elements themselves, + // since we don't want to indiscriminately invalidate every element as a potential anchor. + if rightmost == SubjectOrPseudoElement::Yes { + context.considered_relative_selector.considered_anchor(); + if context.needs_selector_flags() { + element.apply_selector_flags(ElementSelectorFlags::ANCHORS_RELATIVE_SELECTOR); + } + } else { + context.considered_relative_selector.considered(); + if context.needs_selector_flags() { + element + .apply_selector_flags(ElementSelectorFlags::ANCHORS_RELATIVE_SELECTOR_NON_SUBJECT); + } + } + + for relative_selector in selectors.iter() { + if let Some(result) = relative_selector_match_early(relative_selector, element, context) { + if result { + return true; + } + // Early return indicates no match, continue to next selector. + continue; + } + + let matched = matches_relative_selector(relative_selector, element, context, rightmost); + context.selector_caches.relative_selector.add( + element.opaque(), + relative_selector, + if matched { + RelativeSelectorCachedMatch::Matched + } else { + RelativeSelectorCachedMatch::NotMatched + }, + ); + if matched { + return true; + } + } + + false +} + +fn matches_relative_selector_subtree<E: Element>( + selector: &Selector<E::Impl>, + element: &E, + context: &mut MatchingContext<E::Impl>, + rightmost: SubjectOrPseudoElement, +) -> bool { + let mut current = element.first_element_child(); + + while let Some(el) = current { + if context.needs_selector_flags() { + el.apply_selector_flags( + ElementSelectorFlags::RELATIVE_SELECTOR_SEARCH_DIRECTION_ANCESTOR, + ); + } + if matches_complex_selector(selector.iter(), &el, context, rightmost) { + return true; + } + + if matches_relative_selector_subtree(selector, &el, context, rightmost) { + return true; + } + + current = el.next_sibling_element(); + } + + false +} + +/// Whether the :hover and :active quirk applies. +/// +/// https://quirks.spec.whatwg.org/#the-active-and-hover-quirk +fn hover_and_active_quirk_applies<Impl: SelectorImpl>( + selector_iter: &SelectorIter<Impl>, + context: &MatchingContext<Impl>, + rightmost: SubjectOrPseudoElement, +) -> bool { + if context.quirks_mode() != QuirksMode::Quirks { + return false; + } + + if context.is_nested() { + return false; + } + + // This compound selector had a pseudo-element to the right that we + // intentionally skipped. + if rightmost == SubjectOrPseudoElement::Yes && + context.matching_mode() == MatchingMode::ForStatelessPseudoElement + { + return false; + } + + selector_iter.clone().all(|simple| match *simple { + Component::LocalName(_) | + Component::AttributeInNoNamespaceExists { .. } | + Component::AttributeInNoNamespace { .. } | + Component::AttributeOther(_) | + Component::ID(_) | + Component::Class(_) | + Component::PseudoElement(_) | + Component::Negation(_) | + Component::Empty | + Component::Nth(_) | + Component::NthOf(_) => false, + Component::NonTSPseudoClass(ref pseudo_class) => pseudo_class.is_active_or_hover(), + _ => true, + }) +} + +#[derive(Clone, Copy, PartialEq)] +enum SubjectOrPseudoElement { + Yes, + No, +} + +fn host_for_part<E>(element: &E, context: &MatchingContext<E::Impl>) -> Option<E> +where + E: Element, +{ + let scope = context.current_host; + let mut curr = element.containing_shadow_host()?; + if scope == Some(curr.opaque()) { + return Some(curr); + } + loop { + let parent = curr.containing_shadow_host(); + if parent.as_ref().map(|h| h.opaque()) == scope { + return Some(curr); + } + curr = parent?; + } +} + +fn assigned_slot<E>(element: &E, context: &MatchingContext<E::Impl>) -> Option<E> +where + E: Element, +{ + debug_assert!(element + .assigned_slot() + .map_or(true, |s| s.is_html_slot_element())); + let scope = context.current_host?; + let mut current_slot = element.assigned_slot()?; + while current_slot.containing_shadow_host().unwrap().opaque() != scope { + current_slot = current_slot.assigned_slot()?; + } + Some(current_slot) +} + +#[inline(always)] +fn next_element_for_combinator<E>( + element: &E, + combinator: Combinator, + selector: &SelectorIter<E::Impl>, + context: &MatchingContext<E::Impl>, +) -> Option<E> +where + E: Element, +{ + match combinator { + Combinator::NextSibling | Combinator::LaterSibling => element.prev_sibling_element(), + Combinator::Child | Combinator::Descendant => { + match element.parent_element() { + Some(e) => return Some(e), + None => {}, + } + + if !element.parent_node_is_shadow_root() { + return None; + } + + // https://drafts.csswg.org/css-scoping/#host-element-in-tree: + // + // For the purpose of Selectors, a shadow host also appears in + // its shadow tree, with the contents of the shadow tree treated + // as its children. (In other words, the shadow host is treated as + // replacing the shadow root node.) + // + // and also: + // + // When considered within its own shadow trees, the shadow host is + // featureless. Only the :host, :host(), and :host-context() + // pseudo-classes are allowed to match it. + // + // Since we know that the parent is a shadow root, we necessarily + // are in a shadow tree of the host, and the next selector will only + // match if the selector is a featureless :host selector. + if !selector.clone().is_featureless_host_selector() { + return None; + } + + element.containing_shadow_host() + }, + Combinator::Part => host_for_part(element, context), + Combinator::SlotAssignment => assigned_slot(element, context), + Combinator::PseudoElement => element.pseudo_element_originating_element(), + } +} + +fn matches_complex_selector_internal<E>( + mut selector_iter: SelectorIter<E::Impl>, + element: &E, + context: &mut MatchingContext<E::Impl>, + rightmost: SubjectOrPseudoElement, +) -> SelectorMatchingResult +where + E: Element, +{ + debug!( + "Matching complex selector {:?} for {:?}", + selector_iter, element + ); + + let matches_compound_selector = + matches_compound_selector(&mut selector_iter, element, context, rightmost); + + let combinator = selector_iter.next_sequence(); + if combinator.map_or(false, |c| c.is_sibling()) { + if context.needs_selector_flags() { + element.apply_selector_flags(ElementSelectorFlags::HAS_SLOW_SELECTOR_LATER_SIBLINGS); + } + } + + if !matches_compound_selector { + return SelectorMatchingResult::NotMatchedAndRestartFromClosestLaterSibling; + } + + let combinator = match combinator { + None => return SelectorMatchingResult::Matched, + Some(c) => c, + }; + + let (candidate_not_found, mut rightmost) = match combinator { + Combinator::NextSibling | Combinator::LaterSibling => { + (SelectorMatchingResult::NotMatchedAndRestartFromClosestDescendant, SubjectOrPseudoElement::No) + }, + Combinator::Child | + Combinator::Descendant | + Combinator::SlotAssignment | + Combinator::Part => (SelectorMatchingResult::NotMatchedGlobally, SubjectOrPseudoElement::No), + Combinator::PseudoElement => (SelectorMatchingResult::NotMatchedGlobally, rightmost), + }; + + // Stop matching :visited as soon as we find a link, or a combinator for + // something that isn't an ancestor. + let mut visited_handling = if combinator.is_sibling() { + VisitedHandlingMode::AllLinksUnvisited + } else { + context.visited_handling() + }; + + let mut element = element.clone(); + loop { + if element.is_link() { + visited_handling = VisitedHandlingMode::AllLinksUnvisited; + } + + element = match next_element_for_combinator(&element, combinator, &selector_iter, &context) + { + None => return candidate_not_found, + Some(next_element) => next_element, + }; + + let result = context.with_visited_handling_mode(visited_handling, |context| { + matches_complex_selector_internal( + selector_iter.clone(), + &element, + context, + rightmost, + ) + }); + + if !matches!(combinator, Combinator::PseudoElement) { + rightmost = SubjectOrPseudoElement::No; + } + + match (result, combinator) { + // Return the status immediately. + (SelectorMatchingResult::Matched, _) | + (SelectorMatchingResult::NotMatchedGlobally, _) | + (_, Combinator::NextSibling) => { + return result; + }, + + // Upgrade the failure status to + // NotMatchedAndRestartFromClosestDescendant. + (_, Combinator::PseudoElement) | (_, Combinator::Child) => { + return SelectorMatchingResult::NotMatchedAndRestartFromClosestDescendant; + }, + + // If the failure status is + // NotMatchedAndRestartFromClosestDescendant and combinator is + // Combinator::LaterSibling, give up this Combinator::LaterSibling + // matching and restart from the closest descendant combinator. + ( + SelectorMatchingResult::NotMatchedAndRestartFromClosestDescendant, + Combinator::LaterSibling, + ) => { + return result; + }, + + // The Combinator::Descendant combinator and the status is + // NotMatchedAndRestartFromClosestLaterSibling or + // NotMatchedAndRestartFromClosestDescendant, or the + // Combinator::LaterSibling combinator and the status is + // NotMatchedAndRestartFromClosestDescendant, we can continue to + // matching on the next candidate element. + _ => {}, + } + } +} + +#[inline] +fn matches_local_name<E>(element: &E, local_name: &LocalName<E::Impl>) -> bool +where + E: Element, +{ + let name = select_name(element, &local_name.name, &local_name.lower_name).borrow(); + element.has_local_name(name) +} + +fn matches_part<E>( + element: &E, + parts: &[<E::Impl as SelectorImpl>::Identifier], + context: &mut MatchingContext<E::Impl>, +) -> bool +where + E: Element, +{ + let mut hosts = SmallVec::<[E; 4]>::new(); + + let mut host = match element.containing_shadow_host() { + Some(h) => h, + None => return false, + }; + + let current_host = context.current_host; + if current_host != Some(host.opaque()) { + loop { + let outer_host = host.containing_shadow_host(); + if outer_host.as_ref().map(|h| h.opaque()) == current_host { + break; + } + let outer_host = match outer_host { + Some(h) => h, + None => return false, + }; + // TODO(emilio): if worth it, we could early return if + // host doesn't have the exportparts attribute. + hosts.push(host); + host = outer_host; + } + } + + // Translate the part into the right scope. + parts.iter().all(|part| { + let mut part = part.clone(); + for host in hosts.iter().rev() { + part = match host.imported_part(&part) { + Some(p) => p, + None => return false, + }; + } + element.is_part(&part) + }) +} + +fn matches_host<E>( + element: &E, + selector: Option<&Selector<E::Impl>>, + context: &mut MatchingContext<E::Impl>, + rightmost: SubjectOrPseudoElement, +) -> bool +where + E: Element, +{ + let host = match context.shadow_host() { + Some(h) => h, + None => return false, + }; + if host != element.opaque() { + return false; + } + selector.map_or(true, |selector| { + context + .nest(|context| matches_complex_selector(selector.iter(), element, context, rightmost)) + }) +} + +fn matches_slotted<E>( + element: &E, + selector: &Selector<E::Impl>, + context: &mut MatchingContext<E::Impl>, + rightmost: SubjectOrPseudoElement, +) -> bool +where + E: Element, +{ + // <slots> are never flattened tree slottables. + if element.is_html_slot_element() { + return false; + } + context.nest(|context| matches_complex_selector(selector.iter(), element, context, rightmost)) +} + +fn matches_rare_attribute_selector<E>( + element: &E, + attr_sel: &AttrSelectorWithOptionalNamespace<E::Impl>, +) -> bool +where + E: Element, +{ + let empty_string; + let namespace = match attr_sel.namespace() { + Some(ns) => ns, + None => { + empty_string = crate::parser::namespace_empty_string::<E::Impl>(); + NamespaceConstraint::Specific(&empty_string) + }, + }; + element.attr_matches( + &namespace, + select_name(element, &attr_sel.local_name, &attr_sel.local_name_lower), + &match attr_sel.operation { + ParsedAttrSelectorOperation::Exists => AttrSelectorOperation::Exists, + ParsedAttrSelectorOperation::WithValue { + operator, + case_sensitivity, + ref value, + } => AttrSelectorOperation::WithValue { + operator, + case_sensitivity: to_unconditional_case_sensitivity(case_sensitivity, element), + value, + }, + }, + ) +} + +/// Determines whether the given element matches the given compound selector. +#[inline] +fn matches_compound_selector<E>( + selector_iter: &mut SelectorIter<E::Impl>, + element: &E, + context: &mut MatchingContext<E::Impl>, + rightmost: SubjectOrPseudoElement, +) -> bool +where + E: Element, +{ + let quirks_data = if context.quirks_mode() == QuirksMode::Quirks { + Some(selector_iter.clone()) + } else { + None + }; + let mut local_context = LocalMatchingContext { + shared: context, + rightmost, + quirks_data, + }; + selector_iter.all(|simple| matches_simple_selector(simple, element, &mut local_context)) +} + +/// Determines whether the given element matches the given single selector. +fn matches_simple_selector<E>( + selector: &Component<E::Impl>, + element: &E, + context: &mut LocalMatchingContext<E::Impl>, +) -> bool +where + E: Element, +{ + debug_assert!(context.shared.is_nested() || !context.shared.in_negation()); + let rightmost = context.rightmost; + match *selector { + Component::ID(ref id) => { + element.has_id(id, context.shared.classes_and_ids_case_sensitivity()) + }, + Component::Class(ref class) => { + element.has_class(class, context.shared.classes_and_ids_case_sensitivity()) + }, + Component::LocalName(ref local_name) => matches_local_name(element, local_name), + Component::AttributeInNoNamespaceExists { + ref local_name, + ref local_name_lower, + } => element.has_attr_in_no_namespace(select_name(element, local_name, local_name_lower)), + Component::AttributeInNoNamespace { + ref local_name, + ref value, + operator, + case_sensitivity, + } => element.attr_matches( + &NamespaceConstraint::Specific(&crate::parser::namespace_empty_string::<E::Impl>()), + local_name, + &AttrSelectorOperation::WithValue { + operator, + case_sensitivity: to_unconditional_case_sensitivity(case_sensitivity, element), + value, + }, + ), + Component::AttributeOther(ref attr_sel) => { + matches_rare_attribute_selector(element, attr_sel) + }, + Component::Part(ref parts) => matches_part(element, parts, &mut context.shared), + Component::Slotted(ref selector) => { + matches_slotted(element, selector, &mut context.shared, rightmost) + }, + Component::PseudoElement(ref pseudo) => { + element.match_pseudo_element(pseudo, context.shared) + }, + Component::ExplicitUniversalType | Component::ExplicitAnyNamespace => true, + Component::Namespace(_, ref url) | Component::DefaultNamespace(ref url) => { + element.has_namespace(&url.borrow()) + }, + Component::ExplicitNoNamespace => { + let ns = crate::parser::namespace_empty_string::<E::Impl>(); + element.has_namespace(&ns.borrow()) + }, + Component::NonTSPseudoClass(ref pc) => { + if let Some(ref iter) = context.quirks_data { + if pc.is_active_or_hover() && + !element.is_link() && + hover_and_active_quirk_applies(iter, context.shared, context.rightmost) + { + return false; + } + } + element.match_non_ts_pseudo_class(pc, &mut context.shared) + }, + Component::Root => element.is_root(), + Component::Empty => { + if context.shared.needs_selector_flags() { + element.apply_selector_flags(ElementSelectorFlags::HAS_EMPTY_SELECTOR); + } + element.is_empty() + }, + Component::Host(ref selector) => { + matches_host(element, selector.as_ref(), &mut context.shared, rightmost) + }, + Component::ParentSelector | Component::Scope => match context.shared.scope_element { + Some(ref scope_element) => element.opaque() == *scope_element, + None => element.is_root(), + }, + Component::Nth(ref nth_data) => { + matches_generic_nth_child(element, context.shared, nth_data, &[], rightmost) + }, + Component::NthOf(ref nth_of_data) => context.shared.nest(|context| { + matches_generic_nth_child( + element, + context, + nth_of_data.nth_data(), + nth_of_data.selectors(), + rightmost, + ) + }), + Component::Is(ref list) | Component::Where(ref list) => context.shared.nest(|context| { + matches_complex_selector_list(list.slice(), element, context, rightmost) + }), + Component::Negation(ref list) => context.shared.nest_for_negation(|context| { + !matches_complex_selector_list(list.slice(), element, context, rightmost) + }), + Component::Has(ref relative_selectors) => { + match_relative_selectors(relative_selectors, element, context.shared, rightmost) + }, + Component::Combinator(_) => unsafe { + debug_unreachable!("Shouldn't try to selector-match combinators") + }, + Component::RelativeSelectorAnchor => { + let anchor = context.shared.relative_selector_anchor(); + // We may match inner relative selectors, in which case we want to always match. + anchor.map_or(true, |a| a == element.opaque()) + }, + Component::Invalid(..) => false, + } +} + +#[inline(always)] +pub fn select_name<'a, E: Element, T: PartialEq>( + element: &E, + local_name: &'a T, + local_name_lower: &'a T, +) -> &'a T { + if local_name == local_name_lower || element.is_html_element_in_html_document() { + local_name_lower + } else { + local_name + } +} + +#[inline(always)] +pub fn to_unconditional_case_sensitivity<'a, E: Element>( + parsed: ParsedCaseSensitivity, + element: &E, +) -> CaseSensitivity { + match parsed { + ParsedCaseSensitivity::CaseSensitive | ParsedCaseSensitivity::ExplicitCaseSensitive => { + CaseSensitivity::CaseSensitive + }, + ParsedCaseSensitivity::AsciiCaseInsensitive => CaseSensitivity::AsciiCaseInsensitive, + ParsedCaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument => { + if element.is_html_element_in_html_document() { + CaseSensitivity::AsciiCaseInsensitive + } else { + CaseSensitivity::CaseSensitive + } + }, + } +} + +fn matches_generic_nth_child<E>( + element: &E, + context: &mut MatchingContext<E::Impl>, + nth_data: &NthSelectorData, + selectors: &[Selector<E::Impl>], + rightmost: SubjectOrPseudoElement, +) -> bool +where + E: Element, +{ + if element.ignores_nth_child_selectors() { + return false; + } + let has_selectors = !selectors.is_empty(); + let selectors_match = + !has_selectors || matches_complex_selector_list(selectors, element, context, rightmost); + if context.matching_for_invalidation() { + // Skip expensive indexing math in invalidation. + return selectors_match && !context.in_negation(); + } + + let NthSelectorData { ty, a, b, .. } = *nth_data; + let is_of_type = ty.is_of_type(); + if ty.is_only() { + debug_assert!( + !has_selectors, + ":only-child and :only-of-type cannot have a selector list!" + ); + return matches_generic_nth_child( + element, + context, + &NthSelectorData::first(is_of_type), + selectors, + rightmost, + ) && matches_generic_nth_child( + element, + context, + &NthSelectorData::last(is_of_type), + selectors, + rightmost, + ); + } + + let is_from_end = ty.is_from_end(); + + // It's useful to know whether this can only select the first/last element + // child for optimization purposes, see the `HAS_EDGE_CHILD_SELECTOR` flag. + let is_edge_child_selector = nth_data.is_simple_edge() && !has_selectors; + + if context.needs_selector_flags() { + let mut flags = if is_edge_child_selector { + ElementSelectorFlags::HAS_EDGE_CHILD_SELECTOR + } else if is_from_end { + ElementSelectorFlags::HAS_SLOW_SELECTOR + } else { + ElementSelectorFlags::HAS_SLOW_SELECTOR_LATER_SIBLINGS + }; + flags |= if has_selectors { + ElementSelectorFlags::HAS_SLOW_SELECTOR_NTH_OF + } else { + ElementSelectorFlags::HAS_SLOW_SELECTOR_NTH + }; + element.apply_selector_flags(flags); + } + + if !selectors_match { + return false; + } + + // :first/last-child are rather trivial to match, don't bother with the + // cache. + if is_edge_child_selector { + return if is_from_end { + element.next_sibling_element() + } else { + element.prev_sibling_element() + } + .is_none(); + } + + // Lookup or compute the index. + let index = if let Some(i) = context + .nth_index_cache(is_of_type, is_from_end, selectors) + .lookup(element.opaque()) + { + i + } else { + let i = nth_child_index( + element, + context, + selectors, + is_of_type, + is_from_end, + /* check_cache = */ true, + rightmost, + ); + context + .nth_index_cache(is_of_type, is_from_end, selectors) + .insert(element.opaque(), i); + i + }; + debug_assert_eq!( + index, + nth_child_index( + element, + context, + selectors, + is_of_type, + is_from_end, + /* check_cache = */ false, + rightmost, + ), + "invalid cache" + ); + + // Is there a non-negative integer n such that An+B=index? + match index.checked_sub(b) { + None => false, + Some(an) => match an.checked_div(a) { + Some(n) => n >= 0 && a * n == an, + None /* a == 0 */ => an == 0, + }, + } +} + +#[inline] +fn nth_child_index<E>( + element: &E, + context: &mut MatchingContext<E::Impl>, + selectors: &[Selector<E::Impl>], + is_of_type: bool, + is_from_end: bool, + check_cache: bool, + rightmost: SubjectOrPseudoElement, +) -> i32 +where + E: Element, +{ + // The traversal mostly processes siblings left to right. So when we walk + // siblings to the right when computing NthLast/NthLastOfType we're unlikely + // to get cache hits along the way. As such, we take the hit of walking the + // siblings to the left checking the cache in the is_from_end case (this + // matches what Gecko does). The indices-from-the-left is handled during the + // regular look further below. + if check_cache && + is_from_end && + !context + .nth_index_cache(is_of_type, is_from_end, selectors) + .is_empty() + { + let mut index: i32 = 1; + let mut curr = element.clone(); + while let Some(e) = curr.prev_sibling_element() { + curr = e; + let matches = if is_of_type { + element.is_same_type(&curr) + } else if !selectors.is_empty() { + matches_complex_selector_list(selectors, &curr, context, rightmost) + } else { + true + }; + if !matches { + continue; + } + if let Some(i) = context + .nth_index_cache(is_of_type, is_from_end, selectors) + .lookup(curr.opaque()) + { + return i - index; + } + index += 1; + } + } + + let mut index: i32 = 1; + let mut curr = element.clone(); + let next = |e: E| { + if is_from_end { + e.next_sibling_element() + } else { + e.prev_sibling_element() + } + }; + while let Some(e) = next(curr) { + curr = e; + let matches = if is_of_type { + element.is_same_type(&curr) + } else if !selectors.is_empty() { + matches_complex_selector_list(selectors, &curr, context, rightmost) + } else { + true + }; + if !matches { + continue; + } + // If we're computing indices from the left, check each element in the + // cache. We handle the indices-from-the-right case at the top of this + // function. + if !is_from_end && check_cache { + if let Some(i) = context + .nth_index_cache(is_of_type, is_from_end, selectors) + .lookup(curr.opaque()) + { + return i + index; + } + } + index += 1; + } + + index +} diff --git a/servo/components/selectors/nth_index_cache.rs b/servo/components/selectors/nth_index_cache.rs new file mode 100644 index 0000000000..b4b41578d0 --- /dev/null +++ b/servo/components/selectors/nth_index_cache.rs @@ -0,0 +1,102 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +use std::hash::Hash; + +use crate::{parser::Selector, tree::OpaqueElement, SelectorImpl}; +use fxhash::FxHashMap; + +/// A cache to speed up matching of nth-index-like selectors. +/// +/// See [1] for some discussion around the design tradeoffs. +/// +/// [1] https://bugzilla.mozilla.org/show_bug.cgi?id=1401855#c3 +#[derive(Default)] +pub struct NthIndexCache { + nth: NthIndexCacheInner, + nth_of_selectors: NthIndexOfSelectorsCaches, + nth_last: NthIndexCacheInner, + nth_last_of_selectors: NthIndexOfSelectorsCaches, + nth_of_type: NthIndexCacheInner, + nth_last_of_type: NthIndexCacheInner, +} + +impl NthIndexCache { + /// Gets the appropriate cache for the given parameters. + pub fn get<Impl: SelectorImpl>( + &mut self, + is_of_type: bool, + is_from_end: bool, + selectors: &[Selector<Impl>], + ) -> &mut NthIndexCacheInner { + if is_of_type { + return if is_from_end { + &mut self.nth_last_of_type + } else { + &mut self.nth_of_type + }; + } + if !selectors.is_empty() { + return if is_from_end { + self.nth_last_of_selectors.lookup(selectors) + } else { + self.nth_of_selectors.lookup(selectors) + }; + } + if is_from_end { + &mut self.nth_last + } else { + &mut self.nth + } + } +} + +#[derive(Hash, Eq, PartialEq)] +struct SelectorListCacheKey(usize); + +/// Use the selector list's pointer as the cache key +impl SelectorListCacheKey { + // :nth-child of selectors are reference-counted with `ThinArc`, so we know their pointers are stable. + fn new<Impl: SelectorImpl>(selectors: &[Selector<Impl>]) -> Self { + Self(selectors.as_ptr() as usize) + } +} + +/// Use a different map of cached indices per :nth-child's or :nth-last-child's selector list +#[derive(Default)] +pub struct NthIndexOfSelectorsCaches(FxHashMap<SelectorListCacheKey, NthIndexCacheInner>); + +/// Get or insert a map of cached incides for the selector list of this +/// particular :nth-child or :nth-last-child pseudoclass +impl NthIndexOfSelectorsCaches { + pub fn lookup<Impl: SelectorImpl>( + &mut self, + selectors: &[Selector<Impl>], + ) -> &mut NthIndexCacheInner { + self.0 + .entry(SelectorListCacheKey::new(selectors)) + .or_default() + } +} + +/// The concrete per-pseudo-class cache. +#[derive(Default)] +pub struct NthIndexCacheInner(FxHashMap<OpaqueElement, i32>); + +impl NthIndexCacheInner { + /// Does a lookup for a given element in the cache. + pub fn lookup(&mut self, el: OpaqueElement) -> Option<i32> { + self.0.get(&el).copied() + } + + /// Inserts an entry into the cache. + pub fn insert(&mut self, element: OpaqueElement, index: i32) { + self.0.insert(element, index); + } + + /// Returns whether the cache is empty. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } +} diff --git a/servo/components/selectors/parser.rs b/servo/components/selectors/parser.rs new file mode 100644 index 0000000000..792d4eb8bc --- /dev/null +++ b/servo/components/selectors/parser.rs @@ -0,0 +1,4483 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +use crate::attr::{AttrSelectorOperator, AttrSelectorWithOptionalNamespace}; +use crate::attr::{NamespaceConstraint, ParsedAttrSelectorOperation, ParsedCaseSensitivity}; +use crate::bloom::BLOOM_HASH_MASK; +use crate::builder::{ + relative_selector_list_specificity_and_flags, selector_list_specificity_and_flags, + SelectorBuilder, SelectorFlags, Specificity, SpecificityAndFlags, +}; +use crate::context::QuirksMode; +use crate::sink::Push; +use crate::visitor::SelectorListKind; +pub use crate::visitor::SelectorVisitor; +use cssparser::parse_nth; +use cssparser::{BasicParseError, BasicParseErrorKind, ParseError, ParseErrorKind}; +use cssparser::{CowRcStr, Delimiter, SourceLocation}; +use cssparser::{Parser as CssParser, ToCss, Token}; +use precomputed_hash::PrecomputedHash; +use servo_arc::{Arc, ArcUnionBorrow, ThinArc, ThinArcUnion, UniqueArc}; +use smallvec::SmallVec; +use std::borrow::{Borrow, Cow}; +use std::fmt::{self, Debug}; +use std::iter::Rev; +use std::slice; + +/// A trait that represents a pseudo-element. +pub trait PseudoElement: Sized + ToCss { + /// The `SelectorImpl` this pseudo-element is used for. + type Impl: SelectorImpl; + + /// Whether the pseudo-element supports a given state selector to the right + /// of it. + fn accepts_state_pseudo_classes(&self) -> bool { + false + } + + /// Whether this pseudo-element is valid after a ::slotted(..) pseudo. + fn valid_after_slotted(&self) -> bool { + false + } +} + +/// A trait that represents a pseudo-class. +pub trait NonTSPseudoClass: Sized + ToCss { + /// The `SelectorImpl` this pseudo-element is used for. + type Impl: SelectorImpl; + + /// Whether this pseudo-class is :active or :hover. + fn is_active_or_hover(&self) -> bool; + + /// Whether this pseudo-class belongs to: + /// + /// https://drafts.csswg.org/selectors-4/#useraction-pseudos + fn is_user_action_state(&self) -> bool; + + fn visit<V>(&self, _visitor: &mut V) -> bool + where + V: SelectorVisitor<Impl = Self::Impl>, + { + true + } +} + +/// Returns a Cow::Borrowed if `s` is already ASCII lowercase, and a +/// Cow::Owned if `s` had to be converted into ASCII lowercase. +fn to_ascii_lowercase(s: &str) -> Cow<str> { + if let Some(first_uppercase) = s.bytes().position(|byte| byte >= b'A' && byte <= b'Z') { + let mut string = s.to_owned(); + string[first_uppercase..].make_ascii_lowercase(); + string.into() + } else { + s.into() + } +} + +bitflags! { + /// Flags that indicate at which point of parsing a selector are we. + #[derive(Copy, Clone)] + struct SelectorParsingState: u8 { + /// Whether we should avoid adding default namespaces to selectors that + /// aren't type or universal selectors. + const SKIP_DEFAULT_NAMESPACE = 1 << 0; + + /// Whether we've parsed a ::slotted() pseudo-element already. + /// + /// If so, then we can only parse a subset of pseudo-elements, and + /// whatever comes after them if so. + const AFTER_SLOTTED = 1 << 1; + /// Whether we've parsed a ::part() pseudo-element already. + /// + /// If so, then we can only parse a subset of pseudo-elements, and + /// whatever comes after them if so. + const AFTER_PART = 1 << 2; + /// Whether we've parsed a pseudo-element (as in, an + /// `Impl::PseudoElement` thus not accounting for `::slotted` or + /// `::part`) already. + /// + /// If so, then other pseudo-elements and most other selectors are + /// disallowed. + const AFTER_PSEUDO_ELEMENT = 1 << 3; + /// Whether we've parsed a non-stateful pseudo-element (again, as-in + /// `Impl::PseudoElement`) already. If so, then other pseudo-classes are + /// disallowed. If this flag is set, `AFTER_PSEUDO_ELEMENT` must be set + /// as well. + const AFTER_NON_STATEFUL_PSEUDO_ELEMENT = 1 << 4; + + /// Whether we are after any of the pseudo-like things. + const AFTER_PSEUDO = Self::AFTER_PART.bits() | Self::AFTER_SLOTTED.bits() | Self::AFTER_PSEUDO_ELEMENT.bits(); + + /// Whether we explicitly disallow combinators. + const DISALLOW_COMBINATORS = 1 << 5; + + /// Whether we explicitly disallow pseudo-element-like things. + const DISALLOW_PSEUDOS = 1 << 6; + + /// Whether we explicitly disallow relative selectors (i.e. `:has()`). + const DISALLOW_RELATIVE_SELECTOR = 1 << 7; + } +} + +impl SelectorParsingState { + #[inline] + fn allows_pseudos(self) -> bool { + // NOTE(emilio): We allow pseudos after ::part and such. + !self.intersects(Self::AFTER_PSEUDO_ELEMENT | Self::DISALLOW_PSEUDOS) + } + + #[inline] + fn allows_slotted(self) -> bool { + !self.intersects(Self::AFTER_PSEUDO | Self::DISALLOW_PSEUDOS) + } + + #[inline] + fn allows_part(self) -> bool { + !self.intersects(Self::AFTER_PSEUDO | Self::DISALLOW_PSEUDOS) + } + + #[inline] + fn allows_non_functional_pseudo_classes(self) -> bool { + !self.intersects(Self::AFTER_SLOTTED | Self::AFTER_NON_STATEFUL_PSEUDO_ELEMENT) + } + + #[inline] + fn allows_tree_structural_pseudo_classes(self) -> bool { + !self.intersects(Self::AFTER_PSEUDO) + } + + #[inline] + fn allows_combinators(self) -> bool { + !self.intersects(Self::DISALLOW_COMBINATORS) + } +} + +pub type SelectorParseError<'i> = ParseError<'i, SelectorParseErrorKind<'i>>; + +#[derive(Clone, Debug, PartialEq)] +pub enum SelectorParseErrorKind<'i> { + NoQualifiedNameInAttributeSelector(Token<'i>), + EmptySelector, + DanglingCombinator, + NonCompoundSelector, + NonPseudoElementAfterSlotted, + InvalidPseudoElementAfterSlotted, + InvalidPseudoElementInsideWhere, + InvalidState, + UnexpectedTokenInAttributeSelector(Token<'i>), + PseudoElementExpectedColon(Token<'i>), + PseudoElementExpectedIdent(Token<'i>), + NoIdentForPseudo(Token<'i>), + UnsupportedPseudoClassOrElement(CowRcStr<'i>), + UnexpectedIdent(CowRcStr<'i>), + ExpectedNamespace(CowRcStr<'i>), + ExpectedBarInAttr(Token<'i>), + BadValueInAttr(Token<'i>), + InvalidQualNameInAttr(Token<'i>), + ExplicitNamespaceUnexpectedToken(Token<'i>), + ClassNeedsIdent(Token<'i>), +} + +macro_rules! with_all_bounds { + ( + [ $( $InSelector: tt )* ] + [ $( $CommonBounds: tt )* ] + [ $( $FromStr: tt )* ] + ) => { + /// This trait allows to define the parser implementation in regards + /// of pseudo-classes/elements + /// + /// NB: We need Clone so that we can derive(Clone) on struct with that + /// are parameterized on SelectorImpl. See + /// <https://github.com/rust-lang/rust/issues/26925> + pub trait SelectorImpl: Clone + Debug + Sized + 'static { + type ExtraMatchingData<'a>: Sized + Default; + type AttrValue: $($InSelector)*; + type Identifier: $($InSelector)* + PrecomputedHash; + type LocalName: $($InSelector)* + Borrow<Self::BorrowedLocalName> + PrecomputedHash; + type NamespaceUrl: $($CommonBounds)* + Default + Borrow<Self::BorrowedNamespaceUrl> + PrecomputedHash; + type NamespacePrefix: $($InSelector)* + Default; + type BorrowedNamespaceUrl: ?Sized + Eq; + type BorrowedLocalName: ?Sized + Eq; + + /// non tree-structural pseudo-classes + /// (see: https://drafts.csswg.org/selectors/#structural-pseudos) + type NonTSPseudoClass: $($CommonBounds)* + NonTSPseudoClass<Impl = Self>; + + /// pseudo-elements + type PseudoElement: $($CommonBounds)* + PseudoElement<Impl = Self>; + + /// Whether attribute hashes should be collected for filtering + /// purposes. + fn should_collect_attr_hash(_name: &Self::LocalName) -> bool { + false + } + } + } +} + +macro_rules! with_bounds { + ( [ $( $CommonBounds: tt )* ] [ $( $FromStr: tt )* ]) => { + with_all_bounds! { + [$($CommonBounds)* + $($FromStr)* + ToCss] + [$($CommonBounds)*] + [$($FromStr)*] + } + } +} + +with_bounds! { + [Clone + Eq] + [for<'a> From<&'a str>] +} + +pub trait Parser<'i> { + type Impl: SelectorImpl; + type Error: 'i + From<SelectorParseErrorKind<'i>>; + + /// Whether to parse the `::slotted()` pseudo-element. + fn parse_slotted(&self) -> bool { + false + } + + /// Whether to parse the `::part()` pseudo-element. + fn parse_part(&self) -> bool { + false + } + + /// Whether to parse the selector list of nth-child() or nth-last-child(). + fn parse_nth_child_of(&self) -> bool { + false + } + + /// Whether to parse the `:where` pseudo-class. + fn parse_is_and_where(&self) -> bool { + false + } + + /// Whether to parse the :has pseudo-class. + fn parse_has(&self) -> bool { + false + } + + /// Whether to parse the '&' delimiter as a parent selector. + fn parse_parent_selector(&self) -> bool { + false + } + + /// Whether the given function name is an alias for the `:is()` function. + fn is_is_alias(&self, _name: &str) -> bool { + false + } + + /// Whether to parse the `:host` pseudo-class. + fn parse_host(&self) -> bool { + false + } + + /// Whether to allow forgiving selector-list parsing. + fn allow_forgiving_selectors(&self) -> bool { + true + } + + /// This function can return an "Err" pseudo-element in order to support CSS2.1 + /// pseudo-elements. + fn parse_non_ts_pseudo_class( + &self, + location: SourceLocation, + name: CowRcStr<'i>, + ) -> Result<<Self::Impl as SelectorImpl>::NonTSPseudoClass, ParseError<'i, Self::Error>> { + Err( + location.new_custom_error(SelectorParseErrorKind::UnsupportedPseudoClassOrElement( + name, + )), + ) + } + + fn parse_non_ts_functional_pseudo_class<'t>( + &self, + name: CowRcStr<'i>, + parser: &mut CssParser<'i, 't>, + _after_part: bool, + ) -> Result<<Self::Impl as SelectorImpl>::NonTSPseudoClass, ParseError<'i, Self::Error>> { + Err( + parser.new_custom_error(SelectorParseErrorKind::UnsupportedPseudoClassOrElement( + name, + )), + ) + } + + fn parse_pseudo_element( + &self, + location: SourceLocation, + name: CowRcStr<'i>, + ) -> Result<<Self::Impl as SelectorImpl>::PseudoElement, ParseError<'i, Self::Error>> { + Err( + location.new_custom_error(SelectorParseErrorKind::UnsupportedPseudoClassOrElement( + name, + )), + ) + } + + fn parse_functional_pseudo_element<'t>( + &self, + name: CowRcStr<'i>, + arguments: &mut CssParser<'i, 't>, + ) -> Result<<Self::Impl as SelectorImpl>::PseudoElement, ParseError<'i, Self::Error>> { + Err( + arguments.new_custom_error(SelectorParseErrorKind::UnsupportedPseudoClassOrElement( + name, + )), + ) + } + + fn default_namespace(&self) -> Option<<Self::Impl as SelectorImpl>::NamespaceUrl> { + None + } + + fn namespace_for_prefix( + &self, + _prefix: &<Self::Impl as SelectorImpl>::NamespacePrefix, + ) -> Option<<Self::Impl as SelectorImpl>::NamespaceUrl> { + None + } +} + +/// A selector list is a tagged pointer with either a single selector, or a ThinArc<()> of multiple +/// selectors. +#[derive(Clone, Eq, Debug, PartialEq, ToShmem)] +#[shmem(no_bounds)] +pub struct SelectorList<Impl: SelectorImpl>( + #[shmem(field_bound)] ThinArcUnion<SpecificityAndFlags, Component<Impl>, (), Selector<Impl>>, +); + +impl<Impl: SelectorImpl> SelectorList<Impl> { + pub fn from_one(selector: Selector<Impl>) -> Self { + #[cfg(debug_assertions)] + let selector_repr = unsafe { *(&selector as *const _ as *const usize) }; + let list = Self(ThinArcUnion::from_first(selector.into_data())); + #[cfg(debug_assertions)] + debug_assert_eq!( + selector_repr, + unsafe { *(&list as *const _ as *const usize) }, + "We rely on the same bit representation for the single selector variant" + ); + list + } + + pub fn from_iter(mut iter: impl ExactSizeIterator<Item = Selector<Impl>>) -> Self { + if iter.len() == 1 { + Self::from_one(iter.next().unwrap()) + } else { + Self(ThinArcUnion::from_second(ThinArc::from_header_and_iter( + (), + iter, + ))) + } + } + + #[inline] + pub fn slice(&self) -> &[Selector<Impl>] { + match self.0.borrow() { + ArcUnionBorrow::First(..) => { + // SAFETY: see from_one. + let selector: &Selector<Impl> = unsafe { std::mem::transmute(self) }; + std::slice::from_ref(selector) + }, + ArcUnionBorrow::Second(list) => list.get().slice(), + } + } + + #[inline] + pub fn len(&self) -> usize { + match self.0.borrow() { + ArcUnionBorrow::First(..) => 1, + ArcUnionBorrow::Second(list) => list.len(), + } + } + + /// Returns the address on the heap of the ThinArc for memory reporting. + pub fn thin_arc_heap_ptr(&self) -> *const ::std::os::raw::c_void { + match self.0.borrow() { + ArcUnionBorrow::First(s) => s.with_arc(|a| a.heap_ptr()), + ArcUnionBorrow::Second(s) => s.with_arc(|a| a.heap_ptr()), + } + } +} + +/// Uniquely identify a selector based on its components, which is behind ThinArc and +/// is therefore stable. +#[derive(Clone, Copy, Hash, Eq, PartialEq)] +pub struct SelectorKey(usize); + +impl SelectorKey { + /// Create a new key based on the given selector. + pub fn new<Impl: SelectorImpl>(selector: &Selector<Impl>) -> Self { + Self(selector.0.slice().as_ptr() as usize) + } +} + +/// Whether or not we're using forgiving parsing mode +#[derive(PartialEq)] +enum ForgivingParsing { + /// Discard the entire selector list upon encountering any invalid selector. + /// This is the default behavior for almost all of CSS. + No, + /// Ignore invalid selectors, potentially creating an empty selector list. + /// + /// This is the error recovery mode of :is() and :where() + Yes, +} + +/// Flag indicating if we're parsing relative selectors. +#[derive(Copy, Clone, PartialEq)] +pub enum ParseRelative { + /// Expect selectors to start with a combinator, assuming descendant combinator if not present. + ForHas, + /// Allow selectors to start with a combinator, prepending a parent selector if so. Do nothing + /// otherwise + ForNesting, + /// Treat as parse error if any selector begins with a combinator. + No, +} + +impl<Impl: SelectorImpl> SelectorList<Impl> { + /// Returns a selector list with a single `&` + pub fn ampersand() -> Self { + Self::from_one(Selector::ampersand()) + } + + /// Parse a comma-separated list of Selectors. + /// <https://drafts.csswg.org/selectors/#grouping> + /// + /// Return the Selectors or Err if there is an invalid selector. + pub fn parse<'i, 't, P>( + parser: &P, + input: &mut CssParser<'i, 't>, + parse_relative: ParseRelative, + ) -> Result<Self, ParseError<'i, P::Error>> + where + P: Parser<'i, Impl = Impl>, + { + Self::parse_with_state( + parser, + input, + SelectorParsingState::empty(), + ForgivingParsing::No, + parse_relative, + ) + } + + #[inline] + fn parse_with_state<'i, 't, P>( + parser: &P, + input: &mut CssParser<'i, 't>, + state: SelectorParsingState, + recovery: ForgivingParsing, + parse_relative: ParseRelative, + ) -> Result<Self, ParseError<'i, P::Error>> + where + P: Parser<'i, Impl = Impl>, + { + let mut values = SmallVec::<[_; 4]>::new(); + let forgiving = recovery == ForgivingParsing::Yes && parser.allow_forgiving_selectors(); + loop { + let selector = input.parse_until_before(Delimiter::Comma, |input| { + let start = input.position(); + let mut selector = parse_selector(parser, input, state, parse_relative); + if forgiving && (selector.is_err() || input.expect_exhausted().is_err()) { + input.expect_no_error_token()?; + selector = Ok(Selector::new_invalid(input.slice_from(start))); + } + selector + })?; + + values.push(selector); + + match input.next() { + Ok(&Token::Comma) => {}, + Ok(_) => unreachable!(), + Err(_) => break, + } + } + Ok(Self::from_iter(values.into_iter())) + } + + /// Replaces the parent selector in all the items of the selector list. + pub fn replace_parent_selector(&self, parent: &SelectorList<Impl>) -> Self { + Self::from_iter( + self.slice() + .iter() + .map(|selector| selector.replace_parent_selector(parent)), + ) + } + + /// Creates a SelectorList from a Vec of selectors. Used in tests. + #[allow(dead_code)] + pub(crate) fn from_vec(v: Vec<Selector<Impl>>) -> Self { + SelectorList::from_iter(v.into_iter()) + } +} + +/// Parses one compound selector suitable for nested stuff like :-moz-any, etc. +fn parse_inner_compound_selector<'i, 't, P, Impl>( + parser: &P, + input: &mut CssParser<'i, 't>, + state: SelectorParsingState, +) -> Result<Selector<Impl>, ParseError<'i, P::Error>> +where + P: Parser<'i, Impl = Impl>, + Impl: SelectorImpl, +{ + parse_selector( + parser, + input, + state | SelectorParsingState::DISALLOW_PSEUDOS | SelectorParsingState::DISALLOW_COMBINATORS, + ParseRelative::No, + ) +} + +/// Ancestor hashes for the bloom filter. We precompute these and store them +/// inline with selectors to optimize cache performance during matching. +/// This matters a lot. +/// +/// We use 4 hashes, which is copied from Gecko, who copied it from WebKit. +/// Note that increasing the number of hashes here will adversely affect the +/// cache hit when fast-rejecting long lists of Rules with inline hashes. +/// +/// Because the bloom filter only uses the bottom 24 bits of the hash, we pack +/// the fourth hash into the upper bits of the first three hashes in order to +/// shrink Rule (whose size matters a lot). This scheme minimizes the runtime +/// overhead of the packing for the first three hashes (we just need to mask +/// off the upper bits) at the expense of making the fourth somewhat more +/// complicated to assemble, because we often bail out before checking all the +/// hashes. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct AncestorHashes { + pub packed_hashes: [u32; 3], +} + +pub(crate) fn collect_selector_hashes<'a, Impl: SelectorImpl, Iter>( + iter: Iter, + quirks_mode: QuirksMode, + hashes: &mut [u32; 4], + len: &mut usize, + create_inner_iterator: fn(&'a Selector<Impl>) -> Iter, +) -> bool +where + Iter: Iterator<Item = &'a Component<Impl>>, +{ + for component in iter { + let hash = match *component { + Component::LocalName(LocalName { + ref name, + ref lower_name, + }) => { + // Only insert the local-name into the filter if it's all + // lowercase. Otherwise we would need to test both hashes, and + // our data structures aren't really set up for that. + if name != lower_name { + continue; + } + name.precomputed_hash() + }, + Component::DefaultNamespace(ref url) | Component::Namespace(_, ref url) => { + url.precomputed_hash() + }, + // In quirks mode, class and id selectors should match + // case-insensitively, so just avoid inserting them into the filter. + Component::ID(ref id) if quirks_mode != QuirksMode::Quirks => id.precomputed_hash(), + Component::Class(ref class) if quirks_mode != QuirksMode::Quirks => { + class.precomputed_hash() + }, + Component::AttributeInNoNamespace { ref local_name, .. } + if Impl::should_collect_attr_hash(local_name) => + { + // AttributeInNoNamespace is only used when local_name == + // local_name_lower. + local_name.precomputed_hash() + }, + Component::AttributeInNoNamespaceExists { + ref local_name, + ref local_name_lower, + .. + } => { + // Only insert the local-name into the filter if it's all + // lowercase. Otherwise we would need to test both hashes, and + // our data structures aren't really set up for that. + if local_name != local_name_lower || !Impl::should_collect_attr_hash(local_name) { + continue; + } + local_name.precomputed_hash() + }, + Component::AttributeOther(ref selector) => { + if selector.local_name != selector.local_name_lower || + !Impl::should_collect_attr_hash(&selector.local_name) + { + continue; + } + selector.local_name.precomputed_hash() + }, + Component::Is(ref list) | Component::Where(ref list) => { + // :where and :is OR their selectors, so we can't put any hash + // in the filter if there's more than one selector, as that'd + // exclude elements that may match one of the other selectors. + let slice = list.slice(); + if slice.len() == 1 && + !collect_selector_hashes( + create_inner_iterator(&slice[0]), + quirks_mode, + hashes, + len, + create_inner_iterator, + ) + { + return false; + } + continue; + }, + _ => continue, + }; + + hashes[*len] = hash & BLOOM_HASH_MASK; + *len += 1; + if *len == hashes.len() { + return false; + } + } + true +} + +fn collect_ancestor_hashes<Impl: SelectorImpl>( + iter: SelectorIter<Impl>, + quirks_mode: QuirksMode, + hashes: &mut [u32; 4], + len: &mut usize, +) { + collect_selector_hashes(AncestorIter::new(iter), quirks_mode, hashes, len, |s| { + AncestorIter(s.iter()) + }); +} + +impl AncestorHashes { + pub fn new<Impl: SelectorImpl>(selector: &Selector<Impl>, quirks_mode: QuirksMode) -> Self { + // Compute ancestor hashes for the bloom filter. + let mut hashes = [0u32; 4]; + let mut len = 0; + collect_ancestor_hashes(selector.iter(), quirks_mode, &mut hashes, &mut len); + debug_assert!(len <= 4); + + // Now, pack the fourth hash (if it exists) into the upper byte of each of + // the other three hashes. + if len == 4 { + let fourth = hashes[3]; + hashes[0] |= (fourth & 0x000000ff) << 24; + hashes[1] |= (fourth & 0x0000ff00) << 16; + hashes[2] |= (fourth & 0x00ff0000) << 8; + } + + AncestorHashes { + packed_hashes: [hashes[0], hashes[1], hashes[2]], + } + } + + /// Returns the fourth hash, reassembled from parts. + pub fn fourth_hash(&self) -> u32 { + ((self.packed_hashes[0] & 0xff000000) >> 24) | + ((self.packed_hashes[1] & 0xff000000) >> 16) | + ((self.packed_hashes[2] & 0xff000000) >> 8) + } +} + +#[inline] +pub fn namespace_empty_string<Impl: SelectorImpl>() -> Impl::NamespaceUrl { + // Rust type’s default, not default namespace + Impl::NamespaceUrl::default() +} + +type SelectorData<Impl> = ThinArc<SpecificityAndFlags, Component<Impl>>; + +/// A Selector stores a sequence of simple selectors and combinators. The +/// iterator classes allow callers to iterate at either the raw sequence level or +/// at the level of sequences of simple selectors separated by combinators. Most +/// callers want the higher-level iterator. +/// +/// We store compound selectors internally right-to-left (in matching order). +/// Additionally, we invert the order of top-level compound selectors so that +/// each one matches left-to-right. This is because matching namespace, local name, +/// id, and class are all relatively cheap, whereas matching pseudo-classes might +/// be expensive (depending on the pseudo-class). Since authors tend to put the +/// pseudo-classes on the right, it's faster to start matching on the left. +/// +/// This reordering doesn't change the semantics of selector matching, and we +/// handle it in to_css to make it invisible to serialization. +#[derive(Clone, Eq, PartialEq, ToShmem)] +#[shmem(no_bounds)] +#[repr(transparent)] +pub struct Selector<Impl: SelectorImpl>(#[shmem(field_bound)] SelectorData<Impl>); + +impl<Impl: SelectorImpl> Selector<Impl> { + /// See Arc::mark_as_intentionally_leaked + pub fn mark_as_intentionally_leaked(&self) { + self.0.mark_as_intentionally_leaked() + } + + fn ampersand() -> Self { + Self(ThinArc::from_header_and_iter( + SpecificityAndFlags { + specificity: 0, + flags: SelectorFlags::HAS_PARENT, + }, + std::iter::once(Component::ParentSelector), + )) + } + + #[inline] + pub fn specificity(&self) -> u32 { + self.0.header.specificity + } + + #[inline] + pub(crate) fn flags(&self) -> SelectorFlags { + self.0.header.flags + } + + #[inline] + pub fn has_pseudo_element(&self) -> bool { + self.flags().intersects(SelectorFlags::HAS_PSEUDO) + } + + #[inline] + pub fn has_parent_selector(&self) -> bool { + self.flags().intersects(SelectorFlags::HAS_PARENT) + } + + #[inline] + pub fn is_slotted(&self) -> bool { + self.flags().intersects(SelectorFlags::HAS_SLOTTED) + } + + #[inline] + pub fn is_part(&self) -> bool { + self.flags().intersects(SelectorFlags::HAS_PART) + } + + #[inline] + pub fn parts(&self) -> Option<&[Impl::Identifier]> { + if !self.is_part() { + return None; + } + + let mut iter = self.iter(); + if self.has_pseudo_element() { + // Skip the pseudo-element. + for _ in &mut iter {} + + let combinator = iter.next_sequence()?; + debug_assert_eq!(combinator, Combinator::PseudoElement); + } + + for component in iter { + if let Component::Part(ref part) = *component { + return Some(part); + } + } + + debug_assert!(false, "is_part() lied somehow?"); + None + } + + #[inline] + pub fn pseudo_element(&self) -> Option<&Impl::PseudoElement> { + if !self.has_pseudo_element() { + return None; + } + + for component in self.iter() { + if let Component::PseudoElement(ref pseudo) = *component { + return Some(pseudo); + } + } + + debug_assert!(false, "has_pseudo_element lied!"); + None + } + + /// Whether this selector (pseudo-element part excluded) matches every element. + /// + /// Used for "pre-computed" pseudo-elements in components/style/stylist.rs + #[inline] + pub fn is_universal(&self) -> bool { + self.iter_raw_match_order().all(|c| { + matches!( + *c, + Component::ExplicitUniversalType | + Component::ExplicitAnyNamespace | + Component::Combinator(Combinator::PseudoElement) | + Component::PseudoElement(..) + ) + }) + } + + /// Returns an iterator over this selector in matching order (right-to-left). + /// When a combinator is reached, the iterator will return None, and + /// next_sequence() may be called to continue to the next sequence. + #[inline] + pub fn iter(&self) -> SelectorIter<Impl> { + SelectorIter { + iter: self.iter_raw_match_order(), + next_combinator: None, + } + } + + /// Same as `iter()`, but skips `RelativeSelectorAnchor` and its associated combinator. + #[inline] + pub fn iter_skip_relative_selector_anchor(&self) -> SelectorIter<Impl> { + if cfg!(debug_assertions) { + let mut selector_iter = self.iter_raw_parse_order_from(0); + assert!( + matches!( + selector_iter.next().unwrap(), + Component::RelativeSelectorAnchor + ), + "Relative selector does not start with RelativeSelectorAnchor" + ); + assert!( + selector_iter.next().unwrap().is_combinator(), + "Relative combinator does not exist" + ); + } + + SelectorIter { + iter: self.0.slice()[..self.len() - 2].iter(), + next_combinator: None, + } + } + + /// Whether this selector is a featureless :host selector, with no combinators to the left, and + /// optionally has a pseudo-element to the right. + #[inline] + pub fn is_featureless_host_selector_or_pseudo_element(&self) -> bool { + let flags = self.flags(); + flags.intersects(SelectorFlags::HAS_HOST) && + !flags.intersects(SelectorFlags::HAS_NON_FEATURELESS_COMPONENT) + } + + /// Returns an iterator over this selector in matching order (right-to-left), + /// skipping the rightmost |offset| Components. + #[inline] + pub fn iter_from(&self, offset: usize) -> SelectorIter<Impl> { + let iter = self.0.slice()[offset..].iter(); + SelectorIter { + iter, + next_combinator: None, + } + } + + /// Returns the combinator at index `index` (zero-indexed from the right), + /// or panics if the component is not a combinator. + #[inline] + pub fn combinator_at_match_order(&self, index: usize) -> Combinator { + match self.0.slice()[index] { + Component::Combinator(c) => c, + ref other => panic!( + "Not a combinator: {:?}, {:?}, index: {}", + other, self, index + ), + } + } + + /// Returns an iterator over the entire sequence of simple selectors and + /// combinators, in matching order (from right to left). + #[inline] + pub fn iter_raw_match_order(&self) -> slice::Iter<Component<Impl>> { + self.0.slice().iter() + } + + /// Returns the combinator at index `index` (zero-indexed from the left), + /// or panics if the component is not a combinator. + #[inline] + pub fn combinator_at_parse_order(&self, index: usize) -> Combinator { + match self.0.slice()[self.len() - index - 1] { + Component::Combinator(c) => c, + ref other => panic!( + "Not a combinator: {:?}, {:?}, index: {}", + other, self, index + ), + } + } + + /// Returns an iterator over the sequence of simple selectors and + /// combinators, in parse order (from left to right), starting from + /// `offset`. + #[inline] + pub fn iter_raw_parse_order_from(&self, offset: usize) -> Rev<slice::Iter<Component<Impl>>> { + self.0.slice()[..self.len() - offset].iter().rev() + } + + /// Creates a Selector from a vec of Components, specified in parse order. Used in tests. + #[allow(dead_code)] + pub(crate) fn from_vec( + vec: Vec<Component<Impl>>, + specificity: u32, + flags: SelectorFlags, + ) -> Self { + let mut builder = SelectorBuilder::default(); + for component in vec.into_iter() { + if let Some(combinator) = component.as_combinator() { + builder.push_combinator(combinator); + } else { + builder.push_simple_selector(component); + } + } + let spec = SpecificityAndFlags { specificity, flags }; + Selector(builder.build_with_specificity_and_flags(spec)) + } + + #[inline] + fn into_data(self) -> SelectorData<Impl> { + self.0 + } + + pub fn replace_parent_selector(&self, parent: &SelectorList<Impl>) -> Self { + let parent_specificity_and_flags = + selector_list_specificity_and_flags(parent.slice().iter()); + + let mut specificity = Specificity::from(self.specificity()); + let mut flags = self.flags() - SelectorFlags::HAS_PARENT; + + fn replace_parent_on_selector_list<Impl: SelectorImpl>( + orig: &[Selector<Impl>], + parent: &SelectorList<Impl>, + specificity: &mut Specificity, + flags: &mut SelectorFlags, + propagate_specificity: bool, + flags_to_propagate: SelectorFlags, + ) -> Option<SelectorList<Impl>> { + if !orig.iter().any(|s| s.has_parent_selector()) { + return None; + } + + let result = SelectorList::from_iter(orig.iter().map(|s| { + if !s.has_parent_selector() { + return s.clone(); + } + s.replace_parent_selector(parent) + })); + + let result_specificity_and_flags = + selector_list_specificity_and_flags(result.slice().iter()); + if propagate_specificity { + *specificity += Specificity::from( + result_specificity_and_flags.specificity - + selector_list_specificity_and_flags(orig.iter()).specificity, + ); + } + flags.insert( + result_specificity_and_flags + .flags + .intersection(flags_to_propagate), + ); + Some(result) + } + + fn replace_parent_on_relative_selector_list<Impl: SelectorImpl>( + orig: &[RelativeSelector<Impl>], + parent: &SelectorList<Impl>, + specificity: &mut Specificity, + flags: &mut SelectorFlags, + flags_to_propagate: SelectorFlags, + ) -> Vec<RelativeSelector<Impl>> { + let mut any = false; + + let result = orig + .iter() + .map(|s| { + if !s.selector.has_parent_selector() { + return s.clone(); + } + any = true; + RelativeSelector { + match_hint: s.match_hint, + selector: s.selector.replace_parent_selector(parent), + } + }) + .collect(); + + if !any { + return result; + } + + let result_specificity_and_flags = + relative_selector_list_specificity_and_flags(&result); + flags.insert( + result_specificity_and_flags + .flags + .intersection(flags_to_propagate), + ); + *specificity += Specificity::from( + result_specificity_and_flags.specificity - + relative_selector_list_specificity_and_flags(orig).specificity, + ); + result + } + + fn replace_parent_on_selector<Impl: SelectorImpl>( + orig: &Selector<Impl>, + parent: &SelectorList<Impl>, + specificity: &mut Specificity, + flags: &mut SelectorFlags, + flags_to_propagate: SelectorFlags, + ) -> Selector<Impl> { + if !orig.has_parent_selector() { + return orig.clone(); + } + let new_selector = orig.replace_parent_selector(parent); + *specificity += Specificity::from(new_selector.specificity() - orig.specificity()); + flags.insert(new_selector.flags().intersection(flags_to_propagate)); + new_selector + } + + let mut items = if !self.has_parent_selector() { + // Implicit `&` plus descendant combinator. + let iter = self.iter_raw_match_order(); + let len = iter.len() + 2; + specificity += Specificity::from(parent_specificity_and_flags.specificity); + flags.insert( + parent_specificity_and_flags + .flags + .intersection(SelectorFlags::for_nesting()), + ); + let iter = iter + .cloned() + .chain(std::iter::once(Component::Combinator( + Combinator::Descendant, + ))) + .chain(std::iter::once(Component::Is(parent.clone()))); + UniqueArc::from_header_and_iter_with_size(Default::default(), iter, len) + } else { + let iter = self.iter_raw_match_order().map(|component| { + use self::Component::*; + match *component { + LocalName(..) | + ID(..) | + Class(..) | + AttributeInNoNamespaceExists { .. } | + AttributeInNoNamespace { .. } | + AttributeOther(..) | + ExplicitUniversalType | + ExplicitAnyNamespace | + ExplicitNoNamespace | + DefaultNamespace(..) | + Namespace(..) | + Root | + Empty | + Scope | + Nth(..) | + NonTSPseudoClass(..) | + PseudoElement(..) | + Combinator(..) | + Host(None) | + Part(..) | + Invalid(..) | + RelativeSelectorAnchor => component.clone(), + ParentSelector => { + specificity += Specificity::from(parent_specificity_and_flags.specificity); + flags.insert( + parent_specificity_and_flags + .flags + .intersection(SelectorFlags::for_nesting()), + ); + Is(parent.clone()) + }, + Negation(ref selectors) => { + Negation( + replace_parent_on_selector_list( + selectors.slice(), + parent, + &mut specificity, + &mut flags, + /* propagate_specificity = */ true, + SelectorFlags::for_nesting(), + ) + .unwrap_or_else(|| selectors.clone()), + ) + }, + Is(ref selectors) => { + Is(replace_parent_on_selector_list( + selectors.slice(), + parent, + &mut specificity, + &mut flags, + /* propagate_specificity = */ true, + SelectorFlags::for_nesting(), + ) + .unwrap_or_else(|| selectors.clone())) + }, + Where(ref selectors) => { + Where( + replace_parent_on_selector_list( + selectors.slice(), + parent, + &mut specificity, + &mut flags, + /* propagate_specificity = */ false, + SelectorFlags::for_nesting(), + ) + .unwrap_or_else(|| selectors.clone()), + ) + }, + Has(ref selectors) => Has(replace_parent_on_relative_selector_list( + selectors, + parent, + &mut specificity, + &mut flags, + SelectorFlags::for_nesting(), + ) + .into_boxed_slice()), + + Host(Some(ref selector)) => Host(Some(replace_parent_on_selector( + selector, + parent, + &mut specificity, + &mut flags, + SelectorFlags::for_nesting() - SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + ))), + NthOf(ref data) => { + let selectors = replace_parent_on_selector_list( + data.selectors(), + parent, + &mut specificity, + &mut flags, + /* propagate_specificity = */ true, + SelectorFlags::for_nesting(), + ); + NthOf(match selectors { + Some(s) => { + NthOfSelectorData::new(data.nth_data(), s.slice().iter().cloned()) + }, + None => data.clone(), + }) + }, + Slotted(ref selector) => Slotted(replace_parent_on_selector( + selector, + parent, + &mut specificity, + &mut flags, + SelectorFlags::for_nesting(), + )), + } + }); + UniqueArc::from_header_and_iter(Default::default(), iter) + }; + *items.header_mut() = SpecificityAndFlags { + specificity: specificity.into(), + flags, + }; + Selector(items.shareable()) + } + + /// Returns count of simple selectors and combinators in the Selector. + #[inline] + pub fn len(&self) -> usize { + self.0.len() + } + + /// Returns the address on the heap of the ThinArc for memory reporting. + pub fn thin_arc_heap_ptr(&self) -> *const ::std::os::raw::c_void { + self.0.heap_ptr() + } + + /// Traverse selector components inside `self`. + /// + /// Implementations of this method should call `SelectorVisitor` methods + /// or other impls of `Visit` as appropriate based on the fields of `Self`. + /// + /// A return value of `false` indicates terminating the traversal. + /// It should be propagated with an early return. + /// On the contrary, `true` indicates that all fields of `self` have been traversed: + /// + /// ```rust,ignore + /// if !visitor.visit_simple_selector(&self.some_simple_selector) { + /// return false; + /// } + /// if !self.some_component.visit(visitor) { + /// return false; + /// } + /// true + /// ``` + pub fn visit<V>(&self, visitor: &mut V) -> bool + where + V: SelectorVisitor<Impl = Impl>, + { + let mut current = self.iter(); + let mut combinator = None; + loop { + if !visitor.visit_complex_selector(combinator) { + return false; + } + + for selector in &mut current { + if !selector.visit(visitor) { + return false; + } + } + + combinator = current.next_sequence(); + if combinator.is_none() { + break; + } + } + + true + } + + /// Parse a selector, without any pseudo-element. + #[inline] + pub fn parse<'i, 't, P>( + parser: &P, + input: &mut CssParser<'i, 't>, + ) -> Result<Self, ParseError<'i, P::Error>> + where + P: Parser<'i, Impl = Impl>, + { + parse_selector( + parser, + input, + SelectorParsingState::empty(), + ParseRelative::No, + ) + } + + pub fn new_invalid(s: &str) -> Self { + fn check_for_parent(input: &mut CssParser, has_parent: &mut bool) { + while let Ok(t) = input.next() { + match *t { + Token::Function(_) | + Token::ParenthesisBlock | + Token::CurlyBracketBlock | + Token::SquareBracketBlock => { + let _ = input.parse_nested_block( + |i| -> Result<(), ParseError<'_, BasicParseError>> { + check_for_parent(i, has_parent); + Ok(()) + }, + ); + }, + Token::Delim('&') => { + *has_parent = true; + }, + _ => {}, + } + if *has_parent { + break; + } + } + } + let mut has_parent = false; + { + let mut parser = cssparser::ParserInput::new(s); + let mut parser = CssParser::new(&mut parser); + check_for_parent(&mut parser, &mut has_parent); + } + Self(ThinArc::from_header_and_iter( + SpecificityAndFlags { + specificity: 0, + flags: if has_parent { + SelectorFlags::HAS_PARENT + } else { + SelectorFlags::empty() + }, + }, + std::iter::once(Component::Invalid(Arc::new(String::from(s.trim())))), + )) + } + + /// Is the compound starting at the offset the subject compound, or referring to its pseudo-element? + pub fn is_rightmost(&self, offset: usize) -> bool { + // There can really be only one pseudo-element, and it's not really valid for anything else to + // follow it. + offset == 0 || matches!(self.combinator_at_match_order(offset - 1), Combinator::PseudoElement) + } +} + +#[derive(Clone)] +pub struct SelectorIter<'a, Impl: 'a + SelectorImpl> { + iter: slice::Iter<'a, Component<Impl>>, + next_combinator: Option<Combinator>, +} + +impl<'a, Impl: 'a + SelectorImpl> SelectorIter<'a, Impl> { + /// Prepares this iterator to point to the next sequence to the left, + /// returning the combinator if the sequence was found. + #[inline] + pub fn next_sequence(&mut self) -> Option<Combinator> { + self.next_combinator.take() + } + + /// Whether this selector is a featureless host selector, with no + /// combinators to the left. + #[inline] + pub(crate) fn is_featureless_host_selector(&mut self) -> bool { + self.selector_length() > 0 && + self.all(|component| component.matches_featureless_host()) && + self.next_sequence().is_none() + } + + #[inline] + pub(crate) fn matches_for_stateless_pseudo_element(&mut self) -> bool { + let first = match self.next() { + Some(c) => c, + // Note that this is the common path that we keep inline: the + // pseudo-element not having anything to its right. + None => return true, + }; + self.matches_for_stateless_pseudo_element_internal(first) + } + + #[inline(never)] + fn matches_for_stateless_pseudo_element_internal(&mut self, first: &Component<Impl>) -> bool { + if !first.matches_for_stateless_pseudo_element() { + return false; + } + for component in self { + // The only other parser-allowed Components in this sequence are + // state pseudo-classes, or one of the other things that can contain + // them. + if !component.matches_for_stateless_pseudo_element() { + return false; + } + } + true + } + + /// Returns remaining count of the simple selectors and combinators in the Selector. + #[inline] + pub fn selector_length(&self) -> usize { + self.iter.len() + } +} + +impl<'a, Impl: SelectorImpl> Iterator for SelectorIter<'a, Impl> { + type Item = &'a Component<Impl>; + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + debug_assert!( + self.next_combinator.is_none(), + "You should call next_sequence!" + ); + match *self.iter.next()? { + Component::Combinator(c) => { + self.next_combinator = Some(c); + None + }, + ref x => Some(x), + } + } +} + +impl<'a, Impl: SelectorImpl> fmt::Debug for SelectorIter<'a, Impl> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let iter = self.iter.clone().rev(); + for component in iter { + component.to_css(f)? + } + Ok(()) + } +} + +/// An iterator over all combinators in a selector. Does not traverse selectors within psuedoclasses. +struct CombinatorIter<'a, Impl: 'a + SelectorImpl>(SelectorIter<'a, Impl>); +impl<'a, Impl: 'a + SelectorImpl> CombinatorIter<'a, Impl> { + fn new(inner: SelectorIter<'a, Impl>) -> Self { + let mut result = CombinatorIter(inner); + result.consume_non_combinators(); + result + } + + fn consume_non_combinators(&mut self) { + while self.0.next().is_some() {} + } +} + +impl<'a, Impl: SelectorImpl> Iterator for CombinatorIter<'a, Impl> { + type Item = Combinator; + fn next(&mut self) -> Option<Self::Item> { + let result = self.0.next_sequence(); + self.consume_non_combinators(); + result + } +} + +/// An iterator over all simple selectors belonging to ancestors. +struct AncestorIter<'a, Impl: 'a + SelectorImpl>(SelectorIter<'a, Impl>); +impl<'a, Impl: 'a + SelectorImpl> AncestorIter<'a, Impl> { + /// Creates an AncestorIter. The passed-in iterator is assumed to point to + /// the beginning of the child sequence, which will be skipped. + fn new(inner: SelectorIter<'a, Impl>) -> Self { + let mut result = AncestorIter(inner); + result.skip_until_ancestor(); + result + } + + /// Skips a sequence of simple selectors and all subsequent sequences until + /// a non-pseudo-element ancestor combinator is reached. + fn skip_until_ancestor(&mut self) { + loop { + while self.0.next().is_some() {} + // If this is ever changed to stop at the "pseudo-element" + // combinator, we will need to fix the way we compute hashes for + // revalidation selectors. + if self.0.next_sequence().map_or(true, |x| { + matches!(x, Combinator::Child | Combinator::Descendant) + }) { + break; + } + } + } +} + +impl<'a, Impl: SelectorImpl> Iterator for AncestorIter<'a, Impl> { + type Item = &'a Component<Impl>; + fn next(&mut self) -> Option<Self::Item> { + // Grab the next simple selector in the sequence if available. + let next = self.0.next(); + if next.is_some() { + return next; + } + + // See if there are more sequences. If so, skip any non-ancestor sequences. + if let Some(combinator) = self.0.next_sequence() { + if !matches!(combinator, Combinator::Child | Combinator::Descendant) { + self.skip_until_ancestor(); + } + } + + self.0.next() + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, ToShmem)] +pub enum Combinator { + Child, // > + Descendant, // space + NextSibling, // + + LaterSibling, // ~ + /// A dummy combinator we use to the left of pseudo-elements. + /// + /// It serializes as the empty string, and acts effectively as a child + /// combinator in most cases. If we ever actually start using a child + /// combinator for this, we will need to fix up the way hashes are computed + /// for revalidation selectors. + PseudoElement, + /// Another combinator used for ::slotted(), which represent the jump from + /// a node to its assigned slot. + SlotAssignment, + /// Another combinator used for `::part()`, which represents the jump from + /// the part to the containing shadow host. + Part, +} + +impl Combinator { + /// Returns true if this combinator is a child or descendant combinator. + #[inline] + pub fn is_ancestor(&self) -> bool { + matches!( + *self, + Combinator::Child | + Combinator::Descendant | + Combinator::PseudoElement | + Combinator::SlotAssignment + ) + } + + /// Returns true if this combinator is a pseudo-element combinator. + #[inline] + pub fn is_pseudo_element(&self) -> bool { + matches!(*self, Combinator::PseudoElement) + } + + /// Returns true if this combinator is a next- or later-sibling combinator. + #[inline] + pub fn is_sibling(&self) -> bool { + matches!(*self, Combinator::NextSibling | Combinator::LaterSibling) + } +} + +/// An enum for the different types of :nth- pseudoclasses +#[derive(Copy, Clone, Eq, PartialEq, ToShmem)] +#[shmem(no_bounds)] +pub enum NthType { + Child, + LastChild, + OnlyChild, + OfType, + LastOfType, + OnlyOfType, +} + +impl NthType { + pub fn is_only(self) -> bool { + self == Self::OnlyChild || self == Self::OnlyOfType + } + + pub fn is_of_type(self) -> bool { + self == Self::OfType || self == Self::LastOfType || self == Self::OnlyOfType + } + + pub fn is_from_end(self) -> bool { + self == Self::LastChild || self == Self::LastOfType + } +} + +/// The properties that comprise an :nth- pseudoclass as of Selectors 3 (e.g., +/// nth-child(An+B)). +/// https://www.w3.org/TR/selectors-3/#nth-child-pseudo +#[derive(Copy, Clone, Eq, PartialEq, ToShmem)] +#[shmem(no_bounds)] +pub struct NthSelectorData { + pub ty: NthType, + pub is_function: bool, + pub a: i32, + pub b: i32, +} + +impl NthSelectorData { + /// Returns selector data for :only-{child,of-type} + #[inline] + pub const fn only(of_type: bool) -> Self { + Self { + ty: if of_type { + NthType::OnlyOfType + } else { + NthType::OnlyChild + }, + is_function: false, + a: 0, + b: 1, + } + } + + /// Returns selector data for :first-{child,of-type} + #[inline] + pub const fn first(of_type: bool) -> Self { + Self { + ty: if of_type { + NthType::OfType + } else { + NthType::Child + }, + is_function: false, + a: 0, + b: 1, + } + } + + /// Returns selector data for :last-{child,of-type} + #[inline] + pub const fn last(of_type: bool) -> Self { + Self { + ty: if of_type { + NthType::LastOfType + } else { + NthType::LastChild + }, + is_function: false, + a: 0, + b: 1, + } + } + + /// Returns true if this is an edge selector that is not `:*-of-type`` + #[inline] + pub fn is_simple_edge(&self) -> bool { + self.a == 0 && self.b == 1 && !self.ty.is_of_type() + } + + /// Writes the beginning of the selector. + #[inline] + fn write_start<W: fmt::Write>(&self, dest: &mut W) -> fmt::Result { + dest.write_str(match self.ty { + NthType::Child if self.is_function => ":nth-child(", + NthType::Child => ":first-child", + NthType::LastChild if self.is_function => ":nth-last-child(", + NthType::LastChild => ":last-child", + NthType::OfType if self.is_function => ":nth-of-type(", + NthType::OfType => ":first-of-type", + NthType::LastOfType if self.is_function => ":nth-last-of-type(", + NthType::LastOfType => ":last-of-type", + NthType::OnlyChild => ":only-child", + NthType::OnlyOfType => ":only-of-type", + }) + } + + /// Serialize <an+b> (part of the CSS Syntax spec, but currently only used here). + /// <https://drafts.csswg.org/css-syntax-3/#serialize-an-anb-value> + #[inline] + fn write_affine<W: fmt::Write>(&self, dest: &mut W) -> fmt::Result { + match (self.a, self.b) { + (0, 0) => dest.write_char('0'), + + (1, 0) => dest.write_char('n'), + (-1, 0) => dest.write_str("-n"), + (_, 0) => write!(dest, "{}n", self.a), + + (0, _) => write!(dest, "{}", self.b), + (1, _) => write!(dest, "n{:+}", self.b), + (-1, _) => write!(dest, "-n{:+}", self.b), + (_, _) => write!(dest, "{}n{:+}", self.a, self.b), + } + } +} + +/// The properties that comprise an :nth- pseudoclass as of Selectors 4 (e.g., +/// nth-child(An+B [of S]?)). +/// https://www.w3.org/TR/selectors-4/#nth-child-pseudo +#[derive(Clone, Eq, PartialEq, ToShmem)] +#[shmem(no_bounds)] +pub struct NthOfSelectorData<Impl: SelectorImpl>( + #[shmem(field_bound)] ThinArc<NthSelectorData, Selector<Impl>>, +); + +impl<Impl: SelectorImpl> NthOfSelectorData<Impl> { + /// Returns selector data for :nth-{,last-}{child,of-type}(An+B [of S]) + #[inline] + pub fn new<I>(nth_data: &NthSelectorData, selectors: I) -> Self + where + I: Iterator<Item = Selector<Impl>> + ExactSizeIterator, + { + Self(ThinArc::from_header_and_iter(*nth_data, selectors)) + } + + /// Returns the An+B part of the selector + #[inline] + pub fn nth_data(&self) -> &NthSelectorData { + &self.0.header + } + + /// Returns the selector list part of the selector + #[inline] + pub fn selectors(&self) -> &[Selector<Impl>] { + self.0.slice() + } +} + +/// Flag indicating where a given relative selector's match would be contained. +#[derive(Clone, Copy, Eq, PartialEq, ToShmem)] +pub enum RelativeSelectorMatchHint { + /// Within this element's subtree. + InSubtree, + /// Within this element's direct children. + InChild, + /// This element's next sibling. + InNextSibling, + /// Within this element's next sibling's subtree. + InNextSiblingSubtree, + /// Within this element's subsequent siblings. + InSibling, + /// Across this element's subsequent siblings and their subtrees. + InSiblingSubtree, +} + +impl RelativeSelectorMatchHint { + /// Create a new relative selector match hint based on its composition. + pub fn new( + relative_combinator: Combinator, + has_child_or_descendants: bool, + has_adjacent_or_next_siblings: bool, + ) -> Self { + match relative_combinator { + Combinator::Descendant => RelativeSelectorMatchHint::InSubtree, + Combinator::Child => { + if !has_child_or_descendants { + RelativeSelectorMatchHint::InChild + } else { + // Technically, for any composition that consists of child combinators only, + // the search space is depth-constrained, but it's probably not worth optimizing for. + RelativeSelectorMatchHint::InSubtree + } + }, + Combinator::NextSibling => { + if !has_child_or_descendants && !has_adjacent_or_next_siblings { + RelativeSelectorMatchHint::InNextSibling + } else if !has_child_or_descendants && has_adjacent_or_next_siblings { + RelativeSelectorMatchHint::InSibling + } else if has_child_or_descendants && !has_adjacent_or_next_siblings { + // Match won't cross multiple siblings. + RelativeSelectorMatchHint::InNextSiblingSubtree + } else { + RelativeSelectorMatchHint::InSiblingSubtree + } + }, + Combinator::LaterSibling => { + if !has_child_or_descendants { + RelativeSelectorMatchHint::InSibling + } else { + // Even if the match may not cross multiple siblings, we have to look until + // we find a match anyway. + RelativeSelectorMatchHint::InSiblingSubtree + } + }, + Combinator::Part | Combinator::PseudoElement | Combinator::SlotAssignment => { + debug_assert!(false, "Unexpected relative combinator"); + RelativeSelectorMatchHint::InSubtree + }, + } + } + + /// Is the match traversal direction towards the descendant of this element (As opposed to siblings)? + pub fn is_descendant_direction(&self) -> bool { + matches!(*self, Self::InChild | Self::InSubtree) + } + + /// Is the match traversal terminated at the next sibling? + pub fn is_next_sibling(&self) -> bool { + matches!(*self, Self::InNextSibling | Self::InNextSiblingSubtree) + } + + /// Does the match involve matching the subtree? + pub fn is_subtree(&self) -> bool { + matches!( + *self, + Self::InSubtree | Self::InSiblingSubtree | Self::InNextSiblingSubtree + ) + } +} + +/// Count of combinators in a given relative selector, not traversing selectors of pseudoclasses. +#[derive(Clone, Copy)] +pub struct RelativeSelectorCombinatorCount { + relative_combinator: Combinator, + pub child_or_descendants: usize, + pub adjacent_or_next_siblings: usize, +} + +impl RelativeSelectorCombinatorCount { + /// Create a new relative selector combinator count from a given relative selector. + pub fn new<Impl: SelectorImpl>(relative_selector: &RelativeSelector<Impl>) -> Self { + let mut result = RelativeSelectorCombinatorCount { + relative_combinator: relative_selector.selector.combinator_at_parse_order(1), + child_or_descendants: 0, + adjacent_or_next_siblings: 0, + }; + + for combinator in CombinatorIter::new( + relative_selector + .selector + .iter_skip_relative_selector_anchor(), + ) { + match combinator { + Combinator::Descendant | Combinator::Child => { + result.child_or_descendants += 1; + }, + Combinator::NextSibling | Combinator::LaterSibling => { + result.adjacent_or_next_siblings += 1; + }, + Combinator::Part | Combinator::PseudoElement | Combinator::SlotAssignment => { + continue + }, + }; + } + result + } + + /// Get the match hint based on the current combinator count. + pub fn get_match_hint(&self) -> RelativeSelectorMatchHint { + RelativeSelectorMatchHint::new( + self.relative_combinator, + self.child_or_descendants != 0, + self.adjacent_or_next_siblings != 0, + ) + } +} + +/// Storage for a relative selector. +#[derive(Clone, Eq, PartialEq, ToShmem)] +#[shmem(no_bounds)] +pub struct RelativeSelector<Impl: SelectorImpl> { + /// Match space constraining hint. + pub match_hint: RelativeSelectorMatchHint, + /// The selector. Guaranteed to contain `RelativeSelectorAnchor` and the relative combinator in parse order. + #[shmem(field_bound)] + pub selector: Selector<Impl>, +} + +bitflags! { + /// Composition of combinators in a given selector, not traversing selectors of pseudoclasses. + #[derive(Clone, Debug, Eq, PartialEq)] + struct CombinatorComposition: u8 { + const DESCENDANTS = 1 << 0; + const SIBLINGS = 1 << 1; + } +} + +impl CombinatorComposition { + fn for_relative_selector<Impl: SelectorImpl>(inner_selector: &Selector<Impl>) -> Self { + let mut result = CombinatorComposition::empty(); + for combinator in CombinatorIter::new(inner_selector.iter_skip_relative_selector_anchor()) { + match combinator { + Combinator::Descendant | Combinator::Child => { + result.insert(Self::DESCENDANTS); + }, + Combinator::NextSibling | Combinator::LaterSibling => { + result.insert(Self::SIBLINGS); + }, + Combinator::Part | Combinator::PseudoElement | Combinator::SlotAssignment => { + continue + }, + }; + if result.is_all() { + break; + } + } + return result; + } +} + +impl<Impl: SelectorImpl> RelativeSelector<Impl> { + fn from_selector_list(selector_list: SelectorList<Impl>) -> Box<[Self]> { + let vec: Vec<Self> = selector_list + .slice() + .iter() + .map(|selector| { + // It's more efficient to keep track of all this during the parse time, but that seems like a lot of special + // case handling for what it's worth. + if cfg!(debug_assertions) { + let relative_selector_anchor = selector.iter_raw_parse_order_from(0).next(); + debug_assert!( + relative_selector_anchor.is_some(), + "Relative selector is empty" + ); + debug_assert!( + matches!( + relative_selector_anchor.unwrap(), + Component::RelativeSelectorAnchor + ), + "Relative selector anchor is missing" + ); + } + // Leave a hint for narrowing down the search space when we're matching. + let composition = CombinatorComposition::for_relative_selector(&selector); + let match_hint = RelativeSelectorMatchHint::new( + selector.combinator_at_parse_order(1), + composition.intersects(CombinatorComposition::DESCENDANTS), + composition.intersects(CombinatorComposition::SIBLINGS), + ); + RelativeSelector { + match_hint, + selector: selector.clone(), + } + }) + .collect(); + vec.into_boxed_slice() + } +} + +/// A CSS simple selector or combinator. We store both in the same enum for +/// optimal packing and cache performance, see [1]. +/// +/// [1] https://bugzilla.mozilla.org/show_bug.cgi?id=1357973 +#[derive(Clone, Eq, PartialEq, ToShmem)] +#[shmem(no_bounds)] +pub enum Component<Impl: SelectorImpl> { + LocalName(LocalName<Impl>), + + ID(#[shmem(field_bound)] Impl::Identifier), + Class(#[shmem(field_bound)] Impl::Identifier), + + AttributeInNoNamespaceExists { + #[shmem(field_bound)] + local_name: Impl::LocalName, + local_name_lower: Impl::LocalName, + }, + // Used only when local_name is already lowercase. + AttributeInNoNamespace { + local_name: Impl::LocalName, + operator: AttrSelectorOperator, + #[shmem(field_bound)] + value: Impl::AttrValue, + case_sensitivity: ParsedCaseSensitivity, + }, + // Use a Box in the less common cases with more data to keep size_of::<Component>() small. + AttributeOther(Box<AttrSelectorWithOptionalNamespace<Impl>>), + + ExplicitUniversalType, + ExplicitAnyNamespace, + + ExplicitNoNamespace, + DefaultNamespace(#[shmem(field_bound)] Impl::NamespaceUrl), + Namespace( + #[shmem(field_bound)] Impl::NamespacePrefix, + #[shmem(field_bound)] Impl::NamespaceUrl, + ), + + /// Pseudo-classes + Negation(SelectorList<Impl>), + Root, + Empty, + Scope, + ParentSelector, + Nth(NthSelectorData), + NthOf(NthOfSelectorData<Impl>), + NonTSPseudoClass(#[shmem(field_bound)] Impl::NonTSPseudoClass), + /// The ::slotted() pseudo-element: + /// + /// https://drafts.csswg.org/css-scoping/#slotted-pseudo + /// + /// The selector here is a compound selector, that is, no combinators. + /// + /// NOTE(emilio): This should support a list of selectors, but as of this + /// writing no other browser does, and that allows them to put ::slotted() + /// in the rule hash, so we do that too. + /// + /// See https://github.com/w3c/csswg-drafts/issues/2158 + Slotted(Selector<Impl>), + /// The `::part` pseudo-element. + /// https://drafts.csswg.org/css-shadow-parts/#part + Part(#[shmem(field_bound)] Box<[Impl::Identifier]>), + /// The `:host` pseudo-class: + /// + /// https://drafts.csswg.org/css-scoping/#host-selector + /// + /// NOTE(emilio): This should support a list of selectors, but as of this + /// writing no other browser does, and that allows them to put :host() + /// in the rule hash, so we do that too. + /// + /// See https://github.com/w3c/csswg-drafts/issues/2158 + Host(Option<Selector<Impl>>), + /// The `:where` pseudo-class. + /// + /// https://drafts.csswg.org/selectors/#zero-matches + /// + /// The inner argument is conceptually a SelectorList, but we move the + /// selectors to the heap to keep Component small. + Where(SelectorList<Impl>), + /// The `:is` pseudo-class. + /// + /// https://drafts.csswg.org/selectors/#matches-pseudo + /// + /// Same comment as above re. the argument. + Is(SelectorList<Impl>), + /// The `:has` pseudo-class. + /// + /// https://drafts.csswg.org/selectors/#has-pseudo + /// + /// Same comment as above re. the argument. + Has(Box<[RelativeSelector<Impl>]>), + /// An invalid selector inside :is() / :where(). + Invalid(Arc<String>), + /// An implementation-dependent pseudo-element selector. + PseudoElement(#[shmem(field_bound)] Impl::PseudoElement), + + Combinator(Combinator), + + /// Used only for relative selectors, which starts with a combinator + /// (With an implied descendant combinator if not specified). + /// + /// https://drafts.csswg.org/selectors-4/#typedef-relative-selector + RelativeSelectorAnchor, +} + +impl<Impl: SelectorImpl> Component<Impl> { + /// Returns true if this is a combinator. + #[inline] + pub fn is_combinator(&self) -> bool { + matches!(*self, Component::Combinator(_)) + } + + /// Returns true if this is a :host() selector. + #[inline] + pub fn is_host(&self) -> bool { + matches!(*self, Component::Host(..)) + } + + /// Returns true if this is a :host() selector. + #[inline] + pub fn matches_featureless_host(&self) -> bool { + match *self { + Component::Host(..) => true, + Component::Where(ref l) | Component::Is(ref l) => { + // TODO(emilio): For now we use .all() rather than .any(), because not doing so + // brings up a fair amount of extra complexity (we can't make the decision on + // whether to walk out statically). + l.slice() + .iter() + .all(|i| i.is_featureless_host_selector_or_pseudo_element()) + }, + _ => false, + } + } + + /// Returns the value as a combinator if applicable, None otherwise. + pub fn as_combinator(&self) -> Option<Combinator> { + match *self { + Component::Combinator(c) => Some(c), + _ => None, + } + } + + /// Whether this component is valid after a pseudo-element. Only intended + /// for sanity-checking. + pub fn maybe_allowed_after_pseudo_element(&self) -> bool { + match *self { + Component::NonTSPseudoClass(..) => true, + Component::Negation(ref selectors) | + Component::Is(ref selectors) | + Component::Where(ref selectors) => selectors.slice().iter().all(|selector| { + selector + .iter_raw_match_order() + .all(|c| c.maybe_allowed_after_pseudo_element()) + }), + _ => false, + } + } + + /// Whether a given selector should match for stateless pseudo-elements. + /// + /// This is a bit subtle: Only selectors that return true in + /// `maybe_allowed_after_pseudo_element` should end up here, and + /// `NonTSPseudoClass` never matches (as it is a stateless pseudo after + /// all). + fn matches_for_stateless_pseudo_element(&self) -> bool { + debug_assert!( + self.maybe_allowed_after_pseudo_element(), + "Someone messed up pseudo-element parsing: {:?}", + *self + ); + match *self { + Component::Negation(ref selectors) => !selectors.slice().iter().all(|selector| { + selector + .iter_raw_match_order() + .all(|c| c.matches_for_stateless_pseudo_element()) + }), + Component::Is(ref selectors) | Component::Where(ref selectors) => { + selectors.slice().iter().any(|selector| { + selector + .iter_raw_match_order() + .all(|c| c.matches_for_stateless_pseudo_element()) + }) + }, + _ => false, + } + } + + pub fn visit<V>(&self, visitor: &mut V) -> bool + where + V: SelectorVisitor<Impl = Impl>, + { + use self::Component::*; + if !visitor.visit_simple_selector(self) { + return false; + } + + match *self { + Slotted(ref selector) => { + if !selector.visit(visitor) { + return false; + } + }, + Host(Some(ref selector)) => { + if !selector.visit(visitor) { + return false; + } + }, + AttributeInNoNamespaceExists { + ref local_name, + ref local_name_lower, + } => { + if !visitor.visit_attribute_selector( + &NamespaceConstraint::Specific(&namespace_empty_string::<Impl>()), + local_name, + local_name_lower, + ) { + return false; + } + }, + AttributeInNoNamespace { ref local_name, .. } => { + if !visitor.visit_attribute_selector( + &NamespaceConstraint::Specific(&namespace_empty_string::<Impl>()), + local_name, + local_name, + ) { + return false; + } + }, + AttributeOther(ref attr_selector) => { + let empty_string; + let namespace = match attr_selector.namespace() { + Some(ns) => ns, + None => { + empty_string = crate::parser::namespace_empty_string::<Impl>(); + NamespaceConstraint::Specific(&empty_string) + }, + }; + if !visitor.visit_attribute_selector( + &namespace, + &attr_selector.local_name, + &attr_selector.local_name_lower, + ) { + return false; + } + }, + + NonTSPseudoClass(ref pseudo_class) => { + if !pseudo_class.visit(visitor) { + return false; + } + }, + Negation(ref list) | Is(ref list) | Where(ref list) => { + let list_kind = SelectorListKind::from_component(self); + debug_assert!(!list_kind.is_empty()); + if !visitor.visit_selector_list(list_kind, list.slice()) { + return false; + } + }, + NthOf(ref nth_of_data) => { + if !visitor.visit_selector_list(SelectorListKind::NTH_OF, nth_of_data.selectors()) { + return false; + } + }, + Has(ref list) => { + if !visitor.visit_relative_selector_list(list) { + return false; + } + }, + _ => {}, + } + + true + } + + // Returns true if this has any selector that requires an index calculation. e.g. + // :nth-child, :first-child, etc. For nested selectors, return true only if the + // indexed selector is in its subject compound. + pub fn has_indexed_selector_in_subject(&self) -> bool { + match *self { + Component::NthOf(..) | Component::Nth(..) => return true, + Component::Is(ref selectors) | + Component::Where(ref selectors) | + Component::Negation(ref selectors) => { + // Check the subject compound. + for selector in selectors.slice() { + let mut iter = selector.iter(); + while let Some(c) = iter.next() { + if c.has_indexed_selector_in_subject() { + return true; + } + } + } + }, + _ => (), + }; + false + } +} + +#[derive(Clone, Eq, PartialEq, ToShmem)] +#[shmem(no_bounds)] +pub struct LocalName<Impl: SelectorImpl> { + #[shmem(field_bound)] + pub name: Impl::LocalName, + pub lower_name: Impl::LocalName, +} + +impl<Impl: SelectorImpl> Debug for Selector<Impl> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("Selector(")?; + self.to_css(f)?; + write!( + f, + ", specificity = {:#x}, flags = {:?})", + self.specificity(), + self.flags() + ) + } +} + +impl<Impl: SelectorImpl> Debug for Component<Impl> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.to_css(f) + } +} +impl<Impl: SelectorImpl> Debug for AttrSelectorWithOptionalNamespace<Impl> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.to_css(f) + } +} +impl<Impl: SelectorImpl> Debug for LocalName<Impl> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.to_css(f) + } +} + +fn serialize_selector_list<'a, Impl, I, W>(iter: I, dest: &mut W) -> fmt::Result +where + Impl: SelectorImpl, + I: Iterator<Item = &'a Selector<Impl>>, + W: fmt::Write, +{ + let mut first = true; + for selector in iter { + if !first { + dest.write_str(", ")?; + } + first = false; + selector.to_css(dest)?; + } + Ok(()) +} + +impl<Impl: SelectorImpl> ToCss for SelectorList<Impl> { + fn to_css<W>(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + serialize_selector_list(self.slice().iter(), dest) + } +} + +impl<Impl: SelectorImpl> ToCss for Selector<Impl> { + fn to_css<W>(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + // Compound selectors invert the order of their contents, so we need to + // undo that during serialization. + // + // This two-iterator strategy involves walking over the selector twice. + // We could do something more clever, but selector serialization probably + // isn't hot enough to justify it, and the stringification likely + // dominates anyway. + // + // NB: A parse-order iterator is a Rev<>, which doesn't expose as_slice(), + // which we need for |split|. So we split by combinators on a match-order + // sequence and then reverse. + + let mut combinators = self + .iter_raw_match_order() + .rev() + .filter_map(|x| x.as_combinator()); + let compound_selectors = self + .iter_raw_match_order() + .as_slice() + .split(|x| x.is_combinator()) + .rev(); + + let mut combinators_exhausted = false; + for compound in compound_selectors { + debug_assert!(!combinators_exhausted); + + // https://drafts.csswg.org/cssom/#serializing-selectors + if compound.is_empty() { + continue; + } + if let Component::RelativeSelectorAnchor = compound.first().unwrap() { + debug_assert!( + compound.len() == 1, + "RelativeLeft should only be a simple selector" + ); + combinators.next().unwrap().to_css_relative(dest)?; + continue; + } + + // 1. If there is only one simple selector in the compound selectors + // which is a universal selector, append the result of + // serializing the universal selector to s. + // + // Check if `!compound.empty()` first--this can happen if we have + // something like `... > ::before`, because we store `>` and `::` + // both as combinators internally. + // + // If we are in this case, after we have serialized the universal + // selector, we skip Step 2 and continue with the algorithm. + let (can_elide_namespace, first_non_namespace) = match compound[0] { + Component::ExplicitAnyNamespace | + Component::ExplicitNoNamespace | + Component::Namespace(..) => (false, 1), + Component::DefaultNamespace(..) => (true, 1), + _ => (true, 0), + }; + let mut perform_step_2 = true; + let next_combinator = combinators.next(); + if first_non_namespace == compound.len() - 1 { + match (next_combinator, &compound[first_non_namespace]) { + // We have to be careful here, because if there is a + // pseudo element "combinator" there isn't really just + // the one simple selector. Technically this compound + // selector contains the pseudo element selector as well + // -- Combinator::PseudoElement, just like + // Combinator::SlotAssignment, don't exist in the + // spec. + (Some(Combinator::PseudoElement), _) | + (Some(Combinator::SlotAssignment), _) => (), + (_, &Component::ExplicitUniversalType) => { + // Iterate over everything so we serialize the namespace + // too. + for simple in compound.iter() { + simple.to_css(dest)?; + } + // Skip step 2, which is an "otherwise". + perform_step_2 = false; + }, + _ => (), + } + } + + // 2. Otherwise, for each simple selector in the compound selectors + // that is not a universal selector of which the namespace prefix + // maps to a namespace that is not the default namespace + // serialize the simple selector and append the result to s. + // + // See https://github.com/w3c/csswg-drafts/issues/1606, which is + // proposing to change this to match up with the behavior asserted + // in cssom/serialize-namespaced-type-selectors.html, which the + // following code tries to match. + if perform_step_2 { + for simple in compound.iter() { + if let Component::ExplicitUniversalType = *simple { + // Can't have a namespace followed by a pseudo-element + // selector followed by a universal selector in the same + // compound selector, so we don't have to worry about the + // real namespace being in a different `compound`. + if can_elide_namespace { + continue; + } + } + simple.to_css(dest)?; + } + } + + // 3. If this is not the last part of the chain of the selector + // append a single SPACE (U+0020), followed by the combinator + // ">", "+", "~", ">>", "||", as appropriate, followed by another + // single SPACE (U+0020) if the combinator was not whitespace, to + // s. + match next_combinator { + Some(c) => c.to_css(dest)?, + None => combinators_exhausted = true, + }; + + // 4. If this is the last part of the chain of the selector and + // there is a pseudo-element, append "::" followed by the name of + // the pseudo-element, to s. + // + // (we handle this above) + } + + Ok(()) + } +} + +impl Combinator { + fn to_css_internal<W>(&self, dest: &mut W, prefix_space: bool) -> fmt::Result + where + W: fmt::Write, + { + if matches!( + *self, + Combinator::PseudoElement | Combinator::Part | Combinator::SlotAssignment + ) { + return Ok(()); + } + if prefix_space { + dest.write_char(' ')?; + } + match *self { + Combinator::Child => dest.write_str("> "), + Combinator::Descendant => Ok(()), + Combinator::NextSibling => dest.write_str("+ "), + Combinator::LaterSibling => dest.write_str("~ "), + Combinator::PseudoElement | Combinator::Part | Combinator::SlotAssignment => unsafe { + debug_unreachable!("Already handled") + }, + } + } + + fn to_css_relative<W>(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + self.to_css_internal(dest, false) + } +} + +impl ToCss for Combinator { + fn to_css<W>(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + self.to_css_internal(dest, true) + } +} + +impl<Impl: SelectorImpl> ToCss for Component<Impl> { + fn to_css<W>(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + use self::Component::*; + + match *self { + Combinator(ref c) => c.to_css(dest), + Slotted(ref selector) => { + dest.write_str("::slotted(")?; + selector.to_css(dest)?; + dest.write_char(')') + }, + Part(ref part_names) => { + dest.write_str("::part(")?; + for (i, name) in part_names.iter().enumerate() { + if i != 0 { + dest.write_char(' ')?; + } + name.to_css(dest)?; + } + dest.write_char(')') + }, + PseudoElement(ref p) => p.to_css(dest), + ID(ref s) => { + dest.write_char('#')?; + s.to_css(dest) + }, + Class(ref s) => { + dest.write_char('.')?; + s.to_css(dest) + }, + LocalName(ref s) => s.to_css(dest), + ExplicitUniversalType => dest.write_char('*'), + + DefaultNamespace(_) => Ok(()), + ExplicitNoNamespace => dest.write_char('|'), + ExplicitAnyNamespace => dest.write_str("*|"), + Namespace(ref prefix, _) => { + prefix.to_css(dest)?; + dest.write_char('|') + }, + + AttributeInNoNamespaceExists { ref local_name, .. } => { + dest.write_char('[')?; + local_name.to_css(dest)?; + dest.write_char(']') + }, + AttributeInNoNamespace { + ref local_name, + operator, + ref value, + case_sensitivity, + .. + } => { + dest.write_char('[')?; + local_name.to_css(dest)?; + operator.to_css(dest)?; + value.to_css(dest)?; + match case_sensitivity { + ParsedCaseSensitivity::CaseSensitive | + ParsedCaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument => {}, + ParsedCaseSensitivity::AsciiCaseInsensitive => dest.write_str(" i")?, + ParsedCaseSensitivity::ExplicitCaseSensitive => dest.write_str(" s")?, + } + dest.write_char(']') + }, + AttributeOther(ref attr_selector) => attr_selector.to_css(dest), + + // Pseudo-classes + Root => dest.write_str(":root"), + Empty => dest.write_str(":empty"), + Scope => dest.write_str(":scope"), + ParentSelector => dest.write_char('&'), + Host(ref selector) => { + dest.write_str(":host")?; + if let Some(ref selector) = *selector { + dest.write_char('(')?; + selector.to_css(dest)?; + dest.write_char(')')?; + } + Ok(()) + }, + Nth(ref nth_data) => { + nth_data.write_start(dest)?; + if nth_data.is_function { + nth_data.write_affine(dest)?; + dest.write_char(')')?; + } + Ok(()) + }, + NthOf(ref nth_of_data) => { + let nth_data = nth_of_data.nth_data(); + nth_data.write_start(dest)?; + debug_assert!( + nth_data.is_function, + "A selector must be a function to hold An+B notation" + ); + nth_data.write_affine(dest)?; + debug_assert!( + matches!(nth_data.ty, NthType::Child | NthType::LastChild), + "Only :nth-child or :nth-last-child can be of a selector list" + ); + debug_assert!( + !nth_of_data.selectors().is_empty(), + "The selector list should not be empty" + ); + dest.write_str(" of ")?; + serialize_selector_list(nth_of_data.selectors().iter(), dest)?; + dest.write_char(')') + }, + Is(ref list) | Where(ref list) | Negation(ref list) => { + match *self { + Where(..) => dest.write_str(":where(")?, + Is(..) => dest.write_str(":is(")?, + Negation(..) => dest.write_str(":not(")?, + _ => unreachable!(), + } + serialize_selector_list(list.slice().iter(), dest)?; + dest.write_str(")") + }, + Has(ref list) => { + dest.write_str(":has(")?; + let mut first = true; + for RelativeSelector { ref selector, .. } in list.iter() { + if !first { + dest.write_str(", ")?; + } + first = false; + selector.to_css(dest)?; + } + dest.write_str(")") + }, + NonTSPseudoClass(ref pseudo) => pseudo.to_css(dest), + Invalid(ref css) => dest.write_str(css), + RelativeSelectorAnchor => Ok(()), + } + } +} + +impl<Impl: SelectorImpl> ToCss for AttrSelectorWithOptionalNamespace<Impl> { + fn to_css<W>(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + dest.write_char('[')?; + match self.namespace { + Some(NamespaceConstraint::Specific((ref prefix, _))) => { + prefix.to_css(dest)?; + dest.write_char('|')? + }, + Some(NamespaceConstraint::Any) => dest.write_str("*|")?, + None => {}, + } + self.local_name.to_css(dest)?; + match self.operation { + ParsedAttrSelectorOperation::Exists => {}, + ParsedAttrSelectorOperation::WithValue { + operator, + case_sensitivity, + ref value, + } => { + operator.to_css(dest)?; + value.to_css(dest)?; + match case_sensitivity { + ParsedCaseSensitivity::CaseSensitive | + ParsedCaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument => {}, + ParsedCaseSensitivity::AsciiCaseInsensitive => dest.write_str(" i")?, + ParsedCaseSensitivity::ExplicitCaseSensitive => dest.write_str(" s")?, + } + }, + } + dest.write_char(']') + } +} + +impl<Impl: SelectorImpl> ToCss for LocalName<Impl> { + fn to_css<W>(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + self.name.to_css(dest) + } +} + +/// Build up a Selector. +/// selector : simple_selector_sequence [ combinator simple_selector_sequence ]* ; +/// +/// `Err` means invalid selector. +fn parse_selector<'i, 't, P, Impl>( + parser: &P, + input: &mut CssParser<'i, 't>, + mut state: SelectorParsingState, + parse_relative: ParseRelative, +) -> Result<Selector<Impl>, ParseError<'i, P::Error>> +where + P: Parser<'i, Impl = Impl>, + Impl: SelectorImpl, +{ + let mut builder = SelectorBuilder::default(); + + // Helps rewind less, but also simplifies dealing with relative combinators below. + input.skip_whitespace(); + + if parse_relative != ParseRelative::No { + let combinator = try_parse_combinator::<P, Impl>(input); + match parse_relative { + ParseRelative::ForHas => { + builder.push_simple_selector(Component::RelativeSelectorAnchor); + // Do we see a combinator? If so, push that. Otherwise, push a descendant + // combinator. + builder.push_combinator(combinator.unwrap_or(Combinator::Descendant)); + }, + ParseRelative::ForNesting => { + if let Ok(combinator) = combinator { + builder.push_simple_selector(Component::ParentSelector); + builder.push_combinator(combinator); + } + }, + ParseRelative::No => unreachable!(), + } + } + 'outer_loop: loop { + // Parse a sequence of simple selectors. + let empty = parse_compound_selector(parser, &mut state, input, &mut builder)?; + if empty { + return Err(input.new_custom_error(if builder.has_combinators() { + SelectorParseErrorKind::DanglingCombinator + } else { + SelectorParseErrorKind::EmptySelector + })); + } + + if state.intersects(SelectorParsingState::AFTER_PSEUDO) { + debug_assert!(state.intersects( + SelectorParsingState::AFTER_PSEUDO_ELEMENT | + SelectorParsingState::AFTER_SLOTTED | + SelectorParsingState::AFTER_PART + )); + break; + } + + let combinator = if let Ok(c) = try_parse_combinator::<P, Impl>(input) { + c + } else { + break 'outer_loop; + }; + + if !state.allows_combinators() { + return Err(input.new_custom_error(SelectorParseErrorKind::InvalidState)); + } + + builder.push_combinator(combinator); + } + return Ok(Selector(builder.build())); +} + +fn try_parse_combinator<'i, 't, P, Impl>(input: &mut CssParser<'i, 't>) -> Result<Combinator, ()> { + let mut any_whitespace = false; + loop { + let before_this_token = input.state(); + match input.next_including_whitespace() { + Err(_e) => return Err(()), + Ok(&Token::WhiteSpace(_)) => any_whitespace = true, + Ok(&Token::Delim('>')) => { + return Ok(Combinator::Child); + }, + Ok(&Token::Delim('+')) => { + return Ok(Combinator::NextSibling); + }, + Ok(&Token::Delim('~')) => { + return Ok(Combinator::LaterSibling); + }, + Ok(_) => { + input.reset(&before_this_token); + if any_whitespace { + return Ok(Combinator::Descendant); + } else { + return Err(()); + } + }, + } + } +} + +/// * `Err(())`: Invalid selector, abort +/// * `Ok(false)`: Not a type selector, could be something else. `input` was not consumed. +/// * `Ok(true)`: Length 0 (`*|*`), 1 (`*|E` or `ns|*`) or 2 (`|E` or `ns|E`) +fn parse_type_selector<'i, 't, P, Impl, S>( + parser: &P, + input: &mut CssParser<'i, 't>, + state: SelectorParsingState, + sink: &mut S, +) -> Result<bool, ParseError<'i, P::Error>> +where + P: Parser<'i, Impl = Impl>, + Impl: SelectorImpl, + S: Push<Component<Impl>>, +{ + match parse_qualified_name(parser, input, /* in_attr_selector = */ false) { + Err(ParseError { + kind: ParseErrorKind::Basic(BasicParseErrorKind::EndOfInput), + .. + }) | + Ok(OptionalQName::None(_)) => Ok(false), + Ok(OptionalQName::Some(namespace, local_name)) => { + if state.intersects(SelectorParsingState::AFTER_PSEUDO) { + return Err(input.new_custom_error(SelectorParseErrorKind::InvalidState)); + } + match namespace { + QNamePrefix::ImplicitAnyNamespace => {}, + QNamePrefix::ImplicitDefaultNamespace(url) => { + sink.push(Component::DefaultNamespace(url)) + }, + QNamePrefix::ExplicitNamespace(prefix, url) => { + sink.push(match parser.default_namespace() { + Some(ref default_url) if url == *default_url => { + Component::DefaultNamespace(url) + }, + _ => Component::Namespace(prefix, url), + }) + }, + QNamePrefix::ExplicitNoNamespace => sink.push(Component::ExplicitNoNamespace), + QNamePrefix::ExplicitAnyNamespace => { + match parser.default_namespace() { + // Element type selectors that have no namespace + // component (no namespace separator) represent elements + // without regard to the element's namespace (equivalent + // to "*|") unless a default namespace has been declared + // for namespaced selectors (e.g. in CSS, in the style + // sheet). If a default namespace has been declared, + // such selectors will represent only elements in the + // default namespace. + // -- Selectors § 6.1.1 + // So we'll have this act the same as the + // QNamePrefix::ImplicitAnyNamespace case. + None => {}, + Some(_) => sink.push(Component::ExplicitAnyNamespace), + } + }, + QNamePrefix::ImplicitNoNamespace => { + unreachable!() // Not returned with in_attr_selector = false + }, + } + match local_name { + Some(name) => sink.push(Component::LocalName(LocalName { + lower_name: to_ascii_lowercase(&name).as_ref().into(), + name: name.as_ref().into(), + })), + None => sink.push(Component::ExplicitUniversalType), + } + Ok(true) + }, + Err(e) => Err(e), + } +} + +#[derive(Debug)] +enum SimpleSelectorParseResult<Impl: SelectorImpl> { + SimpleSelector(Component<Impl>), + PseudoElement(Impl::PseudoElement), + SlottedPseudo(Selector<Impl>), + PartPseudo(Box<[Impl::Identifier]>), +} + +#[derive(Debug)] +enum QNamePrefix<Impl: SelectorImpl> { + ImplicitNoNamespace, // `foo` in attr selectors + ImplicitAnyNamespace, // `foo` in type selectors, without a default ns + ImplicitDefaultNamespace(Impl::NamespaceUrl), // `foo` in type selectors, with a default ns + ExplicitNoNamespace, // `|foo` + ExplicitAnyNamespace, // `*|foo` + ExplicitNamespace(Impl::NamespacePrefix, Impl::NamespaceUrl), // `prefix|foo` +} + +enum OptionalQName<'i, Impl: SelectorImpl> { + Some(QNamePrefix<Impl>, Option<CowRcStr<'i>>), + None(Token<'i>), +} + +/// * `Err(())`: Invalid selector, abort +/// * `Ok(None(token))`: Not a simple selector, could be something else. `input` was not consumed, +/// but the token is still returned. +/// * `Ok(Some(namespace, local_name))`: `None` for the local name means a `*` universal selector +fn parse_qualified_name<'i, 't, P, Impl>( + parser: &P, + input: &mut CssParser<'i, 't>, + in_attr_selector: bool, +) -> Result<OptionalQName<'i, Impl>, ParseError<'i, P::Error>> +where + P: Parser<'i, Impl = Impl>, + Impl: SelectorImpl, +{ + let default_namespace = |local_name| { + let namespace = match parser.default_namespace() { + Some(url) => QNamePrefix::ImplicitDefaultNamespace(url), + None => QNamePrefix::ImplicitAnyNamespace, + }; + Ok(OptionalQName::Some(namespace, local_name)) + }; + + let explicit_namespace = |input: &mut CssParser<'i, 't>, namespace| { + let location = input.current_source_location(); + match input.next_including_whitespace() { + Ok(&Token::Delim('*')) if !in_attr_selector => Ok(OptionalQName::Some(namespace, None)), + Ok(&Token::Ident(ref local_name)) => { + Ok(OptionalQName::Some(namespace, Some(local_name.clone()))) + }, + Ok(t) if in_attr_selector => { + let e = SelectorParseErrorKind::InvalidQualNameInAttr(t.clone()); + Err(location.new_custom_error(e)) + }, + Ok(t) => Err(location.new_custom_error( + SelectorParseErrorKind::ExplicitNamespaceUnexpectedToken(t.clone()), + )), + Err(e) => Err(e.into()), + } + }; + + let start = input.state(); + match input.next_including_whitespace() { + Ok(Token::Ident(value)) => { + let value = value.clone(); + let after_ident = input.state(); + match input.next_including_whitespace() { + Ok(&Token::Delim('|')) => { + let prefix = value.as_ref().into(); + let result = parser.namespace_for_prefix(&prefix); + let url = result.ok_or( + after_ident + .source_location() + .new_custom_error(SelectorParseErrorKind::ExpectedNamespace(value)), + )?; + explicit_namespace(input, QNamePrefix::ExplicitNamespace(prefix, url)) + }, + _ => { + input.reset(&after_ident); + if in_attr_selector { + Ok(OptionalQName::Some( + QNamePrefix::ImplicitNoNamespace, + Some(value), + )) + } else { + default_namespace(Some(value)) + } + }, + } + }, + Ok(Token::Delim('*')) => { + let after_star = input.state(); + match input.next_including_whitespace() { + Ok(&Token::Delim('|')) => { + explicit_namespace(input, QNamePrefix::ExplicitAnyNamespace) + }, + _ if !in_attr_selector => { + input.reset(&after_star); + default_namespace(None) + }, + result => { + let t = result?; + Err(after_star + .source_location() + .new_custom_error(SelectorParseErrorKind::ExpectedBarInAttr(t.clone()))) + }, + } + }, + Ok(Token::Delim('|')) => explicit_namespace(input, QNamePrefix::ExplicitNoNamespace), + Ok(t) => { + let t = t.clone(); + input.reset(&start); + Ok(OptionalQName::None(t)) + }, + Err(e) => { + input.reset(&start); + Err(e.into()) + }, + } +} + +fn parse_attribute_selector<'i, 't, P, Impl>( + parser: &P, + input: &mut CssParser<'i, 't>, +) -> Result<Component<Impl>, ParseError<'i, P::Error>> +where + P: Parser<'i, Impl = Impl>, + Impl: SelectorImpl, +{ + let namespace; + let local_name; + + input.skip_whitespace(); + + match parse_qualified_name(parser, input, /* in_attr_selector = */ true)? { + OptionalQName::None(t) => { + return Err(input.new_custom_error( + SelectorParseErrorKind::NoQualifiedNameInAttributeSelector(t), + )); + }, + OptionalQName::Some(_, None) => unreachable!(), + OptionalQName::Some(ns, Some(ln)) => { + local_name = ln; + namespace = match ns { + QNamePrefix::ImplicitNoNamespace | QNamePrefix::ExplicitNoNamespace => None, + QNamePrefix::ExplicitNamespace(prefix, url) => { + Some(NamespaceConstraint::Specific((prefix, url))) + }, + QNamePrefix::ExplicitAnyNamespace => Some(NamespaceConstraint::Any), + QNamePrefix::ImplicitAnyNamespace | QNamePrefix::ImplicitDefaultNamespace(_) => { + unreachable!() // Not returned with in_attr_selector = true + }, + } + }, + } + + let location = input.current_source_location(); + let operator = match input.next() { + // [foo] + Err(_) => { + let local_name_lower = to_ascii_lowercase(&local_name).as_ref().into(); + let local_name = local_name.as_ref().into(); + if let Some(namespace) = namespace { + return Ok(Component::AttributeOther(Box::new( + AttrSelectorWithOptionalNamespace { + namespace: Some(namespace), + local_name, + local_name_lower, + operation: ParsedAttrSelectorOperation::Exists, + }, + ))); + } else { + return Ok(Component::AttributeInNoNamespaceExists { + local_name, + local_name_lower, + }); + } + }, + + // [foo=bar] + Ok(&Token::Delim('=')) => AttrSelectorOperator::Equal, + // [foo~=bar] + Ok(&Token::IncludeMatch) => AttrSelectorOperator::Includes, + // [foo|=bar] + Ok(&Token::DashMatch) => AttrSelectorOperator::DashMatch, + // [foo^=bar] + Ok(&Token::PrefixMatch) => AttrSelectorOperator::Prefix, + // [foo*=bar] + Ok(&Token::SubstringMatch) => AttrSelectorOperator::Substring, + // [foo$=bar] + Ok(&Token::SuffixMatch) => AttrSelectorOperator::Suffix, + Ok(t) => { + return Err(location.new_custom_error( + SelectorParseErrorKind::UnexpectedTokenInAttributeSelector(t.clone()), + )); + }, + }; + + let value = match input.expect_ident_or_string() { + Ok(t) => t.clone(), + Err(BasicParseError { + kind: BasicParseErrorKind::UnexpectedToken(t), + location, + }) => return Err(location.new_custom_error(SelectorParseErrorKind::BadValueInAttr(t))), + Err(e) => return Err(e.into()), + }; + + let attribute_flags = parse_attribute_flags(input)?; + let value = value.as_ref().into(); + let local_name_lower; + let local_name_is_ascii_lowercase; + let case_sensitivity; + { + let local_name_lower_cow = to_ascii_lowercase(&local_name); + case_sensitivity = + attribute_flags.to_case_sensitivity(local_name_lower_cow.as_ref(), namespace.is_some()); + local_name_lower = local_name_lower_cow.as_ref().into(); + local_name_is_ascii_lowercase = matches!(local_name_lower_cow, Cow::Borrowed(..)); + } + let local_name = local_name.as_ref().into(); + if namespace.is_some() || !local_name_is_ascii_lowercase { + Ok(Component::AttributeOther(Box::new( + AttrSelectorWithOptionalNamespace { + namespace, + local_name, + local_name_lower, + operation: ParsedAttrSelectorOperation::WithValue { + operator, + case_sensitivity, + value, + }, + }, + ))) + } else { + Ok(Component::AttributeInNoNamespace { + local_name, + operator, + value, + case_sensitivity, + }) + } +} + +/// An attribute selector can have 's' or 'i' as flags, or no flags at all. +enum AttributeFlags { + // Matching should be case-sensitive ('s' flag). + CaseSensitive, + // Matching should be case-insensitive ('i' flag). + AsciiCaseInsensitive, + // No flags. Matching behavior depends on the name of the attribute. + CaseSensitivityDependsOnName, +} + +impl AttributeFlags { + fn to_case_sensitivity(self, local_name: &str, have_namespace: bool) -> ParsedCaseSensitivity { + match self { + AttributeFlags::CaseSensitive => ParsedCaseSensitivity::ExplicitCaseSensitive, + AttributeFlags::AsciiCaseInsensitive => ParsedCaseSensitivity::AsciiCaseInsensitive, + AttributeFlags::CaseSensitivityDependsOnName => { + if !have_namespace && + include!(concat!( + env!("OUT_DIR"), + "/ascii_case_insensitive_html_attributes.rs" + )) + .contains(local_name) + { + ParsedCaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument + } else { + ParsedCaseSensitivity::CaseSensitive + } + }, + } + } +} + +fn parse_attribute_flags<'i, 't>( + input: &mut CssParser<'i, 't>, +) -> Result<AttributeFlags, BasicParseError<'i>> { + let location = input.current_source_location(); + let token = match input.next() { + Ok(t) => t, + Err(..) => { + // Selectors spec says language-defined; HTML says it depends on the + // exact attribute name. + return Ok(AttributeFlags::CaseSensitivityDependsOnName); + }, + }; + + let ident = match *token { + Token::Ident(ref i) => i, + ref other => return Err(location.new_basic_unexpected_token_error(other.clone())), + }; + + Ok(match_ignore_ascii_case! { + ident, + "i" => AttributeFlags::AsciiCaseInsensitive, + "s" => AttributeFlags::CaseSensitive, + _ => return Err(location.new_basic_unexpected_token_error(token.clone())), + }) +} + +/// Level 3: Parse **one** simple_selector. (Though we might insert a second +/// implied "<defaultns>|*" type selector.) +fn parse_negation<'i, 't, P, Impl>( + parser: &P, + input: &mut CssParser<'i, 't>, + state: SelectorParsingState, +) -> Result<Component<Impl>, ParseError<'i, P::Error>> +where + P: Parser<'i, Impl = Impl>, + Impl: SelectorImpl, +{ + let list = SelectorList::parse_with_state( + parser, + input, + state | + SelectorParsingState::SKIP_DEFAULT_NAMESPACE | + SelectorParsingState::DISALLOW_PSEUDOS, + ForgivingParsing::No, + ParseRelative::No, + )?; + + Ok(Component::Negation(list)) +} + +/// simple_selector_sequence +/// : [ type_selector | universal ] [ HASH | class | attrib | pseudo | negation ]* +/// | [ HASH | class | attrib | pseudo | negation ]+ +/// +/// `Err(())` means invalid selector. +/// `Ok(true)` is an empty selector +fn parse_compound_selector<'i, 't, P, Impl>( + parser: &P, + state: &mut SelectorParsingState, + input: &mut CssParser<'i, 't>, + builder: &mut SelectorBuilder<Impl>, +) -> Result<bool, ParseError<'i, P::Error>> +where + P: Parser<'i, Impl = Impl>, + Impl: SelectorImpl, +{ + input.skip_whitespace(); + + let mut empty = true; + if parse_type_selector(parser, input, *state, builder)? { + empty = false; + } + + loop { + let result = match parse_one_simple_selector(parser, input, *state)? { + None => break, + Some(result) => result, + }; + + if empty { + if let Some(url) = parser.default_namespace() { + // If there was no explicit type selector, but there is a + // default namespace, there is an implicit "<defaultns>|*" type + // selector. Except for :host() or :not() / :is() / :where(), + // where we ignore it. + // + // https://drafts.csswg.org/css-scoping/#host-element-in-tree: + // + // When considered within its own shadow trees, the shadow + // host is featureless. Only the :host, :host(), and + // :host-context() pseudo-classes are allowed to match it. + // + // https://drafts.csswg.org/selectors-4/#featureless: + // + // A featureless element does not match any selector at all, + // except those it is explicitly defined to match. If a + // given selector is allowed to match a featureless element, + // it must do so while ignoring the default namespace. + // + // https://drafts.csswg.org/selectors-4/#matches + // + // Default namespace declarations do not affect the compound + // selector representing the subject of any selector within + // a :is() pseudo-class, unless that compound selector + // contains an explicit universal selector or type selector. + // + // (Similar quotes for :where() / :not()) + // + let ignore_default_ns = state + .intersects(SelectorParsingState::SKIP_DEFAULT_NAMESPACE) || + matches!( + result, + SimpleSelectorParseResult::SimpleSelector(Component::Host(..)) + ); + if !ignore_default_ns { + builder.push_simple_selector(Component::DefaultNamespace(url)); + } + } + } + + empty = false; + + match result { + SimpleSelectorParseResult::SimpleSelector(s) => { + builder.push_simple_selector(s); + }, + SimpleSelectorParseResult::PartPseudo(part_names) => { + state.insert(SelectorParsingState::AFTER_PART); + builder.push_combinator(Combinator::Part); + builder.push_simple_selector(Component::Part(part_names)); + }, + SimpleSelectorParseResult::SlottedPseudo(selector) => { + state.insert(SelectorParsingState::AFTER_SLOTTED); + builder.push_combinator(Combinator::SlotAssignment); + builder.push_simple_selector(Component::Slotted(selector)); + }, + SimpleSelectorParseResult::PseudoElement(p) => { + state.insert(SelectorParsingState::AFTER_PSEUDO_ELEMENT); + if !p.accepts_state_pseudo_classes() { + state.insert(SelectorParsingState::AFTER_NON_STATEFUL_PSEUDO_ELEMENT); + } + builder.push_combinator(Combinator::PseudoElement); + builder.push_simple_selector(Component::PseudoElement(p)); + }, + } + } + Ok(empty) +} + +fn parse_is_where<'i, 't, P, Impl>( + parser: &P, + input: &mut CssParser<'i, 't>, + state: SelectorParsingState, + component: impl FnOnce(SelectorList<Impl>) -> Component<Impl>, +) -> Result<Component<Impl>, ParseError<'i, P::Error>> +where + P: Parser<'i, Impl = Impl>, + Impl: SelectorImpl, +{ + debug_assert!(parser.parse_is_and_where()); + // https://drafts.csswg.org/selectors/#matches-pseudo: + // + // Pseudo-elements cannot be represented by the matches-any + // pseudo-class; they are not valid within :is(). + // + let inner = SelectorList::parse_with_state( + parser, + input, + state | + SelectorParsingState::SKIP_DEFAULT_NAMESPACE | + SelectorParsingState::DISALLOW_PSEUDOS, + ForgivingParsing::Yes, + ParseRelative::No, + )?; + Ok(component(inner)) +} + +fn parse_has<'i, 't, P, Impl>( + parser: &P, + input: &mut CssParser<'i, 't>, + state: SelectorParsingState, +) -> Result<Component<Impl>, ParseError<'i, P::Error>> +where + P: Parser<'i, Impl = Impl>, + Impl: SelectorImpl, +{ + debug_assert!(parser.parse_has()); + if state.intersects(SelectorParsingState::DISALLOW_RELATIVE_SELECTOR) { + return Err(input.new_custom_error(SelectorParseErrorKind::InvalidState)); + } + // Nested `:has()` is disallowed, mark it as such. + // Note: The spec defines ":has-allowed pseudo-element," but there's no + // pseudo-element defined as such at the moment. + // https://w3c.github.io/csswg-drafts/selectors-4/#has-allowed-pseudo-element + let inner = SelectorList::parse_with_state( + parser, + input, + state | + SelectorParsingState::SKIP_DEFAULT_NAMESPACE | + SelectorParsingState::DISALLOW_PSEUDOS | + SelectorParsingState::DISALLOW_RELATIVE_SELECTOR, + ForgivingParsing::No, + ParseRelative::ForHas, + )?; + Ok(Component::Has(RelativeSelector::from_selector_list(inner))) +} + +fn parse_functional_pseudo_class<'i, 't, P, Impl>( + parser: &P, + input: &mut CssParser<'i, 't>, + name: CowRcStr<'i>, + state: SelectorParsingState, +) -> Result<Component<Impl>, ParseError<'i, P::Error>> +where + P: Parser<'i, Impl = Impl>, + Impl: SelectorImpl, +{ + match_ignore_ascii_case! { &name, + "nth-child" => return parse_nth_pseudo_class(parser, input, state, NthType::Child), + "nth-of-type" => return parse_nth_pseudo_class(parser, input, state, NthType::OfType), + "nth-last-child" => return parse_nth_pseudo_class(parser, input, state, NthType::LastChild), + "nth-last-of-type" => return parse_nth_pseudo_class(parser, input, state, NthType::LastOfType), + "is" if parser.parse_is_and_where() => return parse_is_where(parser, input, state, Component::Is), + "where" if parser.parse_is_and_where() => return parse_is_where(parser, input, state, Component::Where), + "has" if parser.parse_has() => return parse_has(parser, input, state), + "host" => { + if !state.allows_tree_structural_pseudo_classes() { + return Err(input.new_custom_error(SelectorParseErrorKind::InvalidState)); + } + return Ok(Component::Host(Some(parse_inner_compound_selector(parser, input, state)?))); + }, + "not" => { + return parse_negation(parser, input, state) + }, + _ => {} + } + + if parser.parse_is_and_where() && parser.is_is_alias(&name) { + return parse_is_where(parser, input, state, Component::Is); + } + + if state.intersects(SelectorParsingState::AFTER_PSEUDO_ELEMENT | SelectorParsingState::AFTER_SLOTTED) { + return Err(input.new_custom_error(SelectorParseErrorKind::InvalidState)); + } + + let after_part = state.intersects(SelectorParsingState::AFTER_PART); + P::parse_non_ts_functional_pseudo_class(parser, name, input, after_part).map(Component::NonTSPseudoClass) +} + +fn parse_nth_pseudo_class<'i, 't, P, Impl>( + parser: &P, + input: &mut CssParser<'i, 't>, + state: SelectorParsingState, + ty: NthType, +) -> Result<Component<Impl>, ParseError<'i, P::Error>> +where + P: Parser<'i, Impl = Impl>, + Impl: SelectorImpl, +{ + if !state.allows_tree_structural_pseudo_classes() { + return Err(input.new_custom_error(SelectorParseErrorKind::InvalidState)); + } + let (a, b) = parse_nth(input)?; + let nth_data = NthSelectorData { + ty, + is_function: true, + a, + b, + }; + if !parser.parse_nth_child_of() || ty.is_of_type() { + return Ok(Component::Nth(nth_data)); + } + + // Try to parse "of <selector-list>". + if input.try_parse(|i| i.expect_ident_matching("of")).is_err() { + return Ok(Component::Nth(nth_data)); + } + // Whitespace between "of" and the selector list is optional + // https://github.com/w3c/csswg-drafts/issues/8285 + let selectors = SelectorList::parse_with_state( + parser, + input, + state | + SelectorParsingState::SKIP_DEFAULT_NAMESPACE | + SelectorParsingState::DISALLOW_PSEUDOS, + ForgivingParsing::No, + ParseRelative::No, + )?; + Ok(Component::NthOf(NthOfSelectorData::new( + &nth_data, + selectors.slice().iter().cloned(), + ))) +} + +/// Returns whether the name corresponds to a CSS2 pseudo-element that +/// can be specified with the single colon syntax (in addition to the +/// double-colon syntax, which can be used for all pseudo-elements). +fn is_css2_pseudo_element(name: &str) -> bool { + // ** Do not add to this list! ** + match_ignore_ascii_case! { name, + "before" | "after" | "first-line" | "first-letter" => true, + _ => false, + } +} + +/// Parse a simple selector other than a type selector. +/// +/// * `Err(())`: Invalid selector, abort +/// * `Ok(None)`: Not a simple selector, could be something else. `input` was not consumed. +/// * `Ok(Some(_))`: Parsed a simple selector or pseudo-element +fn parse_one_simple_selector<'i, 't, P, Impl>( + parser: &P, + input: &mut CssParser<'i, 't>, + state: SelectorParsingState, +) -> Result<Option<SimpleSelectorParseResult<Impl>>, ParseError<'i, P::Error>> +where + P: Parser<'i, Impl = Impl>, + Impl: SelectorImpl, +{ + let start = input.state(); + let token = match input.next_including_whitespace().map(|t| t.clone()) { + Ok(t) => t, + Err(..) => { + input.reset(&start); + return Ok(None); + }, + }; + + Ok(Some(match token { + Token::IDHash(id) => { + if state.intersects(SelectorParsingState::AFTER_PSEUDO) { + return Err(input.new_custom_error(SelectorParseErrorKind::InvalidState)); + } + let id = Component::ID(id.as_ref().into()); + SimpleSelectorParseResult::SimpleSelector(id) + }, + Token::Delim(delim) if delim == '.' || (delim == '&' && parser.parse_parent_selector()) => { + if state.intersects(SelectorParsingState::AFTER_PSEUDO) { + return Err(input.new_custom_error(SelectorParseErrorKind::InvalidState)); + } + let location = input.current_source_location(); + SimpleSelectorParseResult::SimpleSelector(if delim == '&' { + Component::ParentSelector + } else { + let class = match *input.next_including_whitespace()? { + Token::Ident(ref class) => class, + ref t => { + let e = SelectorParseErrorKind::ClassNeedsIdent(t.clone()); + return Err(location.new_custom_error(e)); + }, + }; + Component::Class(class.as_ref().into()) + }) + }, + Token::SquareBracketBlock => { + if state.intersects(SelectorParsingState::AFTER_PSEUDO) { + return Err(input.new_custom_error(SelectorParseErrorKind::InvalidState)); + } + let attr = input.parse_nested_block(|input| parse_attribute_selector(parser, input))?; + SimpleSelectorParseResult::SimpleSelector(attr) + }, + Token::Colon => { + let location = input.current_source_location(); + let (is_single_colon, next_token) = match input.next_including_whitespace()?.clone() { + Token::Colon => (false, input.next_including_whitespace()?.clone()), + t => (true, t), + }; + let (name, is_functional) = match next_token { + Token::Ident(name) => (name, false), + Token::Function(name) => (name, true), + t => { + let e = SelectorParseErrorKind::PseudoElementExpectedIdent(t); + return Err(input.new_custom_error(e)); + }, + }; + let is_pseudo_element = !is_single_colon || is_css2_pseudo_element(&name); + if is_pseudo_element { + if !state.allows_pseudos() { + return Err(input.new_custom_error(SelectorParseErrorKind::InvalidState)); + } + let pseudo_element = if is_functional { + if P::parse_part(parser) && name.eq_ignore_ascii_case("part") { + if !state.allows_part() { + return Err( + input.new_custom_error(SelectorParseErrorKind::InvalidState) + ); + } + let names = input.parse_nested_block(|input| { + let mut result = Vec::with_capacity(1); + result.push(input.expect_ident()?.as_ref().into()); + while !input.is_exhausted() { + result.push(input.expect_ident()?.as_ref().into()); + } + Ok(result.into_boxed_slice()) + })?; + return Ok(Some(SimpleSelectorParseResult::PartPseudo(names))); + } + if P::parse_slotted(parser) && name.eq_ignore_ascii_case("slotted") { + if !state.allows_slotted() { + return Err( + input.new_custom_error(SelectorParseErrorKind::InvalidState) + ); + } + let selector = input.parse_nested_block(|input| { + parse_inner_compound_selector(parser, input, state) + })?; + return Ok(Some(SimpleSelectorParseResult::SlottedPseudo(selector))); + } + input.parse_nested_block(|input| { + P::parse_functional_pseudo_element(parser, name, input) + })? + } else { + P::parse_pseudo_element(parser, location, name)? + }; + + if state.intersects(SelectorParsingState::AFTER_SLOTTED) && + !pseudo_element.valid_after_slotted() + { + return Err(input.new_custom_error(SelectorParseErrorKind::InvalidState)); + } + SimpleSelectorParseResult::PseudoElement(pseudo_element) + } else { + let pseudo_class = if is_functional { + input.parse_nested_block(|input| { + parse_functional_pseudo_class(parser, input, name, state) + })? + } else { + parse_simple_pseudo_class(parser, location, name, state)? + }; + SimpleSelectorParseResult::SimpleSelector(pseudo_class) + } + }, + _ => { + input.reset(&start); + return Ok(None); + }, + })) +} + +fn parse_simple_pseudo_class<'i, P, Impl>( + parser: &P, + location: SourceLocation, + name: CowRcStr<'i>, + state: SelectorParsingState, +) -> Result<Component<Impl>, ParseError<'i, P::Error>> +where + P: Parser<'i, Impl = Impl>, + Impl: SelectorImpl, +{ + if !state.allows_non_functional_pseudo_classes() { + return Err(location.new_custom_error(SelectorParseErrorKind::InvalidState)); + } + + if state.allows_tree_structural_pseudo_classes() { + match_ignore_ascii_case! { &name, + "first-child" => return Ok(Component::Nth(NthSelectorData::first(/* of_type = */ false))), + "last-child" => return Ok(Component::Nth(NthSelectorData::last(/* of_type = */ false))), + "only-child" => return Ok(Component::Nth(NthSelectorData::only(/* of_type = */ false))), + "root" => return Ok(Component::Root), + "empty" => return Ok(Component::Empty), + "scope" => return Ok(Component::Scope), + "host" if P::parse_host(parser) => return Ok(Component::Host(None)), + "first-of-type" => return Ok(Component::Nth(NthSelectorData::first(/* of_type = */ true))), + "last-of-type" => return Ok(Component::Nth(NthSelectorData::last(/* of_type = */ true))), + "only-of-type" => return Ok(Component::Nth(NthSelectorData::only(/* of_type = */ true))), + _ => {}, + } + } + + let pseudo_class = P::parse_non_ts_pseudo_class(parser, location, name)?; + if state.intersects(SelectorParsingState::AFTER_PSEUDO_ELEMENT) && + !pseudo_class.is_user_action_state() + { + return Err(location.new_custom_error(SelectorParseErrorKind::InvalidState)); + } + Ok(Component::NonTSPseudoClass(pseudo_class)) +} + +// NB: pub module in order to access the DummyParser +#[cfg(test)] +pub mod tests { + use super::*; + use crate::builder::SelectorFlags; + use crate::parser; + use cssparser::{serialize_identifier, Parser as CssParser, ParserInput, ToCss}; + use std::collections::HashMap; + use std::fmt; + + #[derive(Clone, Debug, Eq, PartialEq)] + pub enum PseudoClass { + Hover, + Active, + Lang(String), + } + + #[derive(Clone, Debug, Eq, PartialEq)] + pub enum PseudoElement { + Before, + After, + Highlight(String), + } + + impl parser::PseudoElement for PseudoElement { + type Impl = DummySelectorImpl; + + fn accepts_state_pseudo_classes(&self) -> bool { + true + } + + fn valid_after_slotted(&self) -> bool { + true + } + } + + impl parser::NonTSPseudoClass for PseudoClass { + type Impl = DummySelectorImpl; + + #[inline] + fn is_active_or_hover(&self) -> bool { + matches!(*self, PseudoClass::Active | PseudoClass::Hover) + } + + #[inline] + fn is_user_action_state(&self) -> bool { + self.is_active_or_hover() + } + } + + impl ToCss for PseudoClass { + fn to_css<W>(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + match *self { + PseudoClass::Hover => dest.write_str(":hover"), + PseudoClass::Active => dest.write_str(":active"), + PseudoClass::Lang(ref lang) => { + dest.write_str(":lang(")?; + serialize_identifier(lang, dest)?; + dest.write_char(')') + }, + } + } + } + + impl ToCss for PseudoElement { + fn to_css<W>(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + match *self { + PseudoElement::Before => dest.write_str("::before"), + PseudoElement::After => dest.write_str("::after"), + PseudoElement::Highlight(ref name) => { + dest.write_str("::highlight(")?; + serialize_identifier(&name, dest)?; + dest.write_char(')') + }, + } + } + } + + #[derive(Clone, Debug, PartialEq)] + pub struct DummySelectorImpl; + + #[derive(Default)] + pub struct DummyParser { + default_ns: Option<DummyAtom>, + ns_prefixes: HashMap<DummyAtom, DummyAtom>, + } + + impl DummyParser { + fn default_with_namespace(default_ns: DummyAtom) -> DummyParser { + DummyParser { + default_ns: Some(default_ns), + ns_prefixes: Default::default(), + } + } + } + + impl SelectorImpl for DummySelectorImpl { + type ExtraMatchingData<'a> = std::marker::PhantomData<&'a ()>; + type AttrValue = DummyAttrValue; + type Identifier = DummyAtom; + type LocalName = DummyAtom; + type NamespaceUrl = DummyAtom; + type NamespacePrefix = DummyAtom; + type BorrowedLocalName = DummyAtom; + type BorrowedNamespaceUrl = DummyAtom; + type NonTSPseudoClass = PseudoClass; + type PseudoElement = PseudoElement; + } + + #[derive(Clone, Debug, Default, Eq, Hash, PartialEq)] + pub struct DummyAttrValue(String); + + impl ToCss for DummyAttrValue { + fn to_css<W>(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + use std::fmt::Write; + + dest.write_char('"')?; + write!(cssparser::CssStringWriter::new(dest), "{}", &self.0)?; + dest.write_char('"') + } + } + + impl<'a> From<&'a str> for DummyAttrValue { + fn from(string: &'a str) -> Self { + Self(string.into()) + } + } + + #[derive(Clone, Debug, Default, Eq, Hash, PartialEq)] + pub struct DummyAtom(String); + + impl ToCss for DummyAtom { + fn to_css<W>(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + serialize_identifier(&self.0, dest) + } + } + + impl From<String> for DummyAtom { + fn from(string: String) -> Self { + DummyAtom(string) + } + } + + impl<'a> From<&'a str> for DummyAtom { + fn from(string: &'a str) -> Self { + DummyAtom(string.into()) + } + } + + impl PrecomputedHash for DummyAtom { + fn precomputed_hash(&self) -> u32 { + self.0.as_ptr() as u32 + } + } + + impl<'i> Parser<'i> for DummyParser { + type Impl = DummySelectorImpl; + type Error = SelectorParseErrorKind<'i>; + + fn parse_slotted(&self) -> bool { + true + } + + fn parse_nth_child_of(&self) -> bool { + true + } + + fn parse_is_and_where(&self) -> bool { + true + } + + fn parse_has(&self) -> bool { + true + } + + fn parse_parent_selector(&self) -> bool { + true + } + + fn parse_part(&self) -> bool { + true + } + + fn parse_non_ts_pseudo_class( + &self, + location: SourceLocation, + name: CowRcStr<'i>, + ) -> Result<PseudoClass, SelectorParseError<'i>> { + match_ignore_ascii_case! { &name, + "hover" => return Ok(PseudoClass::Hover), + "active" => return Ok(PseudoClass::Active), + _ => {} + } + Err( + location.new_custom_error(SelectorParseErrorKind::UnsupportedPseudoClassOrElement( + name, + )), + ) + } + + fn parse_non_ts_functional_pseudo_class<'t>( + &self, + name: CowRcStr<'i>, + parser: &mut CssParser<'i, 't>, + after_part: bool, + ) -> Result<PseudoClass, SelectorParseError<'i>> { + match_ignore_ascii_case! { &name, + "lang" if !after_part => { + let lang = parser.expect_ident_or_string()?.as_ref().to_owned(); + return Ok(PseudoClass::Lang(lang)); + }, + _ => {} + } + Err( + parser.new_custom_error(SelectorParseErrorKind::UnsupportedPseudoClassOrElement( + name, + )), + ) + } + + fn parse_pseudo_element( + &self, + location: SourceLocation, + name: CowRcStr<'i>, + ) -> Result<PseudoElement, SelectorParseError<'i>> { + match_ignore_ascii_case! { &name, + "before" => return Ok(PseudoElement::Before), + "after" => return Ok(PseudoElement::After), + _ => {} + } + Err( + location.new_custom_error(SelectorParseErrorKind::UnsupportedPseudoClassOrElement( + name, + )), + ) + } + + fn parse_functional_pseudo_element<'t>( + &self, + name: CowRcStr<'i>, + parser: &mut CssParser<'i, 't>, + ) -> Result<PseudoElement, SelectorParseError<'i>> { + match_ignore_ascii_case! {&name, + "highlight" => return Ok(PseudoElement::Highlight(parser.expect_ident()?.as_ref().to_owned())), + _ => {} + } + Err( + parser.new_custom_error(SelectorParseErrorKind::UnsupportedPseudoClassOrElement( + name, + )), + ) + } + + fn default_namespace(&self) -> Option<DummyAtom> { + self.default_ns.clone() + } + + fn namespace_for_prefix(&self, prefix: &DummyAtom) -> Option<DummyAtom> { + self.ns_prefixes.get(prefix).cloned() + } + } + + fn parse<'i>( + input: &'i str, + ) -> Result<SelectorList<DummySelectorImpl>, SelectorParseError<'i>> { + parse_relative(input, ParseRelative::No) + } + + fn parse_relative<'i>( + input: &'i str, + parse_relative: ParseRelative, + ) -> Result<SelectorList<DummySelectorImpl>, SelectorParseError<'i>> { + parse_ns_relative(input, &DummyParser::default(), parse_relative) + } + + fn parse_expected<'i, 'a>( + input: &'i str, + expected: Option<&'a str>, + ) -> Result<SelectorList<DummySelectorImpl>, SelectorParseError<'i>> { + parse_ns_expected(input, &DummyParser::default(), expected) + } + + fn parse_relative_expected<'i, 'a>( + input: &'i str, + parse_relative: ParseRelative, + expected: Option<&'a str>, + ) -> Result<SelectorList<DummySelectorImpl>, SelectorParseError<'i>> { + parse_ns_relative_expected(input, &DummyParser::default(), parse_relative, expected) + } + + fn parse_ns<'i>( + input: &'i str, + parser: &DummyParser, + ) -> Result<SelectorList<DummySelectorImpl>, SelectorParseError<'i>> { + parse_ns_relative(input, parser, ParseRelative::No) + } + + fn parse_ns_relative<'i>( + input: &'i str, + parser: &DummyParser, + parse_relative: ParseRelative, + ) -> Result<SelectorList<DummySelectorImpl>, SelectorParseError<'i>> { + parse_ns_relative_expected(input, parser, parse_relative, None) + } + + fn parse_ns_expected<'i, 'a>( + input: &'i str, + parser: &DummyParser, + expected: Option<&'a str>, + ) -> Result<SelectorList<DummySelectorImpl>, SelectorParseError<'i>> { + parse_ns_relative_expected(input, parser, ParseRelative::No, expected) + } + + fn parse_ns_relative_expected<'i, 'a>( + input: &'i str, + parser: &DummyParser, + parse_relative: ParseRelative, + expected: Option<&'a str>, + ) -> Result<SelectorList<DummySelectorImpl>, SelectorParseError<'i>> { + let mut parser_input = ParserInput::new(input); + let result = SelectorList::parse( + parser, + &mut CssParser::new(&mut parser_input), + parse_relative, + ); + if let Ok(ref selectors) = result { + // We can't assume that the serialized parsed selector will equal + // the input; for example, if there is no default namespace, '*|foo' + // should serialize to 'foo'. + assert_eq!( + selectors.to_css_string(), + match expected { + Some(x) => x, + None => input, + } + ); + } + result + } + + fn specificity(a: u32, b: u32, c: u32) -> u32 { + a << 20 | b << 10 | c + } + + #[test] + fn test_empty() { + let mut input = ParserInput::new(":empty"); + let list = SelectorList::parse( + &DummyParser::default(), + &mut CssParser::new(&mut input), + ParseRelative::No, + ); + assert!(list.is_ok()); + } + + const MATHML: &str = "http://www.w3.org/1998/Math/MathML"; + const SVG: &str = "http://www.w3.org/2000/svg"; + + #[test] + fn test_parsing() { + assert!(parse("").is_err()); + assert!(parse(":lang(4)").is_err()); + assert!(parse(":lang(en US)").is_err()); + assert_eq!( + parse("EeÉ"), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![Component::LocalName(LocalName { + name: DummyAtom::from("EeÉ"), + lower_name: DummyAtom::from("eeÉ"), + })], + specificity(0, 0, 1), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + assert_eq!( + parse("|e"), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::ExplicitNoNamespace, + Component::LocalName(LocalName { + name: DummyAtom::from("e"), + lower_name: DummyAtom::from("e"), + }), + ], + specificity(0, 0, 1), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + // When the default namespace is not set, *| should be elided. + // https://github.com/servo/servo/pull/17537 + assert_eq!( + parse_expected("*|e", Some("e")), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![Component::LocalName(LocalName { + name: DummyAtom::from("e"), + lower_name: DummyAtom::from("e"), + })], + specificity(0, 0, 1), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + // When the default namespace is set, *| should _not_ be elided (as foo + // is no longer equivalent to *|foo--the former is only for foo in the + // default namespace). + // https://github.com/servo/servo/issues/16020 + assert_eq!( + parse_ns( + "*|e", + &DummyParser::default_with_namespace(DummyAtom::from("https://mozilla.org")) + ), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::ExplicitAnyNamespace, + Component::LocalName(LocalName { + name: DummyAtom::from("e"), + lower_name: DummyAtom::from("e"), + }), + ], + specificity(0, 0, 1), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + assert_eq!( + parse("*"), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![Component::ExplicitUniversalType], + specificity(0, 0, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + assert_eq!( + parse("|*"), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::ExplicitNoNamespace, + Component::ExplicitUniversalType, + ], + specificity(0, 0, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + assert_eq!( + parse_expected("*|*", Some("*")), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![Component::ExplicitUniversalType], + specificity(0, 0, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + assert_eq!( + parse_ns( + "*|*", + &DummyParser::default_with_namespace(DummyAtom::from("https://mozilla.org")) + ), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::ExplicitAnyNamespace, + Component::ExplicitUniversalType, + ], + specificity(0, 0, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + assert_eq!( + parse(".foo:lang(en-US)"), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::Class(DummyAtom::from("foo")), + Component::NonTSPseudoClass(PseudoClass::Lang("en-US".to_owned())), + ], + specificity(0, 2, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + assert_eq!( + parse("#bar"), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![Component::ID(DummyAtom::from("bar"))], + specificity(1, 0, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + assert_eq!( + parse("e.foo#bar"), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::LocalName(LocalName { + name: DummyAtom::from("e"), + lower_name: DummyAtom::from("e"), + }), + Component::Class(DummyAtom::from("foo")), + Component::ID(DummyAtom::from("bar")), + ], + specificity(1, 1, 1), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + assert_eq!( + parse("e.foo #bar"), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::LocalName(LocalName { + name: DummyAtom::from("e"), + lower_name: DummyAtom::from("e"), + }), + Component::Class(DummyAtom::from("foo")), + Component::Combinator(Combinator::Descendant), + Component::ID(DummyAtom::from("bar")), + ], + specificity(1, 1, 1), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + // Default namespace does not apply to attribute selectors + // https://github.com/mozilla/servo/pull/1652 + let mut parser = DummyParser::default(); + assert_eq!( + parse_ns("[Foo]", &parser), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![Component::AttributeInNoNamespaceExists { + local_name: DummyAtom::from("Foo"), + local_name_lower: DummyAtom::from("foo"), + }], + specificity(0, 1, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + assert!(parse_ns("svg|circle", &parser).is_err()); + parser + .ns_prefixes + .insert(DummyAtom("svg".into()), DummyAtom(SVG.into())); + assert_eq!( + parse_ns("svg|circle", &parser), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::Namespace(DummyAtom("svg".into()), SVG.into()), + Component::LocalName(LocalName { + name: DummyAtom::from("circle"), + lower_name: DummyAtom::from("circle"), + }), + ], + specificity(0, 0, 1), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + assert_eq!( + parse_ns("svg|*", &parser), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::Namespace(DummyAtom("svg".into()), SVG.into()), + Component::ExplicitUniversalType, + ], + specificity(0, 0, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + // Default namespace does not apply to attribute selectors + // https://github.com/mozilla/servo/pull/1652 + // but it does apply to implicit type selectors + // https://github.com/servo/rust-selectors/pull/82 + parser.default_ns = Some(MATHML.into()); + assert_eq!( + parse_ns("[Foo]", &parser), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::DefaultNamespace(MATHML.into()), + Component::AttributeInNoNamespaceExists { + local_name: DummyAtom::from("Foo"), + local_name_lower: DummyAtom::from("foo"), + }, + ], + specificity(0, 1, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + // Default namespace does apply to type selectors + assert_eq!( + parse_ns("e", &parser), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::DefaultNamespace(MATHML.into()), + Component::LocalName(LocalName { + name: DummyAtom::from("e"), + lower_name: DummyAtom::from("e"), + }), + ], + specificity(0, 0, 1), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + assert_eq!( + parse_ns("*", &parser), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::DefaultNamespace(MATHML.into()), + Component::ExplicitUniversalType, + ], + specificity(0, 0, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + assert_eq!( + parse_ns("*|*", &parser), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::ExplicitAnyNamespace, + Component::ExplicitUniversalType, + ], + specificity(0, 0, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + // Default namespace applies to universal and type selectors inside :not and :matches, + // but not otherwise. + assert_eq!( + parse_ns(":not(.cl)", &parser), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::DefaultNamespace(MATHML.into()), + Component::Negation(SelectorList::from_vec(vec![Selector::from_vec( + vec![Component::Class(DummyAtom::from("cl"))], + specificity(0, 1, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])), + ], + specificity(0, 1, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + assert_eq!( + parse_ns(":not(*)", &parser), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::DefaultNamespace(MATHML.into()), + Component::Negation(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::DefaultNamespace(MATHML.into()), + Component::ExplicitUniversalType, + ], + specificity(0, 0, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )]),), + ], + specificity(0, 0, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + assert_eq!( + parse_ns(":not(e)", &parser), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::DefaultNamespace(MATHML.into()), + Component::Negation(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::DefaultNamespace(MATHML.into()), + Component::LocalName(LocalName { + name: DummyAtom::from("e"), + lower_name: DummyAtom::from("e"), + }), + ], + specificity(0, 0, 1), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])), + ], + specificity(0, 0, 1), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + assert_eq!( + parse("[attr|=\"foo\"]"), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![Component::AttributeInNoNamespace { + local_name: DummyAtom::from("attr"), + operator: AttrSelectorOperator::DashMatch, + value: DummyAttrValue::from("foo"), + case_sensitivity: ParsedCaseSensitivity::CaseSensitive, + }], + specificity(0, 1, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + // https://github.com/mozilla/servo/issues/1723 + assert_eq!( + parse("::before"), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::Combinator(Combinator::PseudoElement), + Component::PseudoElement(PseudoElement::Before), + ], + specificity(0, 0, 1), + SelectorFlags::HAS_PSEUDO, + )])) + ); + assert_eq!( + parse("::before:hover"), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::Combinator(Combinator::PseudoElement), + Component::PseudoElement(PseudoElement::Before), + Component::NonTSPseudoClass(PseudoClass::Hover), + ], + specificity(0, 1, 1), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT | SelectorFlags::HAS_PSEUDO, + )])) + ); + assert_eq!( + parse("::before:hover:hover"), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::Combinator(Combinator::PseudoElement), + Component::PseudoElement(PseudoElement::Before), + Component::NonTSPseudoClass(PseudoClass::Hover), + Component::NonTSPseudoClass(PseudoClass::Hover), + ], + specificity(0, 2, 1), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT | SelectorFlags::HAS_PSEUDO, + )])) + ); + assert!(parse("::before:hover:lang(foo)").is_err()); + assert!(parse("::before:hover .foo").is_err()); + assert!(parse("::before .foo").is_err()); + assert!(parse("::before ~ bar").is_err()); + assert!(parse("::before:active").is_ok()); + + // https://github.com/servo/servo/issues/15335 + assert!(parse(":: before").is_err()); + assert_eq!( + parse("div ::after"), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::LocalName(LocalName { + name: DummyAtom::from("div"), + lower_name: DummyAtom::from("div"), + }), + Component::Combinator(Combinator::Descendant), + Component::Combinator(Combinator::PseudoElement), + Component::PseudoElement(PseudoElement::After), + ], + specificity(0, 0, 2), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT | SelectorFlags::HAS_PSEUDO, + )])) + ); + assert_eq!( + parse("#d1 > .ok"), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::ID(DummyAtom::from("d1")), + Component::Combinator(Combinator::Child), + Component::Class(DummyAtom::from("ok")), + ], + (1 << 20) + (1 << 10) + (0 << 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + parser.default_ns = None; + assert!(parse(":not(#provel.old)").is_ok()); + assert!(parse(":not(#provel > old)").is_ok()); + assert!(parse("table[rules]:not([rules=\"none\"]):not([rules=\"\"])").is_ok()); + // https://github.com/servo/servo/issues/16017 + assert_eq!( + parse_ns(":not(*)", &parser), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![Component::Negation(SelectorList::from_vec(vec![ + Selector::from_vec( + vec![Component::ExplicitUniversalType], + specificity(0, 0, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + ) + ]))], + specificity(0, 0, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + assert_eq!( + parse_ns(":not(|*)", &parser), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![Component::Negation(SelectorList::from_vec(vec![ + Selector::from_vec( + vec![ + Component::ExplicitNoNamespace, + Component::ExplicitUniversalType, + ], + specificity(0, 0, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + ) + ]))], + specificity(0, 0, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + // *| should be elided if there is no default namespace. + // https://github.com/servo/servo/pull/17537 + assert_eq!( + parse_ns_expected(":not(*|*)", &parser, Some(":not(*)")), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![Component::Negation(SelectorList::from_vec(vec![ + Selector::from_vec( + vec![Component::ExplicitUniversalType], + specificity(0, 0, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + ) + ]))], + specificity(0, 0, 0), + SelectorFlags::HAS_NON_FEATURELESS_COMPONENT, + )])) + ); + + assert!(parse("::highlight(foo)").is_ok()); + + assert!(parse("::slotted()").is_err()); + assert!(parse("::slotted(div)").is_ok()); + assert!(parse("::slotted(div).foo").is_err()); + assert!(parse("::slotted(div + bar)").is_err()); + assert!(parse("::slotted(div) + foo").is_err()); + + assert!(parse("::part()").is_err()); + assert!(parse("::part(42)").is_err()); + assert!(parse("::part(foo bar)").is_ok()); + assert!(parse("::part(foo):hover").is_ok()); + assert!(parse("::part(foo) + bar").is_err()); + + assert!(parse("div ::slotted(div)").is_ok()); + assert!(parse("div + slot::slotted(div)").is_ok()); + assert!(parse("div + slot::slotted(div.foo)").is_ok()); + assert!(parse("slot::slotted(div,foo)::first-line").is_err()); + assert!(parse("::slotted(div)::before").is_ok()); + assert!(parse("slot::slotted(div,foo)").is_err()); + + assert!(parse("foo:where()").is_ok()); + assert!(parse("foo:where(div, foo, .bar baz)").is_ok()); + assert!(parse("foo:where(::before)").is_ok()); + } + + #[test] + fn parent_selector() { + assert!(parse("foo &").is_ok()); + assert_eq!( + parse("#foo &.bar"), + Ok(SelectorList::from_vec(vec![Selector::from_vec( + vec![ + Component::ID(DummyAtom::from("foo")), + Component::Combinator(Combinator::Descendant), + Component::ParentSelector, + Component::Class(DummyAtom::from("bar")), + ], + (1 << 20) + (1 << 10) + (0 << 0), + SelectorFlags::HAS_PARENT | SelectorFlags::HAS_NON_FEATURELESS_COMPONENT + )])) + ); + + let parent = parse(".bar, div .baz").unwrap(); + let child = parse("#foo &.bar").unwrap(); + assert_eq!( + child.replace_parent_selector(&parent), + parse("#foo :is(.bar, div .baz).bar").unwrap() + ); + + let has_child = parse("#foo:has(&.bar)").unwrap(); + assert_eq!( + has_child.replace_parent_selector(&parent), + parse("#foo:has(:is(.bar, div .baz).bar)").unwrap() + ); + + let child = parse("#foo").unwrap(); + assert_eq!( + child.replace_parent_selector(&parent), + parse(":is(.bar, div .baz) #foo").unwrap() + ); + + let child = + parse_relative_expected("+ #foo", ParseRelative::ForNesting, Some("& + #foo")).unwrap(); + assert_eq!(child, parse("& + #foo").unwrap()); + } + + #[test] + fn test_pseudo_iter() { + let list = parse("q::before").unwrap(); + let selector = &list.slice()[0]; + assert!(!selector.is_universal()); + let mut iter = selector.iter(); + assert_eq!( + iter.next(), + Some(&Component::PseudoElement(PseudoElement::Before)) + ); + assert_eq!(iter.next(), None); + let combinator = iter.next_sequence(); + assert_eq!(combinator, Some(Combinator::PseudoElement)); + assert!(matches!(iter.next(), Some(&Component::LocalName(..)))); + assert_eq!(iter.next(), None); + assert_eq!(iter.next_sequence(), None); + } + + #[test] + fn test_universal() { + let list = parse_ns( + "*|*::before", + &DummyParser::default_with_namespace(DummyAtom::from("https://mozilla.org")), + ) + .unwrap(); + let selector = &list.slice()[0]; + assert!(selector.is_universal()); + } + + #[test] + fn test_empty_pseudo_iter() { + let list = parse("::before").unwrap(); + let selector = &list.slice()[0]; + assert!(selector.is_universal()); + let mut iter = selector.iter(); + assert_eq!( + iter.next(), + Some(&Component::PseudoElement(PseudoElement::Before)) + ); + assert_eq!(iter.next(), None); + assert_eq!(iter.next_sequence(), Some(Combinator::PseudoElement)); + assert_eq!(iter.next(), None); + assert_eq!(iter.next_sequence(), None); + } + + struct TestVisitor { + seen: Vec<String>, + } + + impl SelectorVisitor for TestVisitor { + type Impl = DummySelectorImpl; + + fn visit_simple_selector(&mut self, s: &Component<DummySelectorImpl>) -> bool { + let mut dest = String::new(); + s.to_css(&mut dest).unwrap(); + self.seen.push(dest); + true + } + } + + #[test] + fn visitor() { + let mut test_visitor = TestVisitor { seen: vec![] }; + parse(":not(:hover) ~ label").unwrap().slice()[0].visit(&mut test_visitor); + assert!(test_visitor.seen.contains(&":hover".into())); + + let mut test_visitor = TestVisitor { seen: vec![] }; + parse("::before:hover").unwrap().slice()[0].visit(&mut test_visitor); + assert!(test_visitor.seen.contains(&":hover".into())); + } +} diff --git a/servo/components/selectors/relative_selector/cache.rs b/servo/components/selectors/relative_selector/cache.rs new file mode 100644 index 0000000000..d7681aa3a4 --- /dev/null +++ b/servo/components/selectors/relative_selector/cache.rs @@ -0,0 +1,81 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +use fxhash::FxHashMap; +/// Relative selector cache. This is useful for following cases. +/// First case is non-subject relative selector: Imagine `.anchor:has(<..>) ~ .foo`, with DOM +/// `.anchor + .foo + .. + .foo`. Each match on `.foo` triggers `:has()` traversal that +/// yields the same result. This is simple enough, since we just need to store +/// the exact match on that anchor pass/fail. +/// Second case is `querySelectorAll`: Imagine `querySelectorAll(':has(.a)')`, with DOM +/// `div > .. > div > .a`. When the we perform the traversal at the top div, +/// we basically end up evaluating `:has(.a)` for all anchors, which could be reused. +/// Also consider the sibling version: `querySelectorAll(':has(~ .a)')` with DOM +/// `div + .. + div + .a`. +/// TODO(dshin): Second case is not yet handled. That is tracked in Bug 1845291. +use std::hash::Hash; + +use crate::parser::{RelativeSelector, SelectorKey}; +use crate::{tree::OpaqueElement, SelectorImpl}; + +/// Match data for a given element and a selector. +#[derive(Clone, Copy)] +pub enum RelativeSelectorCachedMatch { + /// This selector matches this element. + Matched, + /// This selector does not match this element. + NotMatched, +} + +impl RelativeSelectorCachedMatch { + /// Is the cached result a match? + pub fn matched(&self) -> bool { + matches!(*self, Self::Matched) + } +} + +#[derive(Clone, Copy, Hash, Eq, PartialEq)] +struct Key { + element: OpaqueElement, + selector: SelectorKey, +} + +impl Key { + pub fn new<Impl: SelectorImpl>( + element: OpaqueElement, + selector: &RelativeSelector<Impl>, + ) -> Self { + Key { + element, + selector: SelectorKey::new(&selector.selector), + } + } +} + +/// Cache to speed up matching of relative selectors. +#[derive(Default)] +pub struct RelativeSelectorCache { + cache: FxHashMap<Key, RelativeSelectorCachedMatch>, +} + +impl RelativeSelectorCache { + /// Add a relative selector match into the cache. + pub fn add<Impl: SelectorImpl>( + &mut self, + anchor: OpaqueElement, + selector: &RelativeSelector<Impl>, + matched: RelativeSelectorCachedMatch, + ) { + self.cache.insert(Key::new(anchor, selector), matched); + } + + /// Check if we have a cache entry for the element. + pub fn lookup<Impl: SelectorImpl>( + &mut self, + element: OpaqueElement, + selector: &RelativeSelector<Impl>, + ) -> Option<RelativeSelectorCachedMatch> { + self.cache.get(&Key::new(element, selector)).copied() + } +} diff --git a/servo/components/selectors/relative_selector/filter.rs b/servo/components/selectors/relative_selector/filter.rs new file mode 100644 index 0000000000..3c3eb7d2fa --- /dev/null +++ b/servo/components/selectors/relative_selector/filter.rs @@ -0,0 +1,159 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +/// Bloom filter for relative selectors. +use fxhash::FxHashMap; + +use crate::bloom::BloomFilter; +use crate::context::QuirksMode; +use crate::parser::{collect_selector_hashes, RelativeSelector, RelativeSelectorMatchHint}; +use crate::tree::{Element, OpaqueElement}; +use crate::SelectorImpl; + +enum Entry { + /// Filter lookup happened once. Construction of the filter is expensive, + /// so this is set when the element for subtree traversal is encountered. + Lookup, + /// Filter lookup happened more than once, and the filter for this element's + /// subtree traversal is constructed. Could use special handlings for pseudo-classes + /// such as `:hover` and `:focus`, see Bug 1845503. + HasFilter(Box<BloomFilter>), +} + +#[derive(Clone, Copy, Hash, Eq, PartialEq)] +enum TraversalKind { + Children, + Descendants, +} + +fn add_to_filter<E: Element>(element: &E, filter: &mut BloomFilter, kind: TraversalKind) -> bool { + let mut child = element.first_element_child(); + while let Some(e) = child { + if !e.add_element_unique_hashes(filter) { + return false; + } + if kind == TraversalKind::Descendants { + if !add_to_filter(&e, filter, kind) { + return false; + } + } + child = e.next_sibling_element(); + } + true +} + +#[derive(Clone, Copy, Hash, Eq, PartialEq)] +struct Key(OpaqueElement, TraversalKind); + +/// Map of bloom filters for fast-rejecting relative selectors. +#[derive(Default)] +pub struct RelativeSelectorFilterMap { + map: FxHashMap<Key, Entry>, +} + +fn fast_reject<Impl: SelectorImpl>( + selector: &RelativeSelector<Impl>, + quirks_mode: QuirksMode, + filter: &BloomFilter, +) -> bool { + let mut hashes = [0u32; 4]; + let mut len = 0; + // For inner selectors, we only collect from the single rightmost compound. + // This is because inner selectors can cause breakouts: e.g. `.anchor:has(:is(.a .b) .c)` + // can match when `.a` is the ancestor of `.anchor`. Including `.a` would possibly fast + // reject the subtree for not having `.a`, even if the selector would match. + // Technically, if the selector's traversal is non-sibling subtree, we can traverse + // inner selectors up to the point where a descendant/child combinator is encountered + // (e.g. In `.anchor:has(:is(.a ~ .b) .c)`, `.a` is guaranteed to be the a descendant + // of `.anchor`). While that can be separately handled, well, this is simpler. + collect_selector_hashes( + selector.selector.iter(), + quirks_mode, + &mut hashes, + &mut len, + |s| s.iter(), + ); + for i in 0..len { + if !filter.might_contain_hash(hashes[i]) { + // Definitely rejected. + return true; + } + } + false +} + +impl RelativeSelectorFilterMap { + fn get_filter<E: Element>(&mut self, element: &E, kind: TraversalKind) -> Option<&BloomFilter> { + // Insert flag to indicate that we looked up the filter once, and + // create the filter if and only if that flag is there. + let key = Key(element.opaque(), kind); + let entry = self + .map + .entry(key) + .and_modify(|entry| { + if !matches!(entry, Entry::Lookup) { + return; + } + let mut filter = BloomFilter::new(); + // Go through all children/descendants of this element and add their hashes. + if add_to_filter(element, &mut filter, kind) { + *entry = Entry::HasFilter(Box::new(filter)); + } + }) + .or_insert(Entry::Lookup); + match entry { + Entry::Lookup => None, + Entry::HasFilter(ref filter) => Some(filter.as_ref()), + } + } + + /// Potentially reject the given selector for this element. + /// This may seem redundant in presence of the cache, but the cache keys into the + /// selector-element pair specifically, while this filter keys to the element + /// and the traversal kind, so it is useful for handling multiple selectors + /// that effectively end up looking at the same(-ish, for siblings) subtree. + pub fn fast_reject<Impl: SelectorImpl, E: Element>( + &mut self, + element: &E, + selector: &RelativeSelector<Impl>, + quirks_mode: QuirksMode, + ) -> bool { + if matches!( + selector.match_hint, + RelativeSelectorMatchHint::InNextSibling + ) { + // Don't bother. + return false; + } + let is_sibling = matches!( + selector.match_hint, + RelativeSelectorMatchHint::InSibling | + RelativeSelectorMatchHint::InNextSiblingSubtree | + RelativeSelectorMatchHint::InSiblingSubtree + ); + let is_subtree = matches!( + selector.match_hint, + RelativeSelectorMatchHint::InSubtree | + RelativeSelectorMatchHint::InNextSiblingSubtree | + RelativeSelectorMatchHint::InSiblingSubtree + ); + let kind = if is_subtree { + TraversalKind::Descendants + } else { + TraversalKind::Children + }; + if is_sibling { + // Contain the entirety of the parent's children/subtree in the filter, and use that. + // This is less likely to reject, especially for sibling subtree matches; however, it's less + // expensive memory-wise, compared to storing filters for each sibling. + element.parent_element().map_or(false, |parent| { + self.get_filter(&parent, kind) + .map_or(false, |filter| fast_reject(selector, quirks_mode, filter)) + }) + } else { + self.get_filter(element, kind) + .map_or(false, |filter| fast_reject(selector, quirks_mode, filter)) + } + } +} diff --git a/servo/components/selectors/relative_selector/mod.rs b/servo/components/selectors/relative_selector/mod.rs new file mode 100644 index 0000000000..6dd39f7327 --- /dev/null +++ b/servo/components/selectors/relative_selector/mod.rs @@ -0,0 +1,6 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +pub mod cache; +pub mod filter; diff --git a/servo/components/selectors/sink.rs b/servo/components/selectors/sink.rs new file mode 100644 index 0000000000..dcdd7ff259 --- /dev/null +++ b/servo/components/selectors/sink.rs @@ -0,0 +1,31 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +//! Small helpers to abstract over different containers. +#![deny(missing_docs)] + +use smallvec::{Array, SmallVec}; + +/// A trait to abstract over a `push` method that may be implemented for +/// different kind of types. +/// +/// Used to abstract over `Array`, `SmallVec` and `Vec`, and also to implement a +/// type which `push` method does only tweak a byte when we only need to check +/// for the presence of something. +pub trait Push<T> { + /// Push a value into self. + fn push(&mut self, value: T); +} + +impl<T> Push<T> for Vec<T> { + fn push(&mut self, value: T) { + Vec::push(self, value); + } +} + +impl<A: Array> Push<A::Item> for SmallVec<A> { + fn push(&mut self, value: A::Item) { + SmallVec::push(self, value); + } +} diff --git a/servo/components/selectors/tree.rs b/servo/components/selectors/tree.rs new file mode 100644 index 0000000000..c1ea8ff5ae --- /dev/null +++ b/servo/components/selectors/tree.rs @@ -0,0 +1,168 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +//! Traits that nodes must implement. Breaks the otherwise-cyclic dependency +//! between layout and style. + +use crate::attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint}; +use crate::bloom::BloomFilter; +use crate::matching::{ElementSelectorFlags, MatchingContext}; +use crate::parser::SelectorImpl; +use std::fmt::Debug; +use std::ptr::NonNull; + +/// Opaque representation of an Element, for identity comparisons. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +pub struct OpaqueElement(NonNull<()>); + +unsafe impl Send for OpaqueElement {} + +impl OpaqueElement { + /// Creates a new OpaqueElement from an arbitrarily-typed pointer. + pub fn new<T>(ptr: &T) -> Self { + unsafe { + OpaqueElement(NonNull::new_unchecked( + ptr as *const T as *const () as *mut (), + )) + } + } +} + +pub trait Element: Sized + Clone + Debug { + type Impl: SelectorImpl; + + /// Converts self into an opaque representation. + fn opaque(&self) -> OpaqueElement; + + fn parent_element(&self) -> Option<Self>; + + /// Whether the parent node of this element is a shadow root. + fn parent_node_is_shadow_root(&self) -> bool; + + /// The host of the containing shadow root, if any. + fn containing_shadow_host(&self) -> Option<Self>; + + /// The parent of a given pseudo-element, after matching a pseudo-element + /// selector. + /// + /// This is guaranteed to be called in a pseudo-element. + fn pseudo_element_originating_element(&self) -> Option<Self> { + debug_assert!(self.is_pseudo_element()); + self.parent_element() + } + + /// Whether we're matching on a pseudo-element. + fn is_pseudo_element(&self) -> bool; + + /// Skips non-element nodes + fn prev_sibling_element(&self) -> Option<Self>; + + /// Skips non-element nodes + fn next_sibling_element(&self) -> Option<Self>; + + /// Skips non-element nodes + fn first_element_child(&self) -> Option<Self>; + + fn is_html_element_in_html_document(&self) -> bool; + + fn has_local_name(&self, local_name: &<Self::Impl as SelectorImpl>::BorrowedLocalName) -> bool; + + /// Empty string for no namespace + fn has_namespace(&self, ns: &<Self::Impl as SelectorImpl>::BorrowedNamespaceUrl) -> bool; + + /// Whether this element and the `other` element have the same local name and namespace. + fn is_same_type(&self, other: &Self) -> bool; + + fn attr_matches( + &self, + ns: &NamespaceConstraint<&<Self::Impl as SelectorImpl>::NamespaceUrl>, + local_name: &<Self::Impl as SelectorImpl>::LocalName, + operation: &AttrSelectorOperation<&<Self::Impl as SelectorImpl>::AttrValue>, + ) -> bool; + + fn has_attr_in_no_namespace( + &self, + local_name: &<Self::Impl as SelectorImpl>::LocalName, + ) -> bool { + self.attr_matches( + &NamespaceConstraint::Specific(&crate::parser::namespace_empty_string::<Self::Impl>()), + local_name, + &AttrSelectorOperation::Exists, + ) + } + + fn match_non_ts_pseudo_class( + &self, + pc: &<Self::Impl as SelectorImpl>::NonTSPseudoClass, + context: &mut MatchingContext<Self::Impl>, + ) -> bool; + + fn match_pseudo_element( + &self, + pe: &<Self::Impl as SelectorImpl>::PseudoElement, + context: &mut MatchingContext<Self::Impl>, + ) -> bool; + + /// Sets selector flags on the elemnt itself or the parent, depending on the + /// flags, which indicate what kind of work may need to be performed when + /// DOM state changes. + fn apply_selector_flags(&self, flags: ElementSelectorFlags); + + /// Whether this element is a `link`. + fn is_link(&self) -> bool; + + /// Returns whether the element is an HTML <slot> element. + fn is_html_slot_element(&self) -> bool; + + /// Returns the assigned <slot> element this element is assigned to. + /// + /// Necessary for the `::slotted` pseudo-class. + fn assigned_slot(&self) -> Option<Self> { + None + } + + fn has_id( + &self, + id: &<Self::Impl as SelectorImpl>::Identifier, + case_sensitivity: CaseSensitivity, + ) -> bool; + + fn has_class( + &self, + name: &<Self::Impl as SelectorImpl>::Identifier, + case_sensitivity: CaseSensitivity, + ) -> bool; + + /// Returns the mapping from the `exportparts` attribute in the reverse + /// direction, that is, in an outer-tree -> inner-tree direction. + fn imported_part( + &self, + name: &<Self::Impl as SelectorImpl>::Identifier, + ) -> Option<<Self::Impl as SelectorImpl>::Identifier>; + + fn is_part(&self, name: &<Self::Impl as SelectorImpl>::Identifier) -> bool; + + /// Returns whether this element matches `:empty`. + /// + /// That is, whether it does not contain any child element or any non-zero-length text node. + /// See http://dev.w3.org/csswg/selectors-3/#empty-pseudo + fn is_empty(&self) -> bool; + + /// Returns whether this element matches `:root`, + /// i.e. whether it is the root element of a document. + /// + /// Note: this can be false even if `.parent_element()` is `None` + /// if the parent node is a `DocumentFragment`. + fn is_root(&self) -> bool; + + /// Returns whether this element should ignore matching nth child + /// selector. + fn ignores_nth_child_selectors(&self) -> bool { + false + } + + /// Add hashes unique to this element to the given filter, returning true + /// if any got added. + fn add_element_unique_hashes(&self, filter: &mut BloomFilter) -> bool; +} diff --git a/servo/components/selectors/visitor.rs b/servo/components/selectors/visitor.rs new file mode 100644 index 0000000000..d5befbc68b --- /dev/null +++ b/servo/components/selectors/visitor.rs @@ -0,0 +1,136 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +//! Visitor traits for selectors. + +#![deny(missing_docs)] + +use crate::attr::NamespaceConstraint; +use crate::parser::{Combinator, Component, RelativeSelector, Selector, SelectorImpl}; + +/// A trait to visit selector properties. +/// +/// All the `visit_foo` methods return a boolean indicating whether the +/// traversal should continue or not. +pub trait SelectorVisitor: Sized { + /// The selector implementation this visitor wants to visit. + type Impl: SelectorImpl; + + /// Visit an attribute selector that may match (there are other selectors + /// that may never match, like those containing whitespace or the empty + /// string). + fn visit_attribute_selector( + &mut self, + _namespace: &NamespaceConstraint<&<Self::Impl as SelectorImpl>::NamespaceUrl>, + _local_name: &<Self::Impl as SelectorImpl>::LocalName, + _local_name_lower: &<Self::Impl as SelectorImpl>::LocalName, + ) -> bool { + true + } + + /// Visit a simple selector. + fn visit_simple_selector(&mut self, _: &Component<Self::Impl>) -> bool { + true + } + + /// Visit a nested relative selector list. The caller is responsible to call visit + /// into the internal selectors if / as needed. + /// + /// The default implementation skips it altogether. + fn visit_relative_selector_list(&mut self, _list: &[RelativeSelector<Self::Impl>]) -> bool { + true + } + + /// Visit a nested selector list. The caller is responsible to call visit + /// into the internal selectors if / as needed. + /// + /// The default implementation does this. + fn visit_selector_list( + &mut self, + _list_kind: SelectorListKind, + list: &[Selector<Self::Impl>], + ) -> bool { + for nested in list { + if !nested.visit(self) { + return false; + } + } + true + } + + /// Visits a complex selector. + /// + /// Gets the combinator to the right of the selector, or `None` if the + /// selector is the rightmost one. + fn visit_complex_selector(&mut self, _combinator_to_right: Option<Combinator>) -> bool { + true + } +} + +bitflags! { + /// The kinds of components the visitor is visiting the selector list of, if any + #[derive(Clone, Copy, Default)] + pub struct SelectorListKind: u8 { + /// The visitor is inside :not(..) + const NEGATION = 1 << 0; + /// The visitor is inside :is(..) + const IS = 1 << 1; + /// The visitor is inside :where(..) + const WHERE = 1 << 2; + /// The visitor is inside :nth-child(.. of <selector list>) or + /// :nth-last-child(.. of <selector list>) + const NTH_OF = 1 << 3; + /// The visitor is inside :has(..) + const HAS = 1 << 4; + } +} + +impl SelectorListKind { + /// Construct a SelectorListKind for the corresponding component. + pub fn from_component<Impl: SelectorImpl>(component: &Component<Impl>) -> Self { + match component { + Component::Negation(_) => SelectorListKind::NEGATION, + Component::Is(_) => SelectorListKind::IS, + Component::Where(_) => SelectorListKind::WHERE, + Component::NthOf(_) => SelectorListKind::NTH_OF, + _ => SelectorListKind::empty(), + } + } + + /// Whether the visitor is inside :not(..) + pub fn in_negation(&self) -> bool { + self.intersects(SelectorListKind::NEGATION) + } + + /// Whether the visitor is inside :is(..) + pub fn in_is(&self) -> bool { + self.intersects(SelectorListKind::IS) + } + + /// Whether the visitor is inside :where(..) + pub fn in_where(&self) -> bool { + self.intersects(SelectorListKind::WHERE) + } + + /// Whether the visitor is inside :nth-child(.. of <selector list>) or + /// :nth-last-child(.. of <selector list>) + pub fn in_nth_of(&self) -> bool { + self.intersects(SelectorListKind::NTH_OF) + } + + /// Whether the visitor is inside :has(..) + pub fn in_has(&self) -> bool { + self.intersects(SelectorListKind::HAS) + } + + /// Whether this nested selector is relevant for nth-of dependencies. + pub fn relevant_to_nth_of_dependencies(&self) -> bool { + // Order of nesting for `:has` and `:nth-child(.. of ..)` doesn't matter, because: + // * `:has(:nth-child(.. of ..))`: The location of the anchoring element is + // independent from where `:nth-child(.. of ..)` is applied. + // * `:nth-child(.. of :has(..))`: Invalidations inside `:has` must first use the + // `:has` machinary to find the anchor, then carry out the remaining invalidation. + self.in_nth_of() && !self.in_has() + } +} |