diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/cssparser/src | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 86.0.1. (upstream/86.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/cssparser/src')
-rw-r--r-- | third_party/rust/cssparser/src/color.rs | 687 | ||||
-rw-r--r-- | third_party/rust/cssparser/src/cow_rc_str.rs | 199 | ||||
-rw-r--r-- | third_party/rust/cssparser/src/from_bytes.rs | 64 | ||||
-rw-r--r-- | third_party/rust/cssparser/src/lib.rs | 115 | ||||
-rw-r--r-- | third_party/rust/cssparser/src/macros.rs | 196 | ||||
-rw-r--r-- | third_party/rust/cssparser/src/nth.rs | 146 | ||||
-rw-r--r-- | third_party/rust/cssparser/src/parser.rs | 1073 | ||||
-rw-r--r-- | third_party/rust/cssparser/src/rules_and_declarations.rs | 528 | ||||
-rw-r--r-- | third_party/rust/cssparser/src/serializer.rs | 552 | ||||
-rw-r--r-- | third_party/rust/cssparser/src/size_of_tests.rs | 59 | ||||
-rw-r--r-- | third_party/rust/cssparser/src/tests.rs | 1429 | ||||
-rw-r--r-- | third_party/rust/cssparser/src/tokenizer.rs | 1397 | ||||
-rw-r--r-- | third_party/rust/cssparser/src/unicode_range.rs | 181 |
13 files changed, 6626 insertions, 0 deletions
diff --git a/third_party/rust/cssparser/src/color.rs b/third_party/rust/cssparser/src/color.rs new file mode 100644 index 0000000000..8d0d521f3b --- /dev/null +++ b/third_party/rust/cssparser/src/color.rs @@ -0,0 +1,687 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use std::f32::consts::PI; +use std::fmt; + +use super::{BasicParseError, ParseError, Parser, ToCss, Token}; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +/// A color with red, green, blue, and alpha components, in a byte each. +#[derive(Clone, Copy, PartialEq, Debug)] +#[repr(C)] +pub struct RGBA { + /// The red component. + pub red: u8, + /// The green component. + pub green: u8, + /// The blue component. + pub blue: u8, + /// The alpha component. + pub alpha: u8, +} + +impl RGBA { + /// Constructs a new RGBA value from float components. It expects the red, + /// green, blue and alpha channels in that order, and all values will be + /// clamped to the 0.0 ... 1.0 range. + #[inline] + pub fn from_floats(red: f32, green: f32, blue: f32, alpha: f32) -> Self { + Self::new( + clamp_unit_f32(red), + clamp_unit_f32(green), + clamp_unit_f32(blue), + clamp_unit_f32(alpha), + ) + } + + /// Returns a transparent color. + #[inline] + pub fn transparent() -> Self { + Self::new(0, 0, 0, 0) + } + + /// Same thing, but with `u8` values instead of floats in the 0 to 1 range. + #[inline] + pub fn new(red: u8, green: u8, blue: u8, alpha: u8) -> Self { + RGBA { + red: red, + green: green, + blue: blue, + alpha: alpha, + } + } + + /// Returns the red channel in a floating point number form, from 0 to 1. + #[inline] + pub fn red_f32(&self) -> f32 { + self.red as f32 / 255.0 + } + + /// Returns the green channel in a floating point number form, from 0 to 1. 
+ #[inline] + pub fn green_f32(&self) -> f32 { + self.green as f32 / 255.0 + } + + /// Returns the blue channel in a floating point number form, from 0 to 1. + #[inline] + pub fn blue_f32(&self) -> f32 { + self.blue as f32 / 255.0 + } + + /// Returns the alpha channel in a floating point number form, from 0 to 1. + #[inline] + pub fn alpha_f32(&self) -> f32 { + self.alpha as f32 / 255.0 + } +} + +#[cfg(feature = "serde")] +impl Serialize for RGBA { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + (self.red, self.green, self.blue, self.alpha).serialize(serializer) + } +} + +#[cfg(feature = "serde")] +impl<'de> Deserialize<'de> for RGBA { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + let (r, g, b, a) = Deserialize::deserialize(deserializer)?; + Ok(RGBA::new(r, g, b, a)) + } +} + +impl ToCss for RGBA { + fn to_css<W>(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + let serialize_alpha = self.alpha != 255; + + dest.write_str(if serialize_alpha { "rgba(" } else { "rgb(" })?; + self.red.to_css(dest)?; + dest.write_str(", ")?; + self.green.to_css(dest)?; + dest.write_str(", ")?; + self.blue.to_css(dest)?; + if serialize_alpha { + dest.write_str(", ")?; + + // Try first with two decimal places, then with three. + let mut rounded_alpha = (self.alpha_f32() * 100.).round() / 100.; + if clamp_unit_f32(rounded_alpha) != self.alpha { + rounded_alpha = (self.alpha_f32() * 1000.).round() / 1000.; + } + + rounded_alpha.to_css(dest)?; + } + dest.write_char(')') + } +} + +/// A <color> value. 
+#[derive(Clone, Copy, PartialEq, Debug)] +pub enum Color { + /// The 'currentcolor' keyword + CurrentColor, + /// Everything else gets converted to RGBA during parsing + RGBA(RGBA), +} + +impl ToCss for Color { + fn to_css<W>(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + match *self { + Color::CurrentColor => dest.write_str("currentcolor"), + Color::RGBA(ref rgba) => rgba.to_css(dest), + } + } +} + +/// Either a number or a percentage. +pub enum NumberOrPercentage { + /// `<number>`. + Number { + /// The numeric value parsed, as a float. + value: f32, + }, + /// `<percentage>` + Percentage { + /// The value as a float, divided by 100 so that the nominal range is + /// 0.0 to 1.0. + unit_value: f32, + }, +} + +impl NumberOrPercentage { + fn unit_value(&self) -> f32 { + match *self { + NumberOrPercentage::Number { value } => value, + NumberOrPercentage::Percentage { unit_value } => unit_value, + } + } +} + +/// Either an angle or a number. +pub enum AngleOrNumber { + /// `<number>`. + Number { + /// The numeric value parsed, as a float. + value: f32, + }, + /// `<angle>` + Angle { + /// The value as a number of degrees. + degrees: f32, + }, +} + +impl AngleOrNumber { + fn degrees(&self) -> f32 { + match *self { + AngleOrNumber::Number { value } => value, + AngleOrNumber::Angle { degrees } => degrees, + } + } +} + +/// A trait that can be used to hook into how `cssparser` parses color +/// components, with the intention of implementing more complicated behavior. +/// +/// For example, this is used by Servo to support calc() in color. +pub trait ColorComponentParser<'i> { + /// A custom error type that can be returned from the parsing functions. + type Error: 'i; + + /// Parse an `<angle>` or `<number>`. + /// + /// Returns the result in degrees. 
+ fn parse_angle_or_number<'t>( + &self, + input: &mut Parser<'i, 't>, + ) -> Result<AngleOrNumber, ParseError<'i, Self::Error>> { + let location = input.current_source_location(); + Ok(match *input.next()? { + Token::Number { value, .. } => AngleOrNumber::Number { value }, + Token::Dimension { + value: v, ref unit, .. + } => { + let degrees = match_ignore_ascii_case! { &*unit, + "deg" => v, + "grad" => v * 360. / 400., + "rad" => v * 360. / (2. * PI), + "turn" => v * 360., + _ => return Err(location.new_unexpected_token_error(Token::Ident(unit.clone()))), + }; + + AngleOrNumber::Angle { degrees } + } + ref t => return Err(location.new_unexpected_token_error(t.clone())), + }) + } + + /// Parse a `<percentage>` value. + /// + /// Returns the result in a number from 0.0 to 1.0. + fn parse_percentage<'t>( + &self, + input: &mut Parser<'i, 't>, + ) -> Result<f32, ParseError<'i, Self::Error>> { + input.expect_percentage().map_err(From::from) + } + + /// Parse a `<number>` value. + fn parse_number<'t>( + &self, + input: &mut Parser<'i, 't>, + ) -> Result<f32, ParseError<'i, Self::Error>> { + input.expect_number().map_err(From::from) + } + + /// Parse a `<number>` value or a `<percentage>` value. + fn parse_number_or_percentage<'t>( + &self, + input: &mut Parser<'i, 't>, + ) -> Result<NumberOrPercentage, ParseError<'i, Self::Error>> { + let location = input.current_source_location(); + Ok(match *input.next()? { + Token::Number { value, .. } => NumberOrPercentage::Number { value }, + Token::Percentage { unit_value, .. } => NumberOrPercentage::Percentage { unit_value }, + ref t => return Err(location.new_unexpected_token_error(t.clone())), + }) + } +} + +struct DefaultComponentParser; +impl<'i> ColorComponentParser<'i> for DefaultComponentParser { + type Error = (); +} + +impl Color { + /// Parse a <color> value, per CSS Color Module Level 3. + /// + /// FIXME(#2) Deprecated CSS2 System Colors are not supported yet. 
+ pub fn parse_with<'i, 't, ComponentParser>( + component_parser: &ComponentParser, + input: &mut Parser<'i, 't>, + ) -> Result<Color, ParseError<'i, ComponentParser::Error>> + where + ComponentParser: ColorComponentParser<'i>, + { + let location = input.current_source_location(); + let token = input.next()?; + match *token { + Token::Hash(ref value) | Token::IDHash(ref value) => { + Color::parse_hash(value.as_bytes()) + } + Token::Ident(ref value) => parse_color_keyword(&*value), + Token::Function(ref name) => { + let name = name.clone(); + return input.parse_nested_block(|arguments| { + parse_color_function(component_parser, &*name, arguments) + }); + } + _ => Err(()), + } + .map_err(|()| location.new_unexpected_token_error(token.clone())) + } + + /// Parse a <color> value, per CSS Color Module Level 3. + pub fn parse<'i, 't>(input: &mut Parser<'i, 't>) -> Result<Color, BasicParseError<'i>> { + let component_parser = DefaultComponentParser; + Self::parse_with(&component_parser, input).map_err(ParseError::basic) + } + + /// Parse a color hash, without the leading '#' character. + #[inline] + pub fn parse_hash(value: &[u8]) -> Result<Self, ()> { + match value.len() { + 8 => Ok(rgba( + from_hex(value[0])? * 16 + from_hex(value[1])?, + from_hex(value[2])? * 16 + from_hex(value[3])?, + from_hex(value[4])? * 16 + from_hex(value[5])?, + from_hex(value[6])? * 16 + from_hex(value[7])?, + )), + 6 => Ok(rgb( + from_hex(value[0])? * 16 + from_hex(value[1])?, + from_hex(value[2])? * 16 + from_hex(value[3])?, + from_hex(value[4])? * 16 + from_hex(value[5])?, + )), + 4 => Ok(rgba( + from_hex(value[0])? * 17, + from_hex(value[1])? * 17, + from_hex(value[2])? * 17, + from_hex(value[3])? * 17, + )), + 3 => Ok(rgb( + from_hex(value[0])? * 17, + from_hex(value[1])? * 17, + from_hex(value[2])? 
* 17, + )), + _ => Err(()), + } + } +} + +#[inline] +fn rgb(red: u8, green: u8, blue: u8) -> Color { + rgba(red, green, blue, 255) +} + +#[inline] +fn rgba(red: u8, green: u8, blue: u8, alpha: u8) -> Color { + Color::RGBA(RGBA::new(red, green, blue, alpha)) +} + +/// Return the named color with the given name. +/// +/// Matching is case-insensitive in the ASCII range. +/// CSS escaping (if relevant) should be resolved before calling this function. +/// (For example, the value of an `Ident` token is fine.) +#[inline] +pub fn parse_color_keyword(ident: &str) -> Result<Color, ()> { + macro_rules! rgb { + ($red: expr, $green: expr, $blue: expr) => { + Color::RGBA(RGBA { + red: $red, + green: $green, + blue: $blue, + alpha: 255, + }) + }; + } + ascii_case_insensitive_phf_map! { + keyword -> Color = { + "black" => rgb!(0, 0, 0), + "silver" => rgb!(192, 192, 192), + "gray" => rgb!(128, 128, 128), + "white" => rgb!(255, 255, 255), + "maroon" => rgb!(128, 0, 0), + "red" => rgb!(255, 0, 0), + "purple" => rgb!(128, 0, 128), + "fuchsia" => rgb!(255, 0, 255), + "green" => rgb!(0, 128, 0), + "lime" => rgb!(0, 255, 0), + "olive" => rgb!(128, 128, 0), + "yellow" => rgb!(255, 255, 0), + "navy" => rgb!(0, 0, 128), + "blue" => rgb!(0, 0, 255), + "teal" => rgb!(0, 128, 128), + "aqua" => rgb!(0, 255, 255), + + "aliceblue" => rgb!(240, 248, 255), + "antiquewhite" => rgb!(250, 235, 215), + "aquamarine" => rgb!(127, 255, 212), + "azure" => rgb!(240, 255, 255), + "beige" => rgb!(245, 245, 220), + "bisque" => rgb!(255, 228, 196), + "blanchedalmond" => rgb!(255, 235, 205), + "blueviolet" => rgb!(138, 43, 226), + "brown" => rgb!(165, 42, 42), + "burlywood" => rgb!(222, 184, 135), + "cadetblue" => rgb!(95, 158, 160), + "chartreuse" => rgb!(127, 255, 0), + "chocolate" => rgb!(210, 105, 30), + "coral" => rgb!(255, 127, 80), + "cornflowerblue" => rgb!(100, 149, 237), + "cornsilk" => rgb!(255, 248, 220), + "crimson" => rgb!(220, 20, 60), + "cyan" => rgb!(0, 255, 255), + "darkblue" => rgb!(0, 0, 
139), + "darkcyan" => rgb!(0, 139, 139), + "darkgoldenrod" => rgb!(184, 134, 11), + "darkgray" => rgb!(169, 169, 169), + "darkgreen" => rgb!(0, 100, 0), + "darkgrey" => rgb!(169, 169, 169), + "darkkhaki" => rgb!(189, 183, 107), + "darkmagenta" => rgb!(139, 0, 139), + "darkolivegreen" => rgb!(85, 107, 47), + "darkorange" => rgb!(255, 140, 0), + "darkorchid" => rgb!(153, 50, 204), + "darkred" => rgb!(139, 0, 0), + "darksalmon" => rgb!(233, 150, 122), + "darkseagreen" => rgb!(143, 188, 143), + "darkslateblue" => rgb!(72, 61, 139), + "darkslategray" => rgb!(47, 79, 79), + "darkslategrey" => rgb!(47, 79, 79), + "darkturquoise" => rgb!(0, 206, 209), + "darkviolet" => rgb!(148, 0, 211), + "deeppink" => rgb!(255, 20, 147), + "deepskyblue" => rgb!(0, 191, 255), + "dimgray" => rgb!(105, 105, 105), + "dimgrey" => rgb!(105, 105, 105), + "dodgerblue" => rgb!(30, 144, 255), + "firebrick" => rgb!(178, 34, 34), + "floralwhite" => rgb!(255, 250, 240), + "forestgreen" => rgb!(34, 139, 34), + "gainsboro" => rgb!(220, 220, 220), + "ghostwhite" => rgb!(248, 248, 255), + "gold" => rgb!(255, 215, 0), + "goldenrod" => rgb!(218, 165, 32), + "greenyellow" => rgb!(173, 255, 47), + "grey" => rgb!(128, 128, 128), + "honeydew" => rgb!(240, 255, 240), + "hotpink" => rgb!(255, 105, 180), + "indianred" => rgb!(205, 92, 92), + "indigo" => rgb!(75, 0, 130), + "ivory" => rgb!(255, 255, 240), + "khaki" => rgb!(240, 230, 140), + "lavender" => rgb!(230, 230, 250), + "lavenderblush" => rgb!(255, 240, 245), + "lawngreen" => rgb!(124, 252, 0), + "lemonchiffon" => rgb!(255, 250, 205), + "lightblue" => rgb!(173, 216, 230), + "lightcoral" => rgb!(240, 128, 128), + "lightcyan" => rgb!(224, 255, 255), + "lightgoldenrodyellow" => rgb!(250, 250, 210), + "lightgray" => rgb!(211, 211, 211), + "lightgreen" => rgb!(144, 238, 144), + "lightgrey" => rgb!(211, 211, 211), + "lightpink" => rgb!(255, 182, 193), + "lightsalmon" => rgb!(255, 160, 122), + "lightseagreen" => rgb!(32, 178, 170), + "lightskyblue" => rgb!(135, 
206, 250), + "lightslategray" => rgb!(119, 136, 153), + "lightslategrey" => rgb!(119, 136, 153), + "lightsteelblue" => rgb!(176, 196, 222), + "lightyellow" => rgb!(255, 255, 224), + "limegreen" => rgb!(50, 205, 50), + "linen" => rgb!(250, 240, 230), + "magenta" => rgb!(255, 0, 255), + "mediumaquamarine" => rgb!(102, 205, 170), + "mediumblue" => rgb!(0, 0, 205), + "mediumorchid" => rgb!(186, 85, 211), + "mediumpurple" => rgb!(147, 112, 219), + "mediumseagreen" => rgb!(60, 179, 113), + "mediumslateblue" => rgb!(123, 104, 238), + "mediumspringgreen" => rgb!(0, 250, 154), + "mediumturquoise" => rgb!(72, 209, 204), + "mediumvioletred" => rgb!(199, 21, 133), + "midnightblue" => rgb!(25, 25, 112), + "mintcream" => rgb!(245, 255, 250), + "mistyrose" => rgb!(255, 228, 225), + "moccasin" => rgb!(255, 228, 181), + "navajowhite" => rgb!(255, 222, 173), + "oldlace" => rgb!(253, 245, 230), + "olivedrab" => rgb!(107, 142, 35), + "orange" => rgb!(255, 165, 0), + "orangered" => rgb!(255, 69, 0), + "orchid" => rgb!(218, 112, 214), + "palegoldenrod" => rgb!(238, 232, 170), + "palegreen" => rgb!(152, 251, 152), + "paleturquoise" => rgb!(175, 238, 238), + "palevioletred" => rgb!(219, 112, 147), + "papayawhip" => rgb!(255, 239, 213), + "peachpuff" => rgb!(255, 218, 185), + "peru" => rgb!(205, 133, 63), + "pink" => rgb!(255, 192, 203), + "plum" => rgb!(221, 160, 221), + "powderblue" => rgb!(176, 224, 230), + "rebeccapurple" => rgb!(102, 51, 153), + "rosybrown" => rgb!(188, 143, 143), + "royalblue" => rgb!(65, 105, 225), + "saddlebrown" => rgb!(139, 69, 19), + "salmon" => rgb!(250, 128, 114), + "sandybrown" => rgb!(244, 164, 96), + "seagreen" => rgb!(46, 139, 87), + "seashell" => rgb!(255, 245, 238), + "sienna" => rgb!(160, 82, 45), + "skyblue" => rgb!(135, 206, 235), + "slateblue" => rgb!(106, 90, 205), + "slategray" => rgb!(112, 128, 144), + "slategrey" => rgb!(112, 128, 144), + "snow" => rgb!(255, 250, 250), + "springgreen" => rgb!(0, 255, 127), + "steelblue" => rgb!(70, 130, 180), + 
"tan" => rgb!(210, 180, 140), + "thistle" => rgb!(216, 191, 216), + "tomato" => rgb!(255, 99, 71), + "turquoise" => rgb!(64, 224, 208), + "violet" => rgb!(238, 130, 238), + "wheat" => rgb!(245, 222, 179), + "whitesmoke" => rgb!(245, 245, 245), + "yellowgreen" => rgb!(154, 205, 50), + + "transparent" => Color::RGBA(RGBA { red: 0, green: 0, blue: 0, alpha: 0 }), + "currentcolor" => Color::CurrentColor, + } + } + keyword(ident).cloned().ok_or(()) +} + +#[inline] +fn from_hex(c: u8) -> Result<u8, ()> { + match c { + b'0'..=b'9' => Ok(c - b'0'), + b'a'..=b'f' => Ok(c - b'a' + 10), + b'A'..=b'F' => Ok(c - b'A' + 10), + _ => Err(()), + } +} + +fn clamp_unit_f32(val: f32) -> u8 { + // Whilst scaling by 256 and flooring would provide + // an equal distribution of integers to percentage inputs, + // this is not what Gecko does so we instead multiply by 255 + // and round (adding 0.5 and flooring is equivalent to rounding) + // + // Chrome does something similar for the alpha value, but not + // the rgb values. + // + // See https://bugzilla.mozilla.org/show_bug.cgi?id=1340484 + // + // Clamping to 256 and rounding after would let 1.0 map to 256, and + // `256.0_f32 as u8` is undefined behavior: + // + // https://github.com/rust-lang/rust/issues/10184 + clamp_floor_256_f32(val * 255.) +} + +fn clamp_floor_256_f32(val: f32) -> u8 { + val.round().max(0.).min(255.) as u8 +} + +#[inline] +fn parse_color_function<'i, 't, ComponentParser>( + component_parser: &ComponentParser, + name: &str, + arguments: &mut Parser<'i, 't>, +) -> Result<Color, ParseError<'i, ComponentParser::Error>> +where + ComponentParser: ColorComponentParser<'i>, +{ + let (red, green, blue, uses_commas) = match_ignore_ascii_case! 
{ name, + "rgb" | "rgba" => parse_rgb_components_rgb(component_parser, arguments)?, + "hsl" | "hsla" => parse_rgb_components_hsl(component_parser, arguments)?, + _ => return Err(arguments.new_unexpected_token_error(Token::Ident(name.to_owned().into()))), + }; + + let alpha = if !arguments.is_exhausted() { + if uses_commas { + arguments.expect_comma()?; + } else { + arguments.expect_delim('/')?; + }; + clamp_unit_f32( + component_parser + .parse_number_or_percentage(arguments)? + .unit_value(), + ) + } else { + 255 + }; + + arguments.expect_exhausted()?; + Ok(rgba(red, green, blue, alpha)) +} + +#[inline] +fn parse_rgb_components_rgb<'i, 't, ComponentParser>( + component_parser: &ComponentParser, + arguments: &mut Parser<'i, 't>, +) -> Result<(u8, u8, u8, bool), ParseError<'i, ComponentParser::Error>> +where + ComponentParser: ColorComponentParser<'i>, +{ + // Either integers or percentages, but all the same type. + // https://drafts.csswg.org/css-color/#rgb-functions + let (red, is_number) = match component_parser.parse_number_or_percentage(arguments)? 
{ + NumberOrPercentage::Number { value } => (clamp_floor_256_f32(value), true), + NumberOrPercentage::Percentage { unit_value } => (clamp_unit_f32(unit_value), false), + }; + + let uses_commas = arguments.try_parse(|i| i.expect_comma()).is_ok(); + + let green; + let blue; + if is_number { + green = clamp_floor_256_f32(component_parser.parse_number(arguments)?); + if uses_commas { + arguments.expect_comma()?; + } + blue = clamp_floor_256_f32(component_parser.parse_number(arguments)?); + } else { + green = clamp_unit_f32(component_parser.parse_percentage(arguments)?); + if uses_commas { + arguments.expect_comma()?; + } + blue = clamp_unit_f32(component_parser.parse_percentage(arguments)?); + } + + Ok((red, green, blue, uses_commas)) +} + +#[inline] +fn parse_rgb_components_hsl<'i, 't, ComponentParser>( + component_parser: &ComponentParser, + arguments: &mut Parser<'i, 't>, +) -> Result<(u8, u8, u8, bool), ParseError<'i, ComponentParser::Error>> +where + ComponentParser: ColorComponentParser<'i>, +{ + // Hue given as an angle + // https://drafts.csswg.org/css-values/#angles + let hue_degrees = component_parser.parse_angle_or_number(arguments)?.degrees(); + + // Subtract an integer before rounding, to avoid some rounding errors: + let hue_normalized_degrees = hue_degrees - 360. * (hue_degrees / 360.).floor(); + let hue = hue_normalized_degrees / 360.; + + // Saturation and lightness are clamped to 0% ... 100% + // https://drafts.csswg.org/css-color/#the-hsl-notation + let uses_commas = arguments.try_parse(|i| i.expect_comma()).is_ok(); + + let saturation = component_parser.parse_percentage(arguments)?; + let saturation = saturation.max(0.).min(1.); + + if uses_commas { + arguments.expect_comma()?; + } + + let lightness = component_parser.parse_percentage(arguments)?; + let lightness = lightness.max(0.).min(1.); + + // https://drafts.csswg.org/css-color/#hsl-color + // except with h pre-multiplied by 3, to avoid some rounding errors. 
+ fn hue_to_rgb(m1: f32, m2: f32, mut h3: f32) -> f32 { + if h3 < 0. { + h3 += 3. + } + if h3 > 3. { + h3 -= 3. + } + + if h3 * 2. < 1. { + m1 + (m2 - m1) * h3 * 2. + } else if h3 * 2. < 3. { + m2 + } else if h3 < 2. { + m1 + (m2 - m1) * (2. - h3) * 2. + } else { + m1 + } + } + let m2 = if lightness <= 0.5 { + lightness * (saturation + 1.) + } else { + lightness + saturation - lightness * saturation + }; + let m1 = lightness * 2. - m2; + let hue_times_3 = hue * 3.; + let red = clamp_unit_f32(hue_to_rgb(m1, m2, hue_times_3 + 1.)); + let green = clamp_unit_f32(hue_to_rgb(m1, m2, hue_times_3)); + let blue = clamp_unit_f32(hue_to_rgb(m1, m2, hue_times_3 - 1.)); + return Ok((red, green, blue, uses_commas)); +} diff --git a/third_party/rust/cssparser/src/cow_rc_str.rs b/third_party/rust/cssparser/src/cow_rc_str.rs new file mode 100644 index 0000000000..f89f4cdacf --- /dev/null +++ b/third_party/rust/cssparser/src/cow_rc_str.rs @@ -0,0 +1,199 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use std::borrow::{Borrow, Cow}; +use std::cmp; +use std::fmt; +use std::hash; +use std::marker::PhantomData; +use std::mem; +use std::ops::Deref; +use std::rc::Rc; +use std::slice; +use std::str; +use std::usize; + +/// A string that is either shared (heap-allocated and reference-counted) or borrowed. +/// +/// Equivalent to `enum { Borrowed(&'a str), Shared(Rc<String>) }`, but stored more compactly. +/// +/// FIXME(https://github.com/rust-lang/rfcs/issues/1230): use an actual enum if/when +/// the compiler can do this layout optimization. +pub struct CowRcStr<'a> { + /// FIXME: https://github.com/rust-lang/rust/issues/27730 use NonZero or Shared. + /// In the meantime we abuse `&'static _` to get the effect of `NonZero<*const _>`. + /// `ptr` doesn’t really have the 'static lifetime! 
+ ptr: &'static (), + + /// * If `borrowed_len_or_max == usize::MAX`, then `ptr` represents `NonZero<*const String>` + /// from `Rc::into_raw`. + /// The lifetime parameter `'a` is irrelevant in this case. + /// + /// * Otherwise, `ptr` represents the `NonZero<*const u8>` data component of `&'a str`, + /// and `borrowed_len_or_max` its length. + borrowed_len_or_max: usize, + + phantom: PhantomData<Result<&'a str, Rc<String>>>, +} + +fn _static_assert_same_size<'a>() { + // "Instantiate" the generic function without calling it. + let _ = mem::transmute::<CowRcStr<'a>, Option<CowRcStr<'a>>>; +} + +impl<'a> From<Cow<'a, str>> for CowRcStr<'a> { + #[inline] + fn from(s: Cow<'a, str>) -> Self { + match s { + Cow::Borrowed(s) => CowRcStr::from(s), + Cow::Owned(s) => CowRcStr::from(s), + } + } +} + +impl<'a> From<&'a str> for CowRcStr<'a> { + #[inline] + fn from(s: &'a str) -> Self { + let len = s.len(); + assert!(len < usize::MAX); + CowRcStr { + ptr: unsafe { &*(s.as_ptr() as *const ()) }, + borrowed_len_or_max: len, + phantom: PhantomData, + } + } +} + +impl<'a> From<String> for CowRcStr<'a> { + #[inline] + fn from(s: String) -> Self { + CowRcStr::from_rc(Rc::new(s)) + } +} + +impl<'a> CowRcStr<'a> { + #[inline] + fn from_rc(s: Rc<String>) -> Self { + let ptr = unsafe { &*(Rc::into_raw(s) as *const ()) }; + CowRcStr { + ptr: ptr, + borrowed_len_or_max: usize::MAX, + phantom: PhantomData, + } + } + + #[inline] + fn unpack(&self) -> Result<&'a str, *const String> { + if self.borrowed_len_or_max == usize::MAX { + Err(self.ptr as *const () as *const String) + } else { + unsafe { + Ok(str::from_utf8_unchecked(slice::from_raw_parts( + self.ptr as *const () as *const u8, + self.borrowed_len_or_max, + ))) + } + } + } +} + +impl<'a> Clone for CowRcStr<'a> { + #[inline] + fn clone(&self) -> Self { + match self.unpack() { + Err(ptr) => { + let rc = unsafe { Rc::from_raw(ptr) }; + let new_rc = rc.clone(); + mem::forget(rc); // Don’t actually take ownership of this strong reference 
+ CowRcStr::from_rc(new_rc) + } + Ok(_) => CowRcStr { ..*self }, + } + } +} + +impl<'a> Drop for CowRcStr<'a> { + #[inline] + fn drop(&mut self) { + if let Err(ptr) = self.unpack() { + mem::drop(unsafe { Rc::from_raw(ptr) }) + } + } +} + +impl<'a> Deref for CowRcStr<'a> { + type Target = str; + + #[inline] + fn deref(&self) -> &str { + self.unpack().unwrap_or_else(|ptr| unsafe { &**ptr }) + } +} + +// Boilerplate / trivial impls below. + +impl<'a> AsRef<str> for CowRcStr<'a> { + #[inline] + fn as_ref(&self) -> &str { + self + } +} + +impl<'a> Borrow<str> for CowRcStr<'a> { + #[inline] + fn borrow(&self) -> &str { + self + } +} + +impl<'a> Default for CowRcStr<'a> { + #[inline] + fn default() -> Self { + Self::from("") + } +} + +impl<'a> hash::Hash for CowRcStr<'a> { + #[inline] + fn hash<H: hash::Hasher>(&self, hasher: &mut H) { + str::hash(self, hasher) + } +} + +impl<'a, T: AsRef<str>> PartialEq<T> for CowRcStr<'a> { + #[inline] + fn eq(&self, other: &T) -> bool { + str::eq(self, other.as_ref()) + } +} + +impl<'a, T: AsRef<str>> PartialOrd<T> for CowRcStr<'a> { + #[inline] + fn partial_cmp(&self, other: &T) -> Option<cmp::Ordering> { + str::partial_cmp(self, other.as_ref()) + } +} + +impl<'a> Eq for CowRcStr<'a> {} + +impl<'a> Ord for CowRcStr<'a> { + #[inline] + fn cmp(&self, other: &Self) -> cmp::Ordering { + str::cmp(self, other) + } +} + +impl<'a> fmt::Display for CowRcStr<'a> { + #[inline] + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + str::fmt(self, formatter) + } +} + +impl<'a> fmt::Debug for CowRcStr<'a> { + #[inline] + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + str::fmt(self, formatter) + } +} diff --git a/third_party/rust/cssparser/src/from_bytes.rs b/third_party/rust/cssparser/src/from_bytes.rs new file mode 100644 index 0000000000..78a56d3e14 --- /dev/null +++ b/third_party/rust/cssparser/src/from_bytes.rs @@ -0,0 +1,64 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 
2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/// Abstraction for avoiding a dependency from cssparser to an encoding library +pub trait EncodingSupport { + /// One character encoding + type Encoding; + + /// https://encoding.spec.whatwg.org/#concept-encoding-get + fn from_label(ascii_label: &[u8]) -> Option<Self::Encoding>; + + /// Return the UTF-8 encoding + fn utf8() -> Self::Encoding; + + /// Whether the given encoding is UTF-16BE or UTF-16LE + fn is_utf16_be_or_le(encoding: &Self::Encoding) -> bool; +} + +/// Determine the character encoding of a CSS stylesheet. +/// +/// This is based on the presence of a BOM (Byte Order Mark), an `@charset` rule, and +/// encoding meta-information. +/// +/// * `css`: A byte string. +/// * `protocol_encoding_label`: The encoding label, if any, defined by HTTP or equivalent protocol. +/// (e.g. via the `charset` parameter of the `Content-Type` header.) +/// * `environment_encoding`: An optional `Encoding` object for the [environment encoding] +/// (https://drafts.csswg.org/css-syntax/#environment-encoding), if any. +/// +/// Returns the encoding to use. 
+pub fn stylesheet_encoding<E>( + css: &[u8], + protocol_encoding_label: Option<&[u8]>, + environment_encoding: Option<E::Encoding>, +) -> E::Encoding +where + E: EncodingSupport, +{ + // https://drafts.csswg.org/css-syntax/#the-input-byte-stream + if let Some(label) = protocol_encoding_label { + if let Some(protocol_encoding) = E::from_label(label) { + return protocol_encoding; + }; + }; + + let prefix = b"@charset \""; + if css.starts_with(prefix) { + let rest = &css[prefix.len()..]; + if let Some(label_length) = rest.iter().position(|&b| b == b'"') { + if rest[label_length..].starts_with(b"\";") { + let label = &rest[..label_length]; + if let Some(charset_encoding) = E::from_label(label) { + if E::is_utf16_be_or_le(&charset_encoding) { + return E::utf8(); + } else { + return charset_encoding; + } + } + } + } + } + environment_encoding.unwrap_or_else(E::utf8) +} diff --git a/third_party/rust/cssparser/src/lib.rs b/third_party/rust/cssparser/src/lib.rs new file mode 100644 index 0000000000..6c3d3cb8a3 --- /dev/null +++ b/third_party/rust/cssparser/src/lib.rs @@ -0,0 +1,115 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#![crate_name = "cssparser"] +#![crate_type = "rlib"] +#![cfg_attr(feature = "bench", feature(test))] +#![deny(missing_docs)] + +/*! + +Implementation of [CSS Syntax Module Level 3](https://drafts.csswg.org/css-syntax/) for Rust. + +# Input + +Everything is based on `Parser` objects, which borrow a `&str` input. +If you have bytes (from a file, the network, or something) +and want to support character encodings other than UTF-8, +see the `stylesheet_encoding` function, +which can be used together with rust-encoding or encoding-rs. 
+ +# Conventions for parsing functions + +* Take (at least) an `input: &mut cssparser::Parser` parameter +* Return `Result<_, ()>` +* When returning `Ok(_)`, + the function must have consumed exactly the amount of input that represents the parsed value. +* When returning `Err(())`, any amount of input may have been consumed. + +As a consequence, when calling another parsing function, either: + +* Any `Err(())` return value must be propagated. + This happens by definition for tail calls, + and can otherwise be done with the `?` operator. +* Or the call must be wrapped in a `Parser::try_parse` call. + `try_parse` takes a closure that takes a `Parser` and returns a `Result`, + calls it once, + and itself returns that same result. + If the result is `Err`, + it restores the position inside the input to the one saved before calling the closure. + +Examples: + +```{rust,ignore} +// 'none' | <image> +fn parse_background_image(context: &ParserContext, input: &mut Parser) + -> Result<Option<Image>, ()> { + if input.try_parse(|input| input.expect_ident_matching("none")).is_ok() { + Ok(None) + } else { + Image::parse(context, input).map(Some) // tail call + } +} +``` + +```{rust,ignore} +// [ <length> | <percentage> ] [ <length> | <percentage> ]? 
+fn parse_border_spacing(_context: &ParserContext, input: &mut Parser) + -> Result<(LengthOrPercentage, LengthOrPercentage), ()> { + let first = LengthOrPercentage::parse(input)?; + let second = input.try_parse(LengthOrPercentage::parse).unwrap_or(first); + Ok((first, second)) +} +``` + +*/ + +#![recursion_limit = "200"] // For color::parse_color_keyword + +pub use crate::color::{ + parse_color_keyword, AngleOrNumber, Color, ColorComponentParser, NumberOrPercentage, RGBA, +}; +pub use crate::cow_rc_str::CowRcStr; +pub use crate::from_bytes::{stylesheet_encoding, EncodingSupport}; +#[doc(hidden)] +pub use crate::macros::_cssparser_internal_to_lowercase; +pub use crate::nth::parse_nth; +pub use crate::parser::{BasicParseError, BasicParseErrorKind, ParseError, ParseErrorKind}; +pub use crate::parser::{Delimiter, Delimiters, Parser, ParserInput, ParserState}; +pub use crate::rules_and_declarations::{parse_important, parse_one_declaration}; +pub use crate::rules_and_declarations::{parse_one_rule, RuleListParser}; +pub use crate::rules_and_declarations::{AtRuleParser, AtRuleType, QualifiedRuleParser}; +pub use crate::rules_and_declarations::{DeclarationListParser, DeclarationParser}; +pub use crate::serializer::{serialize_identifier, serialize_name, serialize_string}; +pub use crate::serializer::{CssStringWriter, ToCss, TokenSerializationType}; +pub use crate::tokenizer::{SourceLocation, SourcePosition, Token}; +pub use crate::unicode_range::UnicodeRange; +pub use cssparser_macros::*; +#[doc(hidden)] +pub use phf as _cssparser_internal_phf; + +#[macro_use] +mod macros; + +mod rules_and_declarations; + +#[cfg(feature = "dummy_match_byte")] +mod tokenizer; + +#[cfg(not(feature = "dummy_match_byte"))] +mod tokenizer { + include!(concat!(env!("OUT_DIR"), "/tokenizer.rs")); +} +mod color; +mod cow_rc_str; +mod from_bytes; +mod nth; +mod parser; +mod serializer; +mod unicode_range; + +#[cfg(test)] +mod size_of_tests; +#[cfg(test)] +mod tests; diff --git 
a/third_party/rust/cssparser/src/macros.rs b/third_party/rust/cssparser/src/macros.rs new file mode 100644 index 0000000000..234920d891 --- /dev/null +++ b/third_party/rust/cssparser/src/macros.rs @@ -0,0 +1,196 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use matches::matches; +use std::mem::MaybeUninit; + +/// Expands to a `match` expression with string patterns, +/// matching case-insensitively in the ASCII range. +/// +/// The patterns must not contain ASCII upper case letters. (They must be already be lower-cased.) +/// +/// # Example +/// +/// ```rust +/// #[macro_use] extern crate cssparser; +/// +/// # fn main() {} // Make doctest not wrap everythig in its own main +/// # fn dummy(function_name: &String) { let _ = +/// match_ignore_ascii_case! { &function_name, +/// "rgb" => parse_rgb(..), +/// # #[cfg(not(something))] +/// "rgba" => parse_rgba(..), +/// "hsl" => parse_hsl(..), +/// "hsla" => parse_hsla(..), +/// _ => Err(format!("unknown function: {}", function_name)) +/// } +/// # ;} +/// # use std::ops::RangeFull; +/// # fn parse_rgb(_: RangeFull) -> Result<(), String> { Ok(()) } +/// # fn parse_rgba(_: RangeFull) -> Result<(), String> { Ok(()) } +/// # fn parse_hsl(_: RangeFull) -> Result<(), String> { Ok(()) } +/// # fn parse_hsla(_: RangeFull) -> Result<(), String> { Ok(()) } +/// ``` +#[macro_export] +macro_rules! match_ignore_ascii_case { + ( $input:expr, + $( + $( #[$meta: meta] )* + $( $pattern: pat )|+ $( if $guard: expr )? => $then: expr + ),+ + $(,)? + ) => { + { + // This dummy module works around the feature gate + // `error[E0658]: procedural macros cannot be expanded to statements` + // by forcing the macro to be in an item context + // rather than expression/statement context, + // even though the macro only expands to items. 
+ mod cssparser_internal { + $crate::_cssparser_internal_max_len! { + $( $( $pattern )+ )+ + } + } + _cssparser_internal_to_lowercase!($input, cssparser_internal::MAX_LENGTH => lowercase); + // "A" is a short string that we know is different for every string pattern, + // since we’ve verified that none of them include ASCII upper case letters. + match lowercase.unwrap_or("A") { + $( + $( #[$meta] )* + $( $pattern )|+ $( if $guard )? => $then, + )+ + } + } + }; +} + +/// Define a function `$name(&str) -> Option<&'static $ValueType>` +/// +/// The function finds a match for the input string +/// in a [`phf` map](https://github.com/sfackler/rust-phf) +/// and returns a reference to the corresponding value. +/// Matching is case-insensitive in the ASCII range. +/// +/// ## Example: +/// +/// ```rust +/// #[macro_use] extern crate cssparser; +/// +/// # fn main() {} // Make doctest not wrap everything in its own main +/// +/// fn color_rgb(input: &str) -> Option<(u8, u8, u8)> { +/// ascii_case_insensitive_phf_map! { +/// keyword -> (u8, u8, u8) = { +/// "red" => (255, 0, 0), +/// "green" => (0, 255, 0), +/// "blue" => (0, 0, 255), +/// } +/// } +/// keyword(input).cloned() +/// } +#[macro_export] +macro_rules! ascii_case_insensitive_phf_map { + ($name: ident -> $ValueType: ty = { $( $key: tt => $value: expr ),+ }) => { + ascii_case_insensitive_phf_map!($name -> $ValueType = { $( $key => $value, )+ }) + }; + ($name: ident -> $ValueType: ty = { $( $key: tt => $value: expr, )+ }) => { + pub fn $name(input: &str) -> Option<&'static $ValueType> { + // This dummy module works around a feature gate, + // see comment on the similar module in `match_ignore_ascii_case!` above. + mod _cssparser_internal { + $crate::_cssparser_internal_max_len! { + $( $key )+ + } + } + use $crate::_cssparser_internal_phf as phf; + static MAP: phf::Map<&'static str, $ValueType> = phf::phf_map! 
{ + $( + $key => $value, + )* + }; + _cssparser_internal_to_lowercase!(input, _cssparser_internal::MAX_LENGTH => lowercase); + lowercase.and_then(|s| MAP.get(s)) + } + } +} + +/// Implementation detail of match_ignore_ascii_case! and ascii_case_insensitive_phf_map! macros. +/// +/// **This macro is not part of the public API. It can change or be removed between any versions.** +/// +/// Define a local variable named `$output` +/// and assign it the result of calling `_cssparser_internal_to_lowercase` +/// with a stack-allocated buffer of length `$BUFFER_SIZE`. +#[macro_export] +#[doc(hidden)] +macro_rules! _cssparser_internal_to_lowercase { + ($input: expr, $BUFFER_SIZE: expr => $output: ident) => { + #[allow(unsafe_code)] + let mut buffer = unsafe { + ::std::mem::MaybeUninit::<[::std::mem::MaybeUninit<u8>; $BUFFER_SIZE]>::uninit() + .assume_init() + }; + let input: &str = $input; + let $output = $crate::_cssparser_internal_to_lowercase(&mut buffer, input); + }; +} + +/// Implementation detail of match_ignore_ascii_case! and ascii_case_insensitive_phf_map! macros. +/// +/// **This function is not part of the public API. It can change or be removed between any verisons.** +/// +/// If `input` is larger than buffer, return `None`. +/// Otherwise, return `input` ASCII-lowercased, using `buffer` as temporary space if necessary. 
/// Implementation detail of match_ignore_ascii_case! and ascii_case_insensitive_phf_map! macros.
///
/// **This function is not part of the public API. It can change or be removed between any versions.**
///
/// If `input` is larger than buffer, return `None`.
/// Otherwise, return `input` ASCII-lowercased, using `buffer` as temporary space if necessary.
#[doc(hidden)]
#[allow(non_snake_case)]
#[inline]
pub fn _cssparser_internal_to_lowercase<'a>(
    buffer: &'a mut [MaybeUninit<u8>],
    input: &'a str,
) -> Option<&'a str> {
    // Keep exactly `input.len()` bytes of scratch space, or give up if there are fewer.
    let buffer = buffer.get_mut(..input.len())?;

    /// Write the ASCII-lowercased bytes of `input` into `buffer` and view them as a string.
    /// `buffer` and `input` must have the same length.
    #[cold]
    fn make_ascii_lowercase<'a>(buffer: &'a mut [MaybeUninit<u8>], input: &str) -> &'a str {
        debug_assert_eq!(buffer.len(), input.len());
        // Initializing through `MaybeUninit::new` needs no `unsafe`.
        for (slot, byte) in buffer.iter_mut().zip(input.bytes()) {
            *slot = MaybeUninit::new(byte.to_ascii_lowercase());
        }
        // SAFETY: `MaybeUninit<u8>` has the same layout as `u8`, and every element of
        // `buffer` was written by the loop above (both slices have the same length).
        // The bytes are a copy of `input` (a `&str`, so well-formed UTF-8) with ASCII
        // letters lowercased, which preserves UTF-8 well-formedness.
        unsafe {
            let bytes = &*(buffer as *const [MaybeUninit<u8>] as *const [u8]);
            ::std::str::from_utf8_unchecked(bytes)
        }
    }

    Some(if input.bytes().any(|byte| byte.is_ascii_uppercase()) {
        make_ascii_lowercase(buffer, input)
    } else {
        // common case: input is already lower-case, no copy needed
        input
    })
}
+/// The input is typically the arguments of a function, +/// in which case the caller needs to check if the arguments’ parser is exhausted. +/// Return `Ok((A, B))`, or `Err(())` for a syntax error. +pub fn parse_nth<'i, 't>(input: &mut Parser<'i, 't>) -> Result<(i32, i32), BasicParseError<'i>> { + match *input.next()? { + Token::Number { + int_value: Some(b), .. + } => Ok((0, b)), + Token::Dimension { + int_value: Some(a), + ref unit, + .. + } => { + match_ignore_ascii_case! { + unit, + "n" => Ok(parse_b(input, a)?), + "n-" => Ok(parse_signless_b(input, a, -1)?), + _ => match parse_n_dash_digits(&*unit) { + Ok(b) => Ok((a, b)), + Err(()) => { + let unit = unit.clone(); + Err(input.new_basic_unexpected_token_error(Token::Ident(unit))) + } + } + } + } + Token::Ident(ref value) => { + match_ignore_ascii_case! { value, + "even" => Ok((2, 0)), + "odd" => Ok((2, 1)), + "n" => Ok(parse_b(input, 1)?), + "-n" => Ok(parse_b(input, -1)?), + "n-" => Ok(parse_signless_b(input, 1, -1)?), + "-n-" => Ok(parse_signless_b(input, -1, -1)?), + _ => { + let (slice, a) = if value.starts_with("-") { + (&value[1..], -1) + } else { + (&**value, 1) + }; + match parse_n_dash_digits(slice) { + Ok(b) => Ok((a, b)), + Err(()) => { + let value = value.clone(); + Err(input.new_basic_unexpected_token_error(Token::Ident(value))) + } + } + } + } + } + Token::Delim('+') => match *input.next_including_whitespace()? { + Token::Ident(ref value) => { + match_ignore_ascii_case! 
{ value, + "n" => parse_b(input, 1), + "n-" => parse_signless_b(input, 1, -1), + _ => match parse_n_dash_digits(value) { + Ok(b) => Ok((1, b)), + Err(()) => { + let value = value.clone(); + Err(input.new_basic_unexpected_token_error(Token::Ident(value))) + } + } + } + } + ref token => { + let token = token.clone(); + Err(input.new_basic_unexpected_token_error(token)) + } + }, + ref token => { + let token = token.clone(); + Err(input.new_basic_unexpected_token_error(token)) + } + } +} + +fn parse_b<'i, 't>(input: &mut Parser<'i, 't>, a: i32) -> Result<(i32, i32), BasicParseError<'i>> { + let start = input.state(); + match input.next() { + Ok(&Token::Delim('+')) => parse_signless_b(input, a, 1), + Ok(&Token::Delim('-')) => parse_signless_b(input, a, -1), + Ok(&Token::Number { + has_sign: true, + int_value: Some(b), + .. + }) => Ok((a, b)), + _ => { + input.reset(&start); + Ok((a, 0)) + } + } +} + +fn parse_signless_b<'i, 't>( + input: &mut Parser<'i, 't>, + a: i32, + b_sign: i32, +) -> Result<(i32, i32), BasicParseError<'i>> { + // FIXME: remove .clone() when lifetimes are non-lexical. + match input.next()?.clone() { + Token::Number { + has_sign: false, + int_value: Some(b), + .. + } => Ok((a, b_sign * b)), + token => Err(input.new_basic_unexpected_token_error(token)), + } +} + +fn parse_n_dash_digits(string: &str) -> Result<i32, ()> { + let bytes = string.as_bytes(); + if bytes.len() >= 3 + && bytes[..2].eq_ignore_ascii_case(b"n-") + && bytes[2..].iter().all(|&c| matches!(c, b'0'..=b'9')) + { + Ok(parse_number_saturate(&string[1..]).unwrap()) // Include the minus sign + } else { + Err(()) + } +} + +fn parse_number_saturate(string: &str) -> Result<i32, ()> { + let mut input = ParserInput::new(string); + let mut parser = Parser::new(&mut input); + let int = if let Ok(&Token::Number { + int_value: Some(int), + .. 
/// A capture of the internal state of a `Parser` (including the position within the input),
/// obtained from the `Parser::state` method.
///
/// Can be used with the `Parser::reset` method to restore that state.
/// Should only be used with the `Parser` instance it came from.
#[derive(Debug, Clone)]
pub struct ParserState {
    pub(crate) position: usize,
    pub(crate) current_line_start_position: usize,
    pub(crate) current_line_number: u32,
    pub(crate) at_start_of: Option<BlockType>,
}

impl ParserState {
    /// The position from the start of the input, counted in UTF-8 bytes.
    #[inline]
    pub fn position(&self) -> SourcePosition {
        SourcePosition(self.position)
    }

    /// The line number and column number
    #[inline]
    pub fn source_location(&self) -> SourceLocation {
        SourceLocation {
            line: self.current_line_number,
            // Offset from the start of the current line, plus one.
            column: (self.position - self.current_line_start_position + 1) as u32,
        }
    }
}
/// The fundamental parsing errors that can be triggered by built-in parsing routines.
#[derive(Clone, Debug, PartialEq)]
pub struct BasicParseError<'i> {
    /// Details of this error
    pub kind: BasicParseErrorKind<'i>,
    /// Location where this error occurred
    pub location: SourceLocation,
}
impl<'i, T> ParseError<'i, T> {
    /// Extract the fundamental parse error from an extensible error.
    ///
    /// # Panics
    ///
    /// Panics if this error is a `ParseErrorKind::Custom` error.
    pub fn basic(self) -> BasicParseError<'i> {
        match self.kind {
            ParseErrorKind::Basic(kind) => BasicParseError {
                kind: kind,
                location: self.location,
            },
            ParseErrorKind::Custom(_) => panic!("Not a basic parse error"),
        }
    }

    /// Like `std::convert::Into::into`
    ///
    /// Converts the custom error payload (if any) with `T: Into<U>`;
    /// the location is carried over unchanged.
    pub fn into<U>(self) -> ParseError<'i, U>
    where
        T: Into<U>,
    {
        ParseError {
            kind: self.kind.into(),
            location: self.location,
        }
    }
}
/// A set of characters, to be used with the `Parser::parse_until*` methods.
///
/// The union of two sets can be obtained with the `|` operator. Example:
///
/// ```{rust,ignore}
/// input.parse_until_before(Delimiter::CurlyBracketBlock | Delimiter::Semicolon)
/// ```
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct Delimiters {
    // One bit per delimiter; see the constants in `Delimiter` and `ClosingDelimiter`.
    bits: u8,
}

/// `Delimiters` constants.
#[allow(non_upper_case_globals, non_snake_case)]
pub mod Delimiter {
    use super::Delimiters;

    /// The empty delimiter set
    pub const None: Delimiters = Delimiters { bits: 0b0000_0000 };
    /// The delimiter set with only the `{` opening curly bracket
    pub const CurlyBracketBlock: Delimiters = Delimiters { bits: 0b0000_0010 };
    /// The delimiter set with only the `;` semicolon
    pub const Semicolon: Delimiters = Delimiters { bits: 0b0000_0100 };
    /// The delimiter set with only the `!` exclamation point
    pub const Bang: Delimiters = Delimiters { bits: 0b0000_1000 };
    /// The delimiter set with only the `,` comma
    pub const Comma: Delimiters = Delimiters { bits: 0b0001_0000 };
}

// Closing tokens, kept private: they use the remaining high bits of the same bit set.
#[allow(non_upper_case_globals, non_snake_case)]
mod ClosingDelimiter {
    use super::Delimiters;

    pub const CloseCurlyBracket: Delimiters = Delimiters { bits: 0b0010_0000 };
    pub const CloseSquareBracket: Delimiters = Delimiters { bits: 0b0100_0000 };
    pub const CloseParenthesis: Delimiters = Delimiters { bits: 0b1000_0000 };
}

impl BitOr for Delimiters {
    type Output = Self;

    /// Union of the two sets.
    #[inline]
    fn bitor(self, rhs: Self) -> Self {
        let bits = self.bits | rhs.bits;
        Delimiters { bits }
    }
}

impl Delimiters {
    /// Whether the two sets have at least one delimiter in common.
    #[inline]
    fn contains(self, other: Delimiters) -> bool {
        let common = self.bits & other.bits;
        common != 0
    }

    /// The single-delimiter set for `byte`, or the empty set
    /// when there is no byte or it is not a delimiter.
    #[inline]
    fn from_byte(byte: Option<u8>) -> Delimiters {
        let byte = match byte {
            Some(byte) => byte,
            None => return Delimiter::None,
        };
        match byte {
            b';' => Delimiter::Semicolon,
            b'!' => Delimiter::Bang,
            b',' => Delimiter::Comma,
            b'{' => Delimiter::CurlyBracketBlock,
            b'}' => ClosingDelimiter::CloseCurlyBracket,
            b']' => ClosingDelimiter::CloseSquareBracket,
            b')' => ClosingDelimiter::CloseParenthesis,
            _ => Delimiter::None,
        }
    }
}
/// Check whether the input is exhausted. That is, if `.next()` would return
/// an end-of-input error rather than a token.
///
/// This ignores whitespace and comments.
#[inline]
pub fn is_exhausted(&mut self) -> bool {
    self.expect_exhausted().is_ok()
}

/// Check whether the input is exhausted. That is, if `.next()` would return
/// an end-of-input error rather than a token.
/// Return a `Result` so that the `?` operator can be used: `input.expect_exhausted()?`
///
/// If a token remains, it is reported as an unexpected-token error.
/// The parser state is restored afterwards either way, so nothing is consumed.
///
/// This ignores whitespace and comments.
#[inline]
pub fn expect_exhausted(&mut self) -> Result<(), BasicParseError<'i>> {
    let start = self.state();
    let result = match self.next() {
        Err(BasicParseError {
            kind: BasicParseErrorKind::EndOfInput,
            ..
        }) => Ok(()),
        Err(e) => unreachable!("Unexpected error encountered: {:?}", e),
        Ok(t) => Err(start
            .source_location()
            .new_basic_unexpected_token_error(t.clone())),
    };
    // Undo the lookahead performed by `next()` above.
    self.reset(&start);
    result
}
/// Create a new custom ParseError at the current location
#[inline]
pub fn new_custom_error<E1: Into<E2>, E2>(&self, error: E1) -> ParseError<'i, E2> {
    self.current_source_location().new_custom_error(error)
}
/// Restore the internal state of the parser (including position within the input)
/// to what was previously saved by the `Parser::state` method.
///
/// Should only be used with `ParserState` values from the same `Parser` instance.
#[inline]
pub fn reset(&mut self, state: &ParserState) {
    self.input.tokenizer.reset(state);
    // The "just entered a block" marker is part of the saved state as well.
    self.at_start_of = state.at_start_of;
}
+ #[inline] + pub fn seen_var_or_env_functions(&mut self) -> bool { + self.input.tokenizer.seen_var_or_env_functions() + } + + /// The old name of `try_parse`, which requires raw identifiers in the Rust 2018 edition. + #[inline] + pub fn r#try<F, T, E>(&mut self, thing: F) -> Result<T, E> + where + F: FnOnce(&mut Parser<'i, 't>) -> Result<T, E>, + { + self.try_parse(thing) + } + + /// Execute the given closure, passing it the parser. + /// If the result (returned unchanged) is `Err`, + /// the internal state of the parser (including position within the input) + /// is restored to what it was before the call. + #[inline] + pub fn try_parse<F, T, E>(&mut self, thing: F) -> Result<T, E> + where + F: FnOnce(&mut Parser<'i, 't>) -> Result<T, E>, + { + let start = self.state(); + let result = thing(self); + if result.is_err() { + self.reset(&start) + } + result + } + + /// Return a slice of the CSS input + #[inline] + pub fn slice(&self, range: Range<SourcePosition>) -> &'i str { + self.input.tokenizer.slice(range) + } + + /// Return a slice of the CSS input, from the given position to the current one. + #[inline] + pub fn slice_from(&self, start_position: SourcePosition) -> &'i str { + self.input.tokenizer.slice_from(start_position) + } + + /// Return the next token in the input that is neither whitespace or a comment, + /// and advance the position accordingly. + /// + /// After returning a `Function`, `ParenthesisBlock`, + /// `CurlyBracketBlock`, or `SquareBracketBlock` token, + /// the next call will skip until after the matching `CloseParenthesis`, + /// `CloseCurlyBracket`, or `CloseSquareBracket` token. + /// + /// See the `Parser::parse_nested_block` method to parse the content of functions or blocks. + /// + /// This only returns a closing token when it is unmatched (and therefore an error). 
/// Same as `Parser::next`, but does not skip whitespace tokens.
///
/// Comment tokens are still skipped.
pub fn next_including_whitespace(&mut self) -> Result<&Token<'i>, BasicParseError<'i>> {
    loop {
        match self.next_including_whitespace_and_comments() {
            Err(e) => return Err(e),
            // Skip the comment and read the following token.
            Ok(&Token::Comment(_)) => {}
            _ => break,
        }
    }
    // The token that ended the loop is the one held in the input's token cache.
    Ok(self.input.cached_token_ref())
}
/// Parse a list of comma-separated values, all with the same syntax.
///
/// The given closure is called repeatedly with a "delimited" parser
/// (see the `Parser::parse_until_before` method)
/// so that it cannot over-consume the input past a comma at this block/function nesting level.
///
/// Successful results are accumulated in a vector.
///
/// This method returns `Err(_)` the first time that a closure call does,
/// or if a closure call leaves some input before the next comma or the end of the input.
#[inline]
pub fn parse_comma_separated<F, T, E>(
    &mut self,
    mut parse_one: F,
) -> Result<Vec<T>, ParseError<'i, E>>
where
    F: for<'tt> FnMut(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>,
{
    // Vec grows from 0 to 4 by default on first push().  So allocate with
    // capacity 1, so in the somewhat common case of only one item we don't
    // way overallocate.  Note that we always push at least one item if
    // parsing succeeds.
    let mut values = Vec::with_capacity(1);
    loop {
        self.skip_whitespace(); // Unnecessary for correctness, but may help try() in parse_one rewind less.
        values.push(self.parse_until_before(Delimiter::Comma, &mut parse_one)?);
        match self.next() {
            // Nothing left at this nesting level: the list is complete.
            Err(_) => return Ok(values),
            Ok(&Token::Comma) => continue,
            // The delimited parser above stops only at a comma or at the end of the input.
            Ok(_) => unreachable!(),
        }
    }
}
+ #[inline] + pub fn parse_until_after<F, T, E>( + &mut self, + delimiters: Delimiters, + parse: F, + ) -> Result<T, ParseError<'i, E>> + where + F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>, + { + parse_until_after(self, delimiters, parse) + } + + /// Parse a <whitespace-token> and return its value. + #[inline] + pub fn expect_whitespace(&mut self) -> Result<&'i str, BasicParseError<'i>> { + let start_location = self.current_source_location(); + match *self.next_including_whitespace()? { + Token::WhiteSpace(value) => Ok(value), + ref t => Err(start_location.new_basic_unexpected_token_error(t.clone())), + } + } + + /// Parse a <ident-token> and return the unescaped value. + #[inline] + pub fn expect_ident(&mut self) -> Result<&CowRcStr<'i>, BasicParseError<'i>> { + expect! {self, + Token::Ident(ref value) => Ok(value), + } + } + + /// expect_ident, but clone the CowRcStr + #[inline] + pub fn expect_ident_cloned(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> { + self.expect_ident().map(|s| s.clone()) + } + + /// Parse a <ident-token> whose unescaped value is an ASCII-insensitive match for the given value. + #[inline] + pub fn expect_ident_matching( + &mut self, + expected_value: &str, + ) -> Result<(), BasicParseError<'i>> { + expect! {self, + Token::Ident(ref value) if value.eq_ignore_ascii_case(expected_value) => Ok(()), + } + } + + /// Parse a <string-token> and return the unescaped value. + #[inline] + pub fn expect_string(&mut self) -> Result<&CowRcStr<'i>, BasicParseError<'i>> { + expect! {self, + Token::QuotedString(ref value) => Ok(value), + } + } + + /// expect_string, but clone the CowRcStr + #[inline] + pub fn expect_string_cloned(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> { + self.expect_string().map(|s| s.clone()) + } + + /// Parse either a <ident-token> or a <string-token>, and return the unescaped value. 
+ #[inline] + pub fn expect_ident_or_string(&mut self) -> Result<&CowRcStr<'i>, BasicParseError<'i>> { + expect! {self, + Token::Ident(ref value) => Ok(value), + Token::QuotedString(ref value) => Ok(value), + } + } + + /// Parse a <url-token> and return the unescaped value. + #[inline] + pub fn expect_url(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> { + expect! {self, + Token::UnquotedUrl(ref value) => Ok(value.clone()), + Token::Function(ref name) if name.eq_ignore_ascii_case("url") => { + self.parse_nested_block(|input| { + input.expect_string().map_err(Into::into).map(|s| s.clone()) + }) + .map_err(ParseError::<()>::basic) + } + } + } + + /// Parse either a <url-token> or a <string-token>, and return the unescaped value. + #[inline] + pub fn expect_url_or_string(&mut self) -> Result<CowRcStr<'i>, BasicParseError<'i>> { + expect! {self, + Token::UnquotedUrl(ref value) => Ok(value.clone()), + Token::QuotedString(ref value) => Ok(value.clone()), + Token::Function(ref name) if name.eq_ignore_ascii_case("url") => { + self.parse_nested_block(|input| { + input.expect_string().map_err(Into::into).map(|s| s.clone()) + }) + .map_err(ParseError::<()>::basic) + } + } + } + + /// Parse a <number-token> and return its value as an `f32`. + #[inline] + pub fn expect_number(&mut self) -> Result<f32, BasicParseError<'i>> { + expect! {self, + Token::Number { value, .. } => Ok(value), + } + } + + /// Parse a <number-token> that does not have a fractional part, and return the integer value. + #[inline] + pub fn expect_integer(&mut self) -> Result<i32, BasicParseError<'i>> { + expect! {self, + Token::Number { int_value: Some(int_value), .. } => Ok(int_value), + } + } + + /// Parse a <percentage-token> and return the value. + /// `0%` and `100%` map to `0.0` and `1.0` (not `100.0`), respectively. + #[inline] + pub fn expect_percentage(&mut self) -> Result<f32, BasicParseError<'i>> { + expect! {self, + Token::Percentage { unit_value, ..
} => Ok(unit_value), + } + } + + /// Parse a `:` <colon-token>. + #[inline] + pub fn expect_colon(&mut self) -> Result<(), BasicParseError<'i>> { + expect! {self, + Token::Colon => Ok(()), + } + } + + /// Parse a `;` <semicolon-token>. + #[inline] + pub fn expect_semicolon(&mut self) -> Result<(), BasicParseError<'i>> { + expect! {self, + Token::Semicolon => Ok(()), + } + } + + /// Parse a `,` <comma-token>. + #[inline] + pub fn expect_comma(&mut self) -> Result<(), BasicParseError<'i>> { + expect! {self, + Token::Comma => Ok(()), + } + } + + /// Parse a <delim-token> with the given value. + #[inline] + pub fn expect_delim(&mut self, expected_value: char) -> Result<(), BasicParseError<'i>> { + expect! {self, + Token::Delim(value) if value == expected_value => Ok(()), + } + } + + /// Parse a `{ /* ... */ }` curly brackets block. + /// + /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method. + #[inline] + pub fn expect_curly_bracket_block(&mut self) -> Result<(), BasicParseError<'i>> { + expect! {self, + Token::CurlyBracketBlock => Ok(()), + } + } + + /// Parse a `[ /* ... */ ]` square brackets block. + /// + /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method. + #[inline] + pub fn expect_square_bracket_block(&mut self) -> Result<(), BasicParseError<'i>> { + expect! {self, + Token::SquareBracketBlock => Ok(()), + } + } + + /// Parse a `( /* ... */ )` parenthesis block. + /// + /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method. + #[inline] + pub fn expect_parenthesis_block(&mut self) -> Result<(), BasicParseError<'i>> { + expect! {self, + Token::ParenthesisBlock => Ok(()), + } + } + + /// Parse a <function> token and return its name. + /// + /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method. + #[inline] + pub fn expect_function(&mut self) -> Result<&CowRcStr<'i>, BasicParseError<'i>> { + expect! 
{self, + Token::Function(ref name) => Ok(name), + } + } + + /// Parse a <function> token whose name is an ASCII-insensitive match for the given value. + /// + /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method. + #[inline] + pub fn expect_function_matching( + &mut self, + expected_name: &str, + ) -> Result<(), BasicParseError<'i>> { + expect! {self, + Token::Function(ref name) if name.eq_ignore_ascii_case(expected_name) => Ok(()), + } + } + + /// Parse the input until exhaustion and check that it contains no “error” token. + /// + /// See `Token::is_parse_error`. This also checks nested blocks and functions recursively. + #[inline] + pub fn expect_no_error_token(&mut self) -> Result<(), BasicParseError<'i>> { + loop { + match self.next_including_whitespace_and_comments() { + Ok(&Token::Function(_)) + | Ok(&Token::ParenthesisBlock) + | Ok(&Token::SquareBracketBlock) + | Ok(&Token::CurlyBracketBlock) => self + .parse_nested_block(|input| input.expect_no_error_token().map_err(Into::into)) + .map_err(ParseError::<()>::basic)?, + Ok(t) => { + // FIXME: maybe these should be separate variants of + // BasicParseError instead? 
+ if t.is_parse_error() { + let token = t.clone(); + return Err(self.new_basic_unexpected_token_error(token)); + } + } + Err(_) => return Ok(()), + } + } + } +} + +pub fn parse_until_before<'i: 't, 't, F, T, E>( + parser: &mut Parser<'i, 't>, + delimiters: Delimiters, + parse: F, +) -> Result<T, ParseError<'i, E>> +where + F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>, +{ + let delimiters = parser.stop_before | delimiters; + let result; + // Introduce a new scope to limit duration of delimited_parser’s borrow + { + let mut delimited_parser = Parser { + input: parser.input, + at_start_of: parser.at_start_of.take(), + stop_before: delimiters, + }; + result = delimited_parser.parse_entirely(parse); + if let Some(block_type) = delimited_parser.at_start_of { + consume_until_end_of_block(block_type, &mut delimited_parser.input.tokenizer); + } + } + // FIXME: have a special-purpose tokenizer method for this that does less work. + loop { + if delimiters.contains(Delimiters::from_byte(parser.input.tokenizer.next_byte())) { + break; + } + if let Ok(token) = parser.input.tokenizer.next() { + if let Some(block_type) = BlockType::opening(&token) { + consume_until_end_of_block(block_type, &mut parser.input.tokenizer); + } + } else { + break; + } + } + result +} + +pub fn parse_until_after<'i: 't, 't, F, T, E>( + parser: &mut Parser<'i, 't>, + delimiters: Delimiters, + parse: F, +) -> Result<T, ParseError<'i, E>> +where + F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>, +{ + let result = parser.parse_until_before(delimiters, parse); + let next_byte = parser.input.tokenizer.next_byte(); + if next_byte.is_some() + && !parser + .stop_before + .contains(Delimiters::from_byte(next_byte)) + { + debug_assert!(delimiters.contains(Delimiters::from_byte(next_byte))); + // We know this byte is ASCII.
+ parser.input.tokenizer.advance(1); + if next_byte == Some(b'{') { + consume_until_end_of_block(BlockType::CurlyBracket, &mut parser.input.tokenizer); + } + } + result +} + +pub fn parse_nested_block<'i: 't, 't, F, T, E>( + parser: &mut Parser<'i, 't>, + parse: F, +) -> Result<T, ParseError<'i, E>> +where + F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ParseError<'i, E>>, +{ + let block_type = parser.at_start_of.take().expect( + "\ + A nested parser can only be created when a Function, \ + ParenthesisBlock, SquareBracketBlock, or CurlyBracketBlock \ + token was just consumed.\ + ", + ); + let closing_delimiter = match block_type { + BlockType::CurlyBracket => ClosingDelimiter::CloseCurlyBracket, + BlockType::SquareBracket => ClosingDelimiter::CloseSquareBracket, + BlockType::Parenthesis => ClosingDelimiter::CloseParenthesis, + }; + let result; + // Introduce a new scope to limit duration of nested_parser’s borrow + { + let mut nested_parser = Parser { + input: parser.input, + at_start_of: None, + stop_before: closing_delimiter, + }; + result = nested_parser.parse_entirely(parse); + if let Some(block_type) = nested_parser.at_start_of { + consume_until_end_of_block(block_type, &mut nested_parser.input.tokenizer); + } + } + consume_until_end_of_block(block_type, &mut parser.input.tokenizer); + result +} + +#[inline(never)] +#[cold] +fn consume_until_end_of_block(block_type: BlockType, tokenizer: &mut Tokenizer) { + let mut stack = SmallVec::<[BlockType; 16]>::new(); + stack.push(block_type); + + // FIXME: have a special-purpose tokenizer method for this that does less work. 
+ while let Ok(ref token) = tokenizer.next() { + if let Some(b) = BlockType::closing(token) { + if *stack.last().unwrap() == b { + stack.pop(); + if stack.is_empty() { + return; + } + } + } + + if let Some(block_type) = BlockType::opening(token) { + stack.push(block_type); + } + } +} diff --git a/third_party/rust/cssparser/src/rules_and_declarations.rs b/third_party/rust/cssparser/src/rules_and_declarations.rs new file mode 100644 index 0000000000..5d334a6f29 --- /dev/null +++ b/third_party/rust/cssparser/src/rules_and_declarations.rs @@ -0,0 +1,528 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// https://drafts.csswg.org/css-syntax/#parsing + +use super::{BasicParseError, BasicParseErrorKind, Delimiter}; +use super::{ParseError, Parser, Token}; +use crate::cow_rc_str::CowRcStr; +use crate::parser::{parse_nested_block, parse_until_after, parse_until_before, ParserState}; + +/// Parse `!important`. +/// +/// Typical usage is `input.try_parse(parse_important).is_ok()` +/// at the end of a `DeclarationParser::parse_value` implementation. +pub fn parse_important<'i, 't>(input: &mut Parser<'i, 't>) -> Result<(), BasicParseError<'i>> { + input.expect_delim('!')?; + input.expect_ident_matching("important") +} + +/// The return value for `AtRuleParser::parse_prelude`. +/// Indicates whether the at-rule is expected to have a `{ /* ... */ }` block +/// or end with a `;` semicolon. +pub enum AtRuleType<P, PB> { + /// The at-rule is expected to end with a `;` semicolon. Example: `@import`. + /// + /// The value is the representation of all data of the rule which would be + /// handled in `rule_without_block`. + WithoutBlock(P), + + /// The at-rule is expected to have a `{ /* ... */ }` block. Example: `@media`. + /// + /// The value is the representation of the "prelude" part of the rule.
+ WithBlock(PB), +} + +/// A trait to provide various parsing of declaration values. +/// +/// For example, there could be different implementations for property declarations in style rules +/// and for descriptors in `@font-face` rules. +pub trait DeclarationParser<'i> { + /// The finished representation of a declaration. + type Declaration; + + /// The error type that is included in the ParseError value that can be returned. + type Error: 'i; + + /// Parse the value of a declaration with the given `name`. + /// + /// Return the finished representation for the declaration + /// as returned by `DeclarationListParser::next`, + /// or `Err(())` to ignore the entire declaration as invalid. + /// + /// Declaration name matching should be case-insensitive in the ASCII range. + /// This can be done with `std::ascii::Ascii::eq_ignore_ascii_case`, + /// or with the `match_ignore_ascii_case!` macro. + /// + /// The given `input` is a "delimited" parser + /// that ends wherever the declaration value should end. + /// (In declaration lists, before the next semicolon or end of the current block.) + /// + /// If `!important` can be used in a given context, + /// `input.try_parse(parse_important).is_ok()` should be used at the end + /// of the implementation of this method and the result should be part of the return value. + fn parse_value<'t>( + &mut self, + name: CowRcStr<'i>, + input: &mut Parser<'i, 't>, + ) -> Result<Self::Declaration, ParseError<'i, Self::Error>>; +} + +/// A trait to provide various parsing of at-rules. +/// +/// For example, there could be different implementations for top-level at-rules +/// (`@media`, `@font-face`, …) +/// and for page-margin rules inside `@page`. +/// +/// Default implementations that reject all at-rules are provided, +/// so that `impl AtRuleParser<(), ()> for ... {}` can be used +/// for using `DeclarationListParser` to parse a declarations list with only qualified rules. 
+pub trait AtRuleParser<'i> { + /// The intermediate representation of prelude of an at-rule without block; + type PreludeNoBlock; + + /// The intermediate representation of prelude of an at-rule with block; + type PreludeBlock; + + /// The finished representation of an at-rule. + type AtRule; + + /// The error type that is included in the ParseError value that can be returned. + type Error: 'i; + + /// Parse the prelude of an at-rule with the given `name`. + /// + /// Return the representation of the prelude and the type of at-rule, + /// or `Err(())` to ignore the entire at-rule as invalid. + /// + /// See `AtRuleType`’s documentation for the return value. + /// + /// The prelude is the part after the at-keyword + /// and before the `;` semicolon or `{ /* ... */ }` block. + /// + /// At-rule name matching should be case-insensitive in the ASCII range. + /// This can be done with `std::ascii::Ascii::eq_ignore_ascii_case`, + /// or with the `match_ignore_ascii_case!` macro. + /// + /// The given `input` is a "delimited" parser + /// that ends wherever the prelude should end. + /// (Before the next semicolon, the next `{`, or the end of the current block.) + fn parse_prelude<'t>( + &mut self, + name: CowRcStr<'i>, + input: &mut Parser<'i, 't>, + ) -> Result<AtRuleType<Self::PreludeNoBlock, Self::PreludeBlock>, ParseError<'i, Self::Error>> + { + let _ = name; + let _ = input; + Err(input.new_error(BasicParseErrorKind::AtRuleInvalid(name))) + } + + /// End an at-rule which doesn't have block. Return the finished + /// representation of the at-rule. + /// + /// The location passed in is source location of the start of the prelude. + /// + /// This is only called when `parse_prelude` returned `WithoutBlock`, and + /// either the `;` semicolon indeed follows the prelude, or parser is at + /// the end of the input. 
+ fn rule_without_block( + &mut self, + prelude: Self::PreludeNoBlock, + start: &ParserState, + ) -> Self::AtRule { + let _ = prelude; + let _ = start; + panic!( + "The `AtRuleParser::rule_without_block` method must be overriden \ + if `AtRuleParser::parse_prelude` ever returns `AtRuleType::WithoutBlock`." + ) + } + + /// Parse the content of a `{ /* ... */ }` block for the body of the at-rule. + /// + /// The location passed in is source location of the start of the prelude. + /// + /// Return the finished representation of the at-rule + /// as returned by `RuleListParser::next` or `DeclarationListParser::next`, + /// or `Err(())` to ignore the entire at-rule as invalid. + /// + /// This is only called when `parse_prelude` returned `WithBlock`, and a block + /// was indeed found following the prelude. + fn parse_block<'t>( + &mut self, + prelude: Self::PreludeBlock, + start: &ParserState, + input: &mut Parser<'i, 't>, + ) -> Result<Self::AtRule, ParseError<'i, Self::Error>> { + let _ = prelude; + let _ = start; + let _ = input; + Err(input.new_error(BasicParseErrorKind::AtRuleBodyInvalid)) + } +} + +/// A trait to provide various parsing of qualified rules. +/// +/// For example, there could be different implementations +/// for top-level qualified rules (i.e. style rules with Selectors as prelude) +/// and for qualified rules inside `@keyframes` (keyframe rules with keyframe selectors as prelude). +/// +/// Default implementations that reject all qualified rules are provided, +/// so that `impl QualifiedRuleParser<(), ()> for ... {}` can be used +/// for example for using `RuleListParser` to parse a rule list with only at-rules +/// (such as inside `@font-feature-values`). +pub trait QualifiedRuleParser<'i> { + /// The intermediate representation of a qualified rule prelude. + type Prelude; + + /// The finished representation of a qualified rule. + type QualifiedRule; + + /// The error type that is included in the ParseError value that can be returned. 
+ type Error: 'i; + + /// Parse the prelude of a qualified rule. For style rules, this is a Selector list. + /// + /// Return the representation of the prelude, + /// or `Err(())` to ignore the entire qualified rule as invalid. + /// + /// The prelude is the part before the `{ /* ... */ }` block. + /// + /// The given `input` is a "delimited" parser + /// that ends where the prelude should end (before the next `{`). + fn parse_prelude<'t>( + &mut self, + input: &mut Parser<'i, 't>, + ) -> Result<Self::Prelude, ParseError<'i, Self::Error>> { + let _ = input; + Err(input.new_error(BasicParseErrorKind::QualifiedRuleInvalid)) + } + + /// Parse the content of a `{ /* ... */ }` block for the body of the qualified rule. + /// + /// The location passed in is the source location of the start of the prelude. + /// + /// Return the finished representation of the qualified rule + /// as returned by `RuleListParser::next`, + /// or `Err(())` to ignore the entire qualified rule as invalid. + fn parse_block<'t>( + &mut self, + prelude: Self::Prelude, + start: &ParserState, + input: &mut Parser<'i, 't>, + ) -> Result<Self::QualifiedRule, ParseError<'i, Self::Error>> { + let _ = prelude; + let _ = start; + let _ = input; + Err(input.new_error(BasicParseErrorKind::QualifiedRuleInvalid)) + } +} + +/// Provides an iterator for declaration list parsing. +pub struct DeclarationListParser<'i, 't, 'a, P> { + /// The input given to `DeclarationListParser::new` + pub input: &'a mut Parser<'i, 't>, + + /// The parser given to `DeclarationListParser::new` + pub parser: P, +} + +impl<'i, 't, 'a, I, P, E: 'i> DeclarationListParser<'i, 't, 'a, P> +where + P: DeclarationParser<'i, Declaration = I, Error = E> + AtRuleParser<'i, AtRule = I, Error = E>, +{ + /// Create a new `DeclarationListParser` for the given `input` and `parser`. + /// + /// Note that all CSS declaration lists can in principle contain at-rules.
+ /// Even if no such valid at-rule exists (yet), + /// this affects error handling: at-rules end at `{}` blocks, not just semicolons. + /// + /// The given `parser` therefore needs to implement + /// both `DeclarationParser` and `AtRuleParser` traits. + /// However, the latter can be an empty `impl` + /// since `AtRuleParser` provides default implementations of its methods. + /// + /// The return type for finished declarations and at-rules also needs to be the same, + /// since `<DeclarationListParser as Iterator>::next` can return either. + /// It could be a custom enum. + pub fn new(input: &'a mut Parser<'i, 't>, parser: P) -> Self { + DeclarationListParser { + input: input, + parser: parser, + } + } +} + +/// `DeclarationListParser` is an iterator that yields `Ok(_)` for a valid declaration or at-rule +/// or `Err(())` for an invalid one. +impl<'i, 't, 'a, I, P, E: 'i> Iterator for DeclarationListParser<'i, 't, 'a, P> +where + P: DeclarationParser<'i, Declaration = I, Error = E> + AtRuleParser<'i, AtRule = I, Error = E>, +{ + type Item = Result<I, (ParseError<'i, E>, &'i str)>; + + fn next(&mut self) -> Option<Self::Item> { + loop { + let start = self.input.state(); + match self.input.next_including_whitespace_and_comments() { + Ok(&Token::WhiteSpace(_)) | Ok(&Token::Comment(_)) | Ok(&Token::Semicolon) => { + continue + } + Ok(&Token::Ident(ref name)) => { + let name = name.clone(); + let result = { + let parser = &mut self.parser; + // FIXME: https://github.com/servo/rust-cssparser/issues/254 + let callback = |input: &mut Parser<'i, '_>| { + input.expect_colon()?; + parser.parse_value(name, input) + }; + parse_until_after(self.input, Delimiter::Semicolon, callback) + }; + return Some(result.map_err(|e| (e, self.input.slice_from(start.position())))); + } + Ok(&Token::AtKeyword(ref name)) => { + let name = name.clone(); + return Some(parse_at_rule(&start, name, self.input, &mut self.parser)); + } + Ok(token) => { + let token = token.clone(); + let result = 
self.input.parse_until_after(Delimiter::Semicolon, |_| { + Err(start.source_location().new_unexpected_token_error(token)) + }); + return Some(result.map_err(|e| (e, self.input.slice_from(start.position())))); + } + Err(..) => return None, + } + } + } +} + +/// Provides an iterator for rule list parsing. +pub struct RuleListParser<'i, 't, 'a, P> { + /// The input given to `RuleListParser::new` + pub input: &'a mut Parser<'i, 't>, + + /// The parser given to `RuleListParser::new` + pub parser: P, + + is_stylesheet: bool, + any_rule_so_far: bool, +} + +impl<'i, 't, 'a, R, P, E: 'i> RuleListParser<'i, 't, 'a, P> +where + P: QualifiedRuleParser<'i, QualifiedRule = R, Error = E> + + AtRuleParser<'i, AtRule = R, Error = E>, +{ + /// Create a new `RuleListParser` for the given `input` at the top-level of a stylesheet + /// and the given `parser`. + /// + /// The given `parser` needs to implement both `QualifiedRuleParser` and `AtRuleParser` traits. + /// However, either of them can be an empty `impl` + /// since the traits provide default implementations of their methods. + /// + /// The return type for finished qualified rules and at-rules also needs to be the same, + /// since `<RuleListParser as Iterator>::next` can return either. + /// It could be a custom enum. + pub fn new_for_stylesheet(input: &'a mut Parser<'i, 't>, parser: P) -> Self { + RuleListParser { + input: input, + parser: parser, + is_stylesheet: true, + any_rule_so_far: false, + } + } + + /// Same as `new_for_stylesheet`, but should be used for rule lists inside a block + /// such as the body of an `@media` rule. + /// + /// This differs in that `<!--` and `-->` tokens + /// should only be ignored at the stylesheet top-level. + /// (This is to deal with legacy workarounds for `<style>` HTML element parsing.)
+ pub fn new_for_nested_rule(input: &'a mut Parser<'i, 't>, parser: P) -> Self { + RuleListParser { + input: input, + parser: parser, + is_stylesheet: false, + any_rule_so_far: false, + } + } +} + +/// `RuleListParser` is an iterator that yields `Ok(_)` for a rule or `Err(())` for an invalid one. +impl<'i, 't, 'a, R, P, E: 'i> Iterator for RuleListParser<'i, 't, 'a, P> +where + P: QualifiedRuleParser<'i, QualifiedRule = R, Error = E> + + AtRuleParser<'i, AtRule = R, Error = E>, +{ + type Item = Result<R, (ParseError<'i, E>, &'i str)>; + + fn next(&mut self) -> Option<Self::Item> { + loop { + if self.is_stylesheet { + self.input.skip_cdc_and_cdo() + } else { + self.input.skip_whitespace() + } + let start = self.input.state(); + + let at_keyword = match self.input.next_byte()? { + b'@' => match self.input.next_including_whitespace_and_comments() { + Ok(&Token::AtKeyword(ref name)) => Some(name.clone()), + _ => { + self.input.reset(&start); + None + } + }, + _ => None, + }; + + if let Some(name) = at_keyword { + let first_stylesheet_rule = self.is_stylesheet && !self.any_rule_so_far; + self.any_rule_so_far = true; + if first_stylesheet_rule && name.eq_ignore_ascii_case("charset") { + let delimiters = Delimiter::Semicolon | Delimiter::CurlyBracketBlock; + let _: Result<(), ParseError<()>> = + self.input.parse_until_after(delimiters, |_| Ok(())); + } else { + return Some(parse_at_rule( + &start, + name.clone(), + self.input, + &mut self.parser, + )); + } + } else { + self.any_rule_so_far = true; + let result = parse_qualified_rule(self.input, &mut self.parser); + return Some(result.map_err(|e| (e, self.input.slice_from(start.position())))); + } + } + } +} + +/// Parse a single declaration, such as an `( /* ... */ )` parenthesis in an `@supports` prelude. 
+pub fn parse_one_declaration<'i, 't, P, E>( + input: &mut Parser<'i, 't>, + parser: &mut P, +) -> Result<<P as DeclarationParser<'i>>::Declaration, (ParseError<'i, E>, &'i str)> +where + P: DeclarationParser<'i, Error = E>, +{ + let start_position = input.position(); + input + .parse_entirely(|input| { + let name = input.expect_ident()?.clone(); + input.expect_colon()?; + parser.parse_value(name, input) + }) + .map_err(|e| (e, input.slice_from(start_position))) +} + +/// Parse a single rule, such as for CSSOM’s `CSSStyleSheet.insertRule`. +pub fn parse_one_rule<'i, 't, R, P, E>( + input: &mut Parser<'i, 't>, + parser: &mut P, +) -> Result<R, ParseError<'i, E>> +where + P: QualifiedRuleParser<'i, QualifiedRule = R, Error = E> + + AtRuleParser<'i, AtRule = R, Error = E>, +{ + input.parse_entirely(|input| { + input.skip_whitespace(); + let start = input.state(); + let at_keyword = if input.next_byte() == Some(b'@') { + match *input.next_including_whitespace_and_comments()? { + Token::AtKeyword(ref name) => Some(name.clone()), + _ => { + input.reset(&start); + None + } + } + } else { + None + }; + + if let Some(name) = at_keyword { + parse_at_rule(&start, name, input, parser).map_err(|e| e.0) + } else { + parse_qualified_rule(input, parser) + } + }) +} + +fn parse_at_rule<'i, 't, P, E>( + start: &ParserState, + name: CowRcStr<'i>, + input: &mut Parser<'i, 't>, + parser: &mut P, +) -> Result<<P as AtRuleParser<'i>>::AtRule, (ParseError<'i, E>, &'i str)> +where + P: AtRuleParser<'i, Error = E>, +{ + let delimiters = Delimiter::Semicolon | Delimiter::CurlyBracketBlock; + // FIXME: https://github.com/servo/rust-cssparser/issues/254 + let callback = |input: &mut Parser<'i, '_>| parser.parse_prelude(name, input); + let result = parse_until_before(input, delimiters, callback); + match result { + Ok(AtRuleType::WithoutBlock(prelude)) => match input.next() { + Ok(&Token::Semicolon) | Err(_) => Ok(parser.rule_without_block(prelude, start)), + Ok(&Token::CurlyBracketBlock) => 
Err(( + input.new_unexpected_token_error(Token::CurlyBracketBlock), + input.slice_from(start.position()), + )), + Ok(_) => unreachable!(), + }, + Ok(AtRuleType::WithBlock(prelude)) => { + match input.next() { + Ok(&Token::CurlyBracketBlock) => { + // FIXME: https://github.com/servo/rust-cssparser/issues/254 + let callback = + |input: &mut Parser<'i, '_>| parser.parse_block(prelude, start, input); + parse_nested_block(input, callback) + .map_err(|e| (e, input.slice_from(start.position()))) + } + Ok(&Token::Semicolon) => Err(( + input.new_unexpected_token_error(Token::Semicolon), + input.slice_from(start.position()), + )), + Err(e) => Err((e.into(), input.slice_from(start.position()))), + Ok(_) => unreachable!(), + } + } + Err(error) => { + let end_position = input.position(); + match input.next() { + Ok(&Token::CurlyBracketBlock) | Ok(&Token::Semicolon) | Err(_) => {} + _ => unreachable!(), + }; + Err((error, input.slice(start.position()..end_position))) + } + } +} + +fn parse_qualified_rule<'i, 't, P, E>( + input: &mut Parser<'i, 't>, + parser: &mut P, +) -> Result<<P as QualifiedRuleParser<'i>>::QualifiedRule, ParseError<'i, E>> +where + P: QualifiedRuleParser<'i, Error = E>, +{ + let start = input.state(); + // FIXME: https://github.com/servo/rust-cssparser/issues/254 + let callback = |input: &mut Parser<'i, '_>| parser.parse_prelude(input); + let prelude = parse_until_before(input, Delimiter::CurlyBracketBlock, callback); + match *input.next()? { + Token::CurlyBracketBlock => { + // Do this here so that we consume the `{` even if the prelude is `Err`. 
+ let prelude = prelude?; + // FIXME: https://github.com/servo/rust-cssparser/issues/254 + let callback = |input: &mut Parser<'i, '_>| parser.parse_block(prelude, &start, input); + parse_nested_block(input, callback) + } + _ => unreachable!(), + } +} diff --git a/third_party/rust/cssparser/src/serializer.rs b/third_party/rust/cssparser/src/serializer.rs new file mode 100644 index 0000000000..abd50c3d30 --- /dev/null +++ b/third_party/rust/cssparser/src/serializer.rs @@ -0,0 +1,552 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use dtoa_short::{self, Notation}; +use itoa; +use matches::matches; +use std::fmt::{self, Write}; +use std::io; +use std::str; + +use super::Token; + +/// Trait for things that can serialize themselves in CSS syntax. +pub trait ToCss { + /// Serialize `self` in CSS syntax, writing to `dest`. + fn to_css<W>(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write; + + /// Serialize `self` in CSS syntax and return a string. + /// + /// (This is a convenience wrapper for `to_css` and probably should not be overridden.) + #[inline] + fn to_css_string(&self) -> String { + let mut s = String::new(); + self.to_css(&mut s).unwrap(); + s + } +} + +#[inline] +fn write_numeric<W>(value: f32, int_value: Option<i32>, has_sign: bool, dest: &mut W) -> fmt::Result +where + W: fmt::Write, +{ + // `value >= 0` is true for negative 0. + if has_sign && value.is_sign_positive() { + dest.write_str("+")?; + } + + let notation = if value == 0.0 && value.is_sign_negative() { + // Negative zero. Work around #20596. + dest.write_str("-0")?; + Notation { + decimal_point: false, + scientific: false, + } + } else { + dtoa_short::write(dest, value)? + }; + + if int_value.is_none() && value.fract() == 0.
{ + if !notation.decimal_point && !notation.scientific { + dest.write_str(".0")?; + } + } + Ok(()) +} + +impl<'a> ToCss for Token<'a> { + fn to_css<W>(&self, dest: &mut W) -> fmt::Result + where + W: fmt::Write, + { + match *self { + Token::Ident(ref value) => serialize_identifier(&**value, dest)?, + Token::AtKeyword(ref value) => { + dest.write_str("@")?; + serialize_identifier(&**value, dest)?; + } + Token::Hash(ref value) => { + dest.write_str("#")?; + serialize_name(value, dest)?; + } + Token::IDHash(ref value) => { + dest.write_str("#")?; + serialize_identifier(&**value, dest)?; + } + Token::QuotedString(ref value) => serialize_string(&**value, dest)?, + Token::UnquotedUrl(ref value) => { + dest.write_str("url(")?; + serialize_unquoted_url(&**value, dest)?; + dest.write_str(")")?; + } + Token::Delim(value) => dest.write_char(value)?, + + Token::Number { + value, + int_value, + has_sign, + } => write_numeric(value, int_value, has_sign, dest)?, + Token::Percentage { + unit_value, + int_value, + has_sign, + } => { + write_numeric(unit_value * 100., int_value, has_sign, dest)?; + dest.write_str("%")?; + } + Token::Dimension { + value, + int_value, + has_sign, + ref unit, + } => { + write_numeric(value, int_value, has_sign, dest)?; + // Disambiguate with scientific notation. + let unit = &**unit; + if unit == "e" || unit == "E" || unit.starts_with("e-") || unit.starts_with("E-") { + dest.write_str("\\65 ")?; + serialize_name(&unit[1..], dest)?; + } else { + serialize_identifier(unit, dest)?; + } + } + + Token::WhiteSpace(content) => dest.write_str(content)?, + Token::Comment(content) => { + dest.write_str("/*")?; + dest.write_str(content)?; + dest.write_str("*/")? 
/// Write `ascii_byte` as a hexadecimal escape: a backslash, the byte's value
/// in lower-case hex without a leading zero, then a single space.
///
/// For example `0x0A` is written as `\a ` and `0x7F` as `\7f `.
fn hex_escape<W>(ascii_byte: u8, dest: &mut W) -> fmt::Result
where
    W: fmt::Write,
{
    static HEX_DIGITS: &'static [u8; 16] = b"0123456789abcdef";
    let high = HEX_DIGITS[usize::from(ascii_byte >> 4)];
    let low = HEX_DIGITS[usize::from(ascii_byte & 0x0F)];
    let two_digits = [b'\\', high, low, b' '];
    let one_digit = [b'\\', low, b' '];
    let bytes = if ascii_byte > 0x0F {
        &two_digits[..]
    } else {
        &one_digit[..]
    };
    // Every byte in `bytes` is ASCII, so the checked conversion always
    // succeeds; it is used instead of `from_utf8_unchecked` so that this
    // function contains no `unsafe` at all.
    match str::from_utf8(bytes) {
        Ok(escaped) => dest.write_str(escaped),
        Err(_) => Err(fmt::Error),
    }
}

/// Write `ascii_byte` preceded by a backslash.
///
/// Returns `fmt::Error` without writing anything if `ascii_byte` is not
/// ASCII: a lone byte >= 0x80 is not valid UTF-8 and must never be handed to
/// `write_str`. The callers in this file only pass ASCII bytes.
fn char_escape<W>(ascii_byte: u8, dest: &mut W) -> fmt::Result
where
    W: fmt::Write,
{
    let bytes = [b'\\', ascii_byte];
    match str::from_utf8(&bytes) {
        Ok(escaped) => dest.write_str(escaped),
        Err(_) => Err(fmt::Error),
    }
}

/// Write a CSS identifier, escaping characters as necessary.
///
/// * An empty `value` writes nothing.
/// * A leading `--` is written verbatim and the remainder is serialized with
///   `serialize_name`.
/// * A lone `-` is written as `\-`.
/// * Otherwise one leading `-` is written verbatim, a digit in first position
///   (after that optional dash) is hex-escaped, and the remainder is
///   serialized with `serialize_name`.
pub fn serialize_identifier<W>(mut value: &str, dest: &mut W) -> fmt::Result
where
    W: fmt::Write,
{
    if value.is_empty() {
        return Ok(());
    }

    if value.starts_with("--") {
        dest.write_str("--")?;
        return serialize_name(&value[2..], dest);
    }
    if value == "-" {
        return dest.write_str("\\-");
    }

    if value.as_bytes()[0] == b'-' {
        dest.write_str("-")?;
        value = &value[1..];
    }
    // `value` is still non-empty here: the empty string and the lone "-"
    // were both handled above, so indexing the first byte cannot panic.
    if let digit @ b'0'..=b'9' = value.as_bytes()[0] {
        hex_escape(digit, dest)?;
        value = &value[1..];
    }
    serialize_name(value, dest)
}

/// Write a CSS name, like a custom property name.
///
/// ASCII letters, digits, `_`, `-` and all non-ASCII bytes are copied
/// through unchanged. NUL is replaced by U+FFFD, the bytes 0x01-0x1F and
/// 0x7F are hex-escaped, and every other ASCII byte is backslash-escaped.
///
/// You should only use this when you know what you're doing, when in doubt,
/// consider using `serialize_identifier`.
pub fn serialize_name<W>(value: &str, dest: &mut W) -> fmt::Result
where
    W: fmt::Write,
{
    // Start of the run of bytes that has not been written yet. Slices are
    // only ever cut at ASCII bytes, so they always fall on char boundaries.
    let mut chunk_start = 0;
    for (i, b) in value.bytes().enumerate() {
        let passes_through = match b {
            b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_' | b'-' => true,
            _ => !b.is_ascii(),
        };
        if passes_through {
            continue;
        }
        dest.write_str(&value[chunk_start..i])?;
        match b {
            b'\0' => dest.write_str("\u{FFFD}")?,
            b'\x01'..=b'\x1F' | b'\x7F' => hex_escape(b, dest)?,
            _ => char_escape(b, dest)?,
        }
        chunk_start = i + 1;
    }
    dest.write_str(&value[chunk_start..])
}

/// Write the contents of an unquoted `url(...)` token.
///
/// Bytes from NUL up to and including space, and 0x7F, are hex-escaped;
/// `(`, `)`, `"`, `'` and `\` are backslash-escaped; everything else is
/// copied through unchanged. The surrounding `url(` and `)` are written by
/// the caller.
fn serialize_unquoted_url<W>(value: &str, dest: &mut W) -> fmt::Result
where
    W: fmt::Write,
{
    let mut chunk_start = 0;
    for (i, b) in value.bytes().enumerate() {
        let needs_hex = match b {
            b'\0'..=b' ' | b'\x7F' => true,
            b'(' | b')' | b'"' | b'\'' | b'\\' => false,
            _ => continue,
        };
        dest.write_str(&value[chunk_start..i])?;
        if needs_hex {
            hex_escape(b, dest)?;
        } else {
            char_escape(b, dest)?;
        }
        chunk_start = i + 1;
    }
    dest.write_str(&value[chunk_start..])
}

/// Write a double-quoted CSS string token, escaping content as necessary.
pub fn serialize_string<W>(value: &str, dest: &mut W) -> fmt::Result
where
    W: fmt::Write,
{
    dest.write_str("\"")?;
    CssStringWriter::new(dest).write_str(value)?;
    dest.write_str("\"")
}

/// A `fmt::Write` adapter that escapes text for writing as a double-quoted CSS string.
/// Quotes are not included.
///
/// Typical usage:
///
/// ```{rust,ignore}
/// fn write_foo<W>(foo: &Foo, dest: &mut W) -> fmt::Result where W: fmt::Write {
///     dest.write_str("\"")?;
///     {
///         let mut string_dest = CssStringWriter::new(dest);
///         // Write into string_dest...
///     }
///     dest.write_str("\"")?;
///     Ok(())
/// }
/// ```
pub struct CssStringWriter<'a, W> {
    inner: &'a mut W,
}

impl<'a, W> CssStringWriter<'a, W>
where
    W: fmt::Write,
{
    /// Wrap a text writer to create a `CssStringWriter`.
    pub fn new(inner: &'a mut W) -> CssStringWriter<'a, W> {
        CssStringWriter { inner: inner }
    }
}

impl<'a, W> fmt::Write for CssStringWriter<'a, W>
where
    W: fmt::Write,
{
    /// Forward `s` to the wrapped writer with `"` and `\` backslash-escaped,
    /// NUL replaced by U+FFFD, and the bytes 0x01-0x1F and 0x7F hex-escaped.
    fn write_str(&mut self, s: &str) -> fmt::Result {
        let mut chunk_start = 0;
        for (i, b) in s.bytes().enumerate() {
            // `Some(text)` is a fixed replacement; `None` means hex-escape.
            let replacement = match b {
                b'"' => Some("\\\""),
                b'\\' => Some("\\\\"),
                b'\0' => Some("\u{FFFD}"),
                b'\x01'..=b'\x1F' | b'\x7F' => None,
                _ => continue,
            };
            self.inner.write_str(&s[chunk_start..i])?;
            match replacement {
                Some(text) => self.inner.write_str(text)?,
                None => hex_escape(b, self.inner)?,
            }
            chunk_start = i + 1;
        }
        self.inner.write_str(&s[chunk_start..])
    }
}
/// A category of token. See the `needs_separator_when_before` method.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub struct TokenSerializationType(TokenSerializationTypeVariants);

impl TokenSerializationType {
    /// Return a value that represents the absence of a token, e.g. before the start of the input.
    pub fn nothing() -> TokenSerializationType {
        TokenSerializationType(TokenSerializationTypeVariants::Nothing)
    }

    /// If this value is `TokenSerializationType::nothing()`, set it to the given value instead.
    /// Any other value is left untouched.
    pub fn set_if_nothing(&mut self, new_value: TokenSerializationType) {
        if let TokenSerializationTypeVariants::Nothing = self.0 {
            *self = new_value;
        }
    }

    /// Return true if, when a token of category `self` is serialized just before
    /// a token of category `other` with no whitespace in between,
    /// an empty comment `/**/` needs to be inserted between them
    /// so that they are not re-parsed as a single token.
    ///
    /// See https://drafts.csswg.org/css-syntax/#serialization
    ///
    /// See https://github.com/w3c/csswg-drafts/issues/4088 for the
    /// `DelimPercent` bits.
    pub fn needs_separator_when_before(self, other: TokenSerializationType) -> bool {
        use self::TokenSerializationTypeVariants::*;

        let next = other.0;
        // Two groups of following categories recur in most rows of the table.
        let ident_like = matches!(next, Ident | Function | UrlOrBadUrl | DelimMinus);
        let numeric = matches!(next, Number | Percentage | Dimension);

        match self.0 {
            Ident => ident_like || numeric || matches!(next, CDC | OpenParen),
            AtKeywordOrHash | Dimension => ident_like || numeric || next == CDC,
            DelimHash | DelimMinus => ident_like || numeric,
            Number => ident_like || numeric || next == DelimPercent,
            DelimAt => ident_like,
            DelimDotOrPlus => numeric,
            DelimAssorted | DelimAsterisk => next == DelimEquals,
            DelimBar => matches!(next, DelimEquals | DelimBar | DashMatch),
            DelimSlash => matches!(next, DelimAsterisk | SubstringMatch),
            // These categories never need a separator after them.
            Nothing | WhiteSpace | Percentage | UrlOrBadUrl | Function | CDC | OpenParen
            | DashMatch | SubstringMatch | DelimQuestion | DelimEquals | DelimPercent | Other => {
                false
            }
        }
    }
}

/// The private set of categories wrapped by `TokenSerializationType`.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
enum TokenSerializationTypeVariants {
    Nothing,
    WhiteSpace,
    AtKeywordOrHash,
    Number,
    Dimension,
    Percentage,
    UrlOrBadUrl,
    Function,
    Ident,
    CDC,
    DashMatch,
    SubstringMatch,
    OpenParen,      // '('
    DelimHash,      // '#'
    DelimAt,        // '@'
    DelimDotOrPlus, // '.', '+'
    DelimMinus,     // '-'
    DelimQuestion,  // '?'
    DelimAssorted,  // '$', '^', '~'
    DelimEquals,    // '='
    DelimBar,       // '|'
    DelimSlash,     // '/'
    DelimAsterisk,  // '*'
    DelimPercent,   // '%'
    Other,          // anything else
}
} => Dimension, + Token::WhiteSpace(_) => WhiteSpace, + Token::Comment(_) => DelimSlash, + Token::DashMatch => DashMatch, + Token::SubstringMatch => SubstringMatch, + Token::CDC => CDC, + Token::Function(_) => Function, + Token::ParenthesisBlock => OpenParen, + Token::SquareBracketBlock + | Token::CurlyBracketBlock + | Token::CloseParenthesis + | Token::CloseSquareBracket + | Token::CloseCurlyBracket + | Token::QuotedString(_) + | Token::BadString(_) + | Token::Delim(_) + | Token::Colon + | Token::Semicolon + | Token::Comma + | Token::CDO + | Token::IncludeMatch + | Token::PrefixMatch + | Token::SuffixMatch => Other, + }) + } +} diff --git a/third_party/rust/cssparser/src/size_of_tests.rs b/third_party/rust/cssparser/src/size_of_tests.rs new file mode 100644 index 0000000000..7e39e4c6a2 --- /dev/null +++ b/third_party/rust/cssparser/src/size_of_tests.rs @@ -0,0 +1,59 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use crate::cow_rc_str::CowRcStr; +use crate::tokenizer::Token; +use std::borrow::Cow; + +macro_rules! size_of_test { + ($testname: ident, $t: ty, $expected_size: expr) => { + #[test] + fn $testname() { + let new = ::std::mem::size_of::<$t>(); + let old = $expected_size; + if new < old { + panic!( + "Your changes have decreased the stack size of {} from {} to {}. \ + Good work! Please update the expected size in {}.", + stringify!($t), + old, + new, + file!() + ) + } else if new > old { + panic!( + "Your changes have increased the stack size of {} from {} to {}. \ + Please consider choosing a design which avoids this increase. 
\ + If you feel that the increase is necessary, update the size in {}.", + stringify!($t), + old, + new, + file!() + ) + } + } + }; +} + +// Some of these assume 64-bit +size_of_test!(token, Token, 32); +size_of_test!(std_cow_str, Cow<'static, str>, 32); +size_of_test!(cow_rc_str, CowRcStr, 16); + +size_of_test!(tokenizer, crate::tokenizer::Tokenizer, 72); +size_of_test!( + parser_input, + crate::parser::ParserInput, + if cfg!(rustc_has_pr45225) { 136 } else { 144 } +); +size_of_test!(parser, crate::parser::Parser, 16); +size_of_test!(source_position, crate::SourcePosition, 8); +size_of_test!(parser_state, crate::ParserState, 24); + +size_of_test!(basic_parse_error, crate::BasicParseError, 48); +size_of_test!( + parse_error_lower_bound, + crate::ParseError<()>, + if cfg!(rustc_has_pr45225) { 48 } else { 56 } +); diff --git a/third_party/rust/cssparser/src/tests.rs b/third_party/rust/cssparser/src/tests.rs new file mode 100644 index 0000000000..7aa6c462ed --- /dev/null +++ b/third_party/rust/cssparser/src/tests.rs @@ -0,0 +1,1429 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#[cfg(feature = "bench")] +extern crate test; + +use encoding_rs; +use matches::matches; +use serde_json::{self, json, Map, Value}; + +#[cfg(feature = "bench")] +use self::test::Bencher; + +use super::{ + parse_important, parse_nth, parse_one_declaration, parse_one_rule, stylesheet_encoding, + AtRuleParser, AtRuleType, BasicParseError, BasicParseErrorKind, Color, CowRcStr, + DeclarationListParser, DeclarationParser, Delimiter, EncodingSupport, ParseError, + ParseErrorKind, Parser, ParserInput, ParserState, QualifiedRuleParser, RuleListParser, + SourceLocation, ToCss, Token, TokenSerializationType, UnicodeRange, RGBA, +}; + +macro_rules! 
JArray { + ($($e: expr,)*) => { JArray![ $( $e ),* ] }; + ($($e: expr),*) => { Value::Array(vec!( $( $e.to_json() ),* )) } +} + +fn almost_equals(a: &Value, b: &Value) -> bool { + match (a, b) { + (&Value::Number(ref a), &Value::Number(ref b)) => { + let a = a.as_f64().unwrap(); + let b = b.as_f64().unwrap(); + (a - b).abs() <= a.abs() * 1e-6 + } + + (&Value::Bool(a), &Value::Bool(b)) => a == b, + (&Value::String(ref a), &Value::String(ref b)) => a == b, + (&Value::Array(ref a), &Value::Array(ref b)) => { + a.len() == b.len() + && a.iter() + .zip(b.iter()) + .all(|(ref a, ref b)| almost_equals(*a, *b)) + } + (&Value::Object(_), &Value::Object(_)) => panic!("Not implemented"), + (&Value::Null, &Value::Null) => true, + _ => false, + } +} + +fn normalize(json: &mut Value) { + match *json { + Value::Array(ref mut list) => { + for item in list.iter_mut() { + normalize(item) + } + } + Value::String(ref mut s) => { + if *s == "extra-input" || *s == "empty" { + *s = "invalid".to_string() + } + } + _ => {} + } +} + +fn assert_json_eq(results: Value, mut expected: Value, message: &str) { + normalize(&mut expected); + if !almost_equals(&results, &expected) { + println!( + "{}", + ::difference::Changeset::new( + &serde_json::to_string_pretty(&results).unwrap(), + &serde_json::to_string_pretty(&expected).unwrap(), + "\n", + ) + ); + panic!("{}", message) + } +} + +fn run_raw_json_tests<F: Fn(Value, Value) -> ()>(json_data: &str, run: F) { + let items = match serde_json::from_str(json_data) { + Ok(Value::Array(items)) => items, + _ => panic!("Invalid JSON"), + }; + assert!(items.len() % 2 == 0); + let mut input = None; + for item in items.into_iter() { + match (&input, item) { + (&None, json_obj) => input = Some(json_obj), + (&Some(_), expected) => { + let input = input.take().unwrap(); + run(input, expected) + } + }; + } +} + +fn run_json_tests<F: Fn(&mut Parser) -> Value>(json_data: &str, parse: F) { + run_raw_json_tests(json_data, |input, expected| match input { + 
Value::String(input) => { + let mut parse_input = ParserInput::new(&input); + let result = parse(&mut Parser::new(&mut parse_input)); + assert_json_eq(result, expected, &input); + } + _ => panic!("Unexpected JSON"), + }); +} + +#[test] +fn component_value_list() { + run_json_tests( + include_str!("css-parsing-tests/component_value_list.json"), + |input| Value::Array(component_values_to_json(input)), + ); +} + +#[test] +fn one_component_value() { + run_json_tests( + include_str!("css-parsing-tests/one_component_value.json"), + |input| { + let result: Result<Value, ParseError<()>> = input.parse_entirely(|input| { + Ok(one_component_value_to_json(input.next()?.clone(), input)) + }); + result.unwrap_or(JArray!["error", "invalid"]) + }, + ); +} + +#[test] +fn declaration_list() { + run_json_tests( + include_str!("css-parsing-tests/declaration_list.json"), + |input| { + Value::Array( + DeclarationListParser::new(input, JsonParser) + .map(|result| result.unwrap_or(JArray!["error", "invalid"])) + .collect(), + ) + }, + ); +} + +#[test] +fn one_declaration() { + run_json_tests( + include_str!("css-parsing-tests/one_declaration.json"), + |input| { + parse_one_declaration(input, &mut JsonParser).unwrap_or(JArray!["error", "invalid"]) + }, + ); +} + +#[test] +fn rule_list() { + run_json_tests(include_str!("css-parsing-tests/rule_list.json"), |input| { + Value::Array( + RuleListParser::new_for_nested_rule(input, JsonParser) + .map(|result| result.unwrap_or(JArray!["error", "invalid"])) + .collect(), + ) + }); +} + +#[test] +fn stylesheet() { + run_json_tests(include_str!("css-parsing-tests/stylesheet.json"), |input| { + Value::Array( + RuleListParser::new_for_stylesheet(input, JsonParser) + .map(|result| result.unwrap_or(JArray!["error", "invalid"])) + .collect(), + ) + }); +} + +#[test] +fn one_rule() { + run_json_tests(include_str!("css-parsing-tests/one_rule.json"), |input| { + parse_one_rule(input, &mut JsonParser).unwrap_or(JArray!["error", "invalid"]) + }); +} + +#[test] 
+fn stylesheet_from_bytes() { + pub struct EncodingRs; + + impl EncodingSupport for EncodingRs { + type Encoding = &'static encoding_rs::Encoding; + + fn utf8() -> Self::Encoding { + encoding_rs::UTF_8 + } + + fn is_utf16_be_or_le(encoding: &Self::Encoding) -> bool { + *encoding == encoding_rs::UTF_16LE || *encoding == encoding_rs::UTF_16BE + } + + fn from_label(ascii_label: &[u8]) -> Option<Self::Encoding> { + encoding_rs::Encoding::for_label(ascii_label) + } + } + + run_raw_json_tests( + include_str!("css-parsing-tests/stylesheet_bytes.json"), + |input, expected| { + let map = match input { + Value::Object(map) => map, + _ => panic!("Unexpected JSON"), + }; + + let result = { + let css = get_string(&map, "css_bytes") + .unwrap() + .chars() + .map(|c| { + assert!(c as u32 <= 0xFF); + c as u8 + }) + .collect::<Vec<u8>>(); + let protocol_encoding_label = + get_string(&map, "protocol_encoding").map(|s| s.as_bytes()); + let environment_encoding = get_string(&map, "environment_encoding") + .map(|s| s.as_bytes()) + .and_then(EncodingRs::from_label); + + let encoding = stylesheet_encoding::<EncodingRs>( + &css, + protocol_encoding_label, + environment_encoding, + ); + let (css_unicode, used_encoding, _) = encoding.decode(&css); + let mut input = ParserInput::new(&css_unicode); + let input = &mut Parser::new(&mut input); + let rules = RuleListParser::new_for_stylesheet(input, JsonParser) + .map(|result| result.unwrap_or(JArray!["error", "invalid"])) + .collect::<Vec<_>>(); + JArray![rules, used_encoding.name().to_lowercase()] + }; + assert_json_eq(result, expected, &Value::Object(map).to_string()); + }, + ); + + fn get_string<'a>(map: &'a Map<String, Value>, key: &str) -> Option<&'a str> { + match map.get(key) { + Some(&Value::String(ref s)) => Some(s), + Some(&Value::Null) => None, + None => None, + _ => panic!("Unexpected JSON"), + } + } +} + +#[test] +fn expect_no_error_token() { + let mut input = ParserInput::new("foo 4px ( / { !bar }"); + assert!(Parser::new(&mut 
input).expect_no_error_token().is_ok()); + let mut input = ParserInput::new(")"); + assert!(Parser::new(&mut input).expect_no_error_token().is_err()); + let mut input = ParserInput::new("}"); + assert!(Parser::new(&mut input).expect_no_error_token().is_err()); + let mut input = ParserInput::new("(a){]"); + assert!(Parser::new(&mut input).expect_no_error_token().is_err()); + let mut input = ParserInput::new("'\n'"); + assert!(Parser::new(&mut input).expect_no_error_token().is_err()); + let mut input = ParserInput::new("url('\n'"); + assert!(Parser::new(&mut input).expect_no_error_token().is_err()); + let mut input = ParserInput::new("url(a b)"); + assert!(Parser::new(&mut input).expect_no_error_token().is_err()); + let mut input = ParserInput::new("url(\u{7F}))"); + assert!(Parser::new(&mut input).expect_no_error_token().is_err()); +} + +/// https://github.com/servo/rust-cssparser/issues/71 +#[test] +fn outer_block_end_consumed() { + let mut input = ParserInput::new("(calc(true))"); + let mut input = Parser::new(&mut input); + assert!(input.expect_parenthesis_block().is_ok()); + assert!(input + .parse_nested_block(|input| input + .expect_function_matching("calc") + .map_err(Into::<ParseError<()>>::into)) + .is_ok()); + println!("{:?}", input.position()); + assert!(input.next().is_err()); +} + +/// https://github.com/servo/rust-cssparser/issues/174 +#[test] +fn bad_url_slice_out_of_bounds() { + let mut input = ParserInput::new("url(\u{1}\\"); + let mut parser = Parser::new(&mut input); + let result = parser.next_including_whitespace_and_comments(); // This used to panic + assert_eq!(result, Ok(&Token::BadUrl("\u{1}\\".into()))); +} + +/// https://bugzilla.mozilla.org/show_bug.cgi?id=1383975 +#[test] +fn bad_url_slice_not_at_char_boundary() { + let mut input = ParserInput::new("url(9\n۰"); + let mut parser = Parser::new(&mut input); + let result = parser.next_including_whitespace_and_comments(); // This used to panic + assert_eq!(result, 
Ok(&Token::BadUrl("9\n۰".into()))); +} + +#[test] +fn unquoted_url_escaping() { + let token = Token::UnquotedUrl( + "\ + \x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\ + \x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f \ + !\"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\ + ^_`abcdefghijklmnopqrstuvwxyz{|}~\x7fé\ + " + .into(), + ); + let serialized = token.to_css_string(); + assert_eq!( + serialized, + "\ + url(\ + \\1 \\2 \\3 \\4 \\5 \\6 \\7 \\8 \\9 \\a \\b \\c \\d \\e \\f \\10 \ + \\11 \\12 \\13 \\14 \\15 \\16 \\17 \\18 \\19 \\1a \\1b \\1c \\1d \\1e \\1f \\20 \ + !\\\"#$%&\\'\\(\\)*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]\ + ^_`abcdefghijklmnopqrstuvwxyz{|}~\\7f é\ + )\ + " + ); + let mut input = ParserInput::new(&serialized); + assert_eq!(Parser::new(&mut input).next(), Ok(&token)); +} + +#[test] +fn test_expect_url() { + fn parse<'a>(s: &mut ParserInput<'a>) -> Result<CowRcStr<'a>, BasicParseError<'a>> { + Parser::new(s).expect_url() + } + let mut input = ParserInput::new("url()"); + assert_eq!(parse(&mut input).unwrap(), ""); + let mut input = ParserInput::new("url( "); + assert_eq!(parse(&mut input).unwrap(), ""); + let mut input = ParserInput::new("url( abc"); + assert_eq!(parse(&mut input).unwrap(), "abc"); + let mut input = ParserInput::new("url( abc \t)"); + assert_eq!(parse(&mut input).unwrap(), "abc"); + let mut input = ParserInput::new("url( 'abc' \t)"); + assert_eq!(parse(&mut input).unwrap(), "abc"); + let mut input = ParserInput::new("url(abc more stuff)"); + assert!(parse(&mut input).is_err()); + // The grammar at https://drafts.csswg.org/css-values/#urls plans for `<url-modifier>*` + // at the position of "more stuff", but no such modifier is defined yet. 
+ let mut input = ParserInput::new("url('abc' more stuff)"); + assert!(parse(&mut input).is_err()); +} + +fn run_color_tests<F: Fn(Result<Color, ()>) -> Value>(json_data: &str, to_json: F) { + run_json_tests(json_data, |input| { + let result: Result<_, ParseError<()>> = + input.parse_entirely(|i| Color::parse(i).map_err(Into::into)); + to_json(result.map_err(|_| ())) + }); +} + +#[test] +fn color3() { + run_color_tests(include_str!("css-parsing-tests/color3.json"), |c| { + c.ok().map(|v| v.to_json()).unwrap_or(Value::Null) + }) +} + +#[test] +fn color3_hsl() { + run_color_tests(include_str!("css-parsing-tests/color3_hsl.json"), |c| { + c.ok().map(|v| v.to_json()).unwrap_or(Value::Null) + }) +} + +/// color3_keywords.json is different: R, G and B are in 0..255 rather than 0..1 +#[test] +fn color3_keywords() { + run_color_tests( + include_str!("css-parsing-tests/color3_keywords.json"), + |c| c.ok().map(|v| v.to_json()).unwrap_or(Value::Null), + ) +} + +#[test] +fn nth() { + run_json_tests(include_str!("css-parsing-tests/An+B.json"), |input| { + input + .parse_entirely(|i| { + let result: Result<_, ParseError<()>> = parse_nth(i).map_err(Into::into); + result + }) + .ok() + .map(|(v0, v1)| json!([v0, v1])) + .unwrap_or(Value::Null) + }); +} + +#[test] +fn unicode_range() { + run_json_tests(include_str!("css-parsing-tests/urange.json"), |input| { + let result: Result<_, ParseError<()>> = input.parse_comma_separated(|input| { + let result = UnicodeRange::parse(input).ok().map(|r| (r.start, r.end)); + if input.is_exhausted() { + Ok(result) + } else { + while let Ok(_) = input.next() {} + Ok(None) + } + }); + result + .unwrap() + .iter() + .map(|v| { + if let Some((v0, v1)) = v { + json!([v0, v1]) + } else { + Value::Null + } + }) + .collect::<Vec<_>>() + .to_json() + }); +} + +#[test] +fn serializer_not_preserving_comments() { + serializer(false) +} + +#[test] +fn serializer_preserving_comments() { + serializer(true) +} + +fn serializer(preserve_comments: bool) { + 
run_json_tests( + include_str!("css-parsing-tests/component_value_list.json"), + |input| { + fn write_to( + mut previous_token: TokenSerializationType, + input: &mut Parser, + string: &mut String, + preserve_comments: bool, + ) { + while let Ok(token) = if preserve_comments { + input + .next_including_whitespace_and_comments() + .map(|t| t.clone()) + } else { + input.next_including_whitespace().map(|t| t.clone()) + } { + let token_type = token.serialization_type(); + if !preserve_comments && previous_token.needs_separator_when_before(token_type) + { + string.push_str("/**/") + } + previous_token = token_type; + token.to_css(string).unwrap(); + let closing_token = match token { + Token::Function(_) | Token::ParenthesisBlock => { + Some(Token::CloseParenthesis) + } + Token::SquareBracketBlock => Some(Token::CloseSquareBracket), + Token::CurlyBracketBlock => Some(Token::CloseCurlyBracket), + _ => None, + }; + if let Some(closing_token) = closing_token { + let result: Result<_, ParseError<()>> = input.parse_nested_block(|input| { + write_to(previous_token, input, string, preserve_comments); + Ok(()) + }); + result.unwrap(); + closing_token.to_css(string).unwrap(); + } + } + } + let mut serialized = String::new(); + write_to( + TokenSerializationType::nothing(), + input, + &mut serialized, + preserve_comments, + ); + let mut input = ParserInput::new(&serialized); + let parser = &mut Parser::new(&mut input); + Value::Array(component_values_to_json(parser)) + }, + ); +} + +#[test] +fn serialize_bad_tokens() { + let mut input = ParserInput::new("url(foo\\) b\\)ar)'ba\\'\"z\n4"); + let mut parser = Parser::new(&mut input); + + let token = parser.next().unwrap().clone(); + assert!(matches!(token, Token::BadUrl(_))); + assert_eq!(token.to_css_string(), "url(foo\\) b\\)ar)"); + + let token = parser.next().unwrap().clone(); + assert!(matches!(token, Token::BadString(_))); + assert_eq!(token.to_css_string(), "\"ba'\\\"z"); + + let token = parser.next().unwrap().clone(); + 
assert!(matches!(token, Token::Number { .. })); + assert_eq!(token.to_css_string(), "4"); + + assert!(parser.next().is_err()); +} + +#[test] +fn serialize_current_color() { + let c = Color::CurrentColor; + assert!(c.to_css_string() == "currentcolor"); +} + +#[test] +fn serialize_rgb_full_alpha() { + let c = Color::RGBA(RGBA::new(255, 230, 204, 255)); + assert_eq!(c.to_css_string(), "rgb(255, 230, 204)"); +} + +#[test] +fn serialize_rgba() { + let c = Color::RGBA(RGBA::new(26, 51, 77, 32)); + assert_eq!(c.to_css_string(), "rgba(26, 51, 77, 0.125)"); +} + +#[test] +fn serialize_rgba_two_digit_float_if_roundtrips() { + let c = Color::RGBA(RGBA::from_floats(0., 0., 0., 0.5)); + assert_eq!(c.to_css_string(), "rgba(0, 0, 0, 0.5)"); +} + +#[test] +fn line_numbers() { + let mut input = ParserInput::new(concat!( + "fo\\30\r\n", + "0o bar/*\n", + "*/baz\r\n", + "\n", + "url(\r\n", + " u \r\n", + ")\"a\\\r\n", + "b\"" + )); + let mut input = Parser::new(&mut input); + assert_eq!( + input.current_source_location(), + SourceLocation { line: 0, column: 1 } + ); + assert_eq!( + input.next_including_whitespace(), + Ok(&Token::Ident("fo00o".into())) + ); + assert_eq!( + input.current_source_location(), + SourceLocation { line: 1, column: 3 } + ); + assert_eq!( + input.next_including_whitespace(), + Ok(&Token::WhiteSpace(" ")) + ); + assert_eq!( + input.current_source_location(), + SourceLocation { line: 1, column: 4 } + ); + assert_eq!( + input.next_including_whitespace(), + Ok(&Token::Ident("bar".into())) + ); + assert_eq!( + input.current_source_location(), + SourceLocation { line: 1, column: 7 } + ); + assert_eq!( + input.next_including_whitespace_and_comments(), + Ok(&Token::Comment("\n")) + ); + assert_eq!( + input.current_source_location(), + SourceLocation { line: 2, column: 3 } + ); + assert_eq!( + input.next_including_whitespace(), + Ok(&Token::Ident("baz".into())) + ); + assert_eq!( + input.current_source_location(), + SourceLocation { line: 2, column: 6 } + ); + let 
state = input.state(); + + assert_eq!( + input.next_including_whitespace(), + Ok(&Token::WhiteSpace("\r\n\n")) + ); + assert_eq!( + input.current_source_location(), + SourceLocation { line: 4, column: 1 } + ); + + assert_eq!( + state.source_location(), + SourceLocation { line: 2, column: 6 } + ); + + assert_eq!( + input.next_including_whitespace(), + Ok(&Token::UnquotedUrl("u".into())) + ); + assert_eq!( + input.current_source_location(), + SourceLocation { line: 6, column: 2 } + ); + + assert_eq!( + input.next_including_whitespace(), + Ok(&Token::QuotedString("ab".into())) + ); + assert_eq!( + input.current_source_location(), + SourceLocation { line: 7, column: 3 } + ); + assert!(input.next_including_whitespace().is_err()); +} + +#[test] +fn overflow() { + use std::f32; + use std::iter::repeat; + + let css = r" + 2147483646 + 2147483647 + 2147483648 + 10000000000000 + 1000000000000000000000000000000000000000 + 1{309 zeros} + + -2147483647 + -2147483648 + -2147483649 + -10000000000000 + -1000000000000000000000000000000000000000 + -1{309 zeros} + + 3.30282347e+38 + 3.40282347e+38 + 3.402824e+38 + + -3.30282347e+38 + -3.40282347e+38 + -3.402824e+38 + + " + .replace("{309 zeros}", &repeat('0').take(309).collect::<String>()); + let mut input = ParserInput::new(&css); + let mut input = Parser::new(&mut input); + + assert_eq!(input.expect_integer(), Ok(2147483646)); + assert_eq!(input.expect_integer(), Ok(2147483647)); + assert_eq!(input.expect_integer(), Ok(2147483647)); // Clamp on overflow + assert_eq!(input.expect_integer(), Ok(2147483647)); + assert_eq!(input.expect_integer(), Ok(2147483647)); + assert_eq!(input.expect_integer(), Ok(2147483647)); + + assert_eq!(input.expect_integer(), Ok(-2147483647)); + assert_eq!(input.expect_integer(), Ok(-2147483648)); + assert_eq!(input.expect_integer(), Ok(-2147483648)); // Clamp on overflow + assert_eq!(input.expect_integer(), Ok(-2147483648)); + assert_eq!(input.expect_integer(), Ok(-2147483648)); + 
assert_eq!(input.expect_integer(), Ok(-2147483648)); + + assert_eq!(input.expect_number(), Ok(3.30282347e+38)); + assert_eq!(input.expect_number(), Ok(f32::MAX)); + assert_eq!(input.expect_number(), Ok(f32::INFINITY)); + assert!(f32::MAX != f32::INFINITY); + + assert_eq!(input.expect_number(), Ok(-3.30282347e+38)); + assert_eq!(input.expect_number(), Ok(f32::MIN)); + assert_eq!(input.expect_number(), Ok(f32::NEG_INFINITY)); + assert!(f32::MIN != f32::NEG_INFINITY); +} + +#[test] +fn line_delimited() { + let mut input = ParserInput::new(" { foo ; bar } baz;,"); + let mut input = Parser::new(&mut input); + assert_eq!(input.next(), Ok(&Token::CurlyBracketBlock)); + assert!({ + let result: Result<_, ParseError<()>> = + input.parse_until_after(Delimiter::Semicolon, |_| Ok(42)); + result + } + .is_err()); + assert_eq!(input.next(), Ok(&Token::Comma)); + assert!(input.next().is_err()); +} + +#[test] +fn identifier_serialization() { + // Null bytes + assert_eq!(Token::Ident("\0".into()).to_css_string(), "\u{FFFD}"); + assert_eq!(Token::Ident("a\0".into()).to_css_string(), "a\u{FFFD}"); + assert_eq!(Token::Ident("\0b".into()).to_css_string(), "\u{FFFD}b"); + assert_eq!(Token::Ident("a\0b".into()).to_css_string(), "a\u{FFFD}b"); + + // Replacement character + assert_eq!(Token::Ident("\u{FFFD}".into()).to_css_string(), "\u{FFFD}"); + assert_eq!( + Token::Ident("a\u{FFFD}".into()).to_css_string(), + "a\u{FFFD}" + ); + assert_eq!( + Token::Ident("\u{FFFD}b".into()).to_css_string(), + "\u{FFFD}b" + ); + assert_eq!( + Token::Ident("a\u{FFFD}b".into()).to_css_string(), + "a\u{FFFD}b" + ); + + // Number prefix + assert_eq!(Token::Ident("0a".into()).to_css_string(), "\\30 a"); + assert_eq!(Token::Ident("1a".into()).to_css_string(), "\\31 a"); + assert_eq!(Token::Ident("2a".into()).to_css_string(), "\\32 a"); + assert_eq!(Token::Ident("3a".into()).to_css_string(), "\\33 a"); + assert_eq!(Token::Ident("4a".into()).to_css_string(), "\\34 a"); + 
assert_eq!(Token::Ident("5a".into()).to_css_string(), "\\35 a"); + assert_eq!(Token::Ident("6a".into()).to_css_string(), "\\36 a"); + assert_eq!(Token::Ident("7a".into()).to_css_string(), "\\37 a"); + assert_eq!(Token::Ident("8a".into()).to_css_string(), "\\38 a"); + assert_eq!(Token::Ident("9a".into()).to_css_string(), "\\39 a"); + + // Letter number prefix + assert_eq!(Token::Ident("a0b".into()).to_css_string(), "a0b"); + assert_eq!(Token::Ident("a1b".into()).to_css_string(), "a1b"); + assert_eq!(Token::Ident("a2b".into()).to_css_string(), "a2b"); + assert_eq!(Token::Ident("a3b".into()).to_css_string(), "a3b"); + assert_eq!(Token::Ident("a4b".into()).to_css_string(), "a4b"); + assert_eq!(Token::Ident("a5b".into()).to_css_string(), "a5b"); + assert_eq!(Token::Ident("a6b".into()).to_css_string(), "a6b"); + assert_eq!(Token::Ident("a7b".into()).to_css_string(), "a7b"); + assert_eq!(Token::Ident("a8b".into()).to_css_string(), "a8b"); + assert_eq!(Token::Ident("a9b".into()).to_css_string(), "a9b"); + + // Dash number prefix + assert_eq!(Token::Ident("-0a".into()).to_css_string(), "-\\30 a"); + assert_eq!(Token::Ident("-1a".into()).to_css_string(), "-\\31 a"); + assert_eq!(Token::Ident("-2a".into()).to_css_string(), "-\\32 a"); + assert_eq!(Token::Ident("-3a".into()).to_css_string(), "-\\33 a"); + assert_eq!(Token::Ident("-4a".into()).to_css_string(), "-\\34 a"); + assert_eq!(Token::Ident("-5a".into()).to_css_string(), "-\\35 a"); + assert_eq!(Token::Ident("-6a".into()).to_css_string(), "-\\36 a"); + assert_eq!(Token::Ident("-7a".into()).to_css_string(), "-\\37 a"); + assert_eq!(Token::Ident("-8a".into()).to_css_string(), "-\\38 a"); + assert_eq!(Token::Ident("-9a".into()).to_css_string(), "-\\39 a"); + + // Double dash prefix + assert_eq!(Token::Ident("--a".into()).to_css_string(), "--a"); + + // Various tests + assert_eq!( + Token::Ident("\x01\x02\x1E\x1F".into()).to_css_string(), + "\\1 \\2 \\1e \\1f " + ); + assert_eq!( + 
Token::Ident("\u{0080}\x2D\x5F\u{00A9}".into()).to_css_string(), + "\u{0080}\x2D\x5F\u{00A9}" + ); + assert_eq!(Token::Ident("\x7F\u{0080}\u{0081}\u{0082}\u{0083}\u{0084}\u{0085}\u{0086}\u{0087}\u{0088}\u{0089}\ + \u{008A}\u{008B}\u{008C}\u{008D}\u{008E}\u{008F}\u{0090}\u{0091}\u{0092}\u{0093}\u{0094}\u{0095}\u{0096}\ + \u{0097}\u{0098}\u{0099}\u{009A}\u{009B}\u{009C}\u{009D}\u{009E}\u{009F}".into()).to_css_string(), + "\\7f \u{0080}\u{0081}\u{0082}\u{0083}\u{0084}\u{0085}\u{0086}\u{0087}\u{0088}\u{0089}\u{008A}\u{008B}\u{008C}\ + \u{008D}\u{008E}\u{008F}\u{0090}\u{0091}\u{0092}\u{0093}\u{0094}\u{0095}\u{0096}\u{0097}\u{0098}\u{0099}\ + \u{009A}\u{009B}\u{009C}\u{009D}\u{009E}\u{009F}"); + assert_eq!( + Token::Ident("\u{00A0}\u{00A1}\u{00A2}".into()).to_css_string(), + "\u{00A0}\u{00A1}\u{00A2}" + ); + assert_eq!( + Token::Ident("a0123456789b".into()).to_css_string(), + "a0123456789b" + ); + assert_eq!( + Token::Ident("abcdefghijklmnopqrstuvwxyz".into()).to_css_string(), + "abcdefghijklmnopqrstuvwxyz" + ); + assert_eq!( + Token::Ident("ABCDEFGHIJKLMNOPQRSTUVWXYZ".into()).to_css_string(), + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + ); + assert_eq!( + Token::Ident("\x20\x21\x78\x79".into()).to_css_string(), + "\\ \\!xy" + ); + + // astral symbol (U+1D306 TETRAGRAM FOR CENTRE) + assert_eq!( + Token::Ident("\u{1D306}".into()).to_css_string(), + "\u{1D306}" + ); +} + +trait ToJson { + fn to_json(&self) -> Value; +} + +impl<T> ToJson for T +where + T: Clone, + Value: From<T>, +{ + fn to_json(&self) -> Value { + Value::from(self.clone()) + } +} + +impl ToJson for Color { + fn to_json(&self) -> Value { + match *self { + Color::RGBA(ref rgba) => json!([rgba.red, rgba.green, rgba.blue, rgba.alpha]), + Color::CurrentColor => "currentcolor".to_json(), + } + } +} + +impl<'a> ToJson for CowRcStr<'a> { + fn to_json(&self) -> Value { + let s: &str = &*self; + s.to_json() + } +} + +#[cfg(feature = "bench")] +const BACKGROUND_IMAGE: &'static str = include_str!("big-data-url.css"); + 
+#[cfg(feature = "bench")] +#[bench] +fn unquoted_url(b: &mut Bencher) { + b.iter(|| { + let mut input = ParserInput::new(BACKGROUND_IMAGE); + let mut input = Parser::new(&mut input); + input.look_for_var_or_env_functions(); + + let result = input.try_parse(|input| input.expect_url()); + + assert!(result.is_ok()); + + input.seen_var_or_env_functions(); + (result.is_ok(), input.seen_var_or_env_functions()) + }) +} + +#[cfg(feature = "bench")] +#[bench] +fn numeric(b: &mut Bencher) { + b.iter(|| { + for _ in 0..1000000 { + let mut input = ParserInput::new("10px"); + let mut input = Parser::new(&mut input); + let _ = test::black_box(input.next()); + } + }) +} + +struct JsonParser; + +#[test] +fn no_stack_overflow_multiple_nested_blocks() { + let mut input: String = "{{".into(); + for _ in 0..20 { + let dup = input.clone(); + input.push_str(&dup); + } + let mut input = ParserInput::new(&input); + let mut input = Parser::new(&mut input); + while let Ok(..) = input.next() {} +} + +impl<'i> DeclarationParser<'i> for JsonParser { + type Declaration = Value; + type Error = (); + + fn parse_value<'t>( + &mut self, + name: CowRcStr<'i>, + input: &mut Parser<'i, 't>, + ) -> Result<Value, ParseError<'i, ()>> { + let mut value = vec![]; + let mut important = false; + loop { + let start = input.state(); + if let Ok(mut token) = input.next_including_whitespace().map(|t| t.clone()) { + // Hack to deal with css-parsing-tests assuming that + // `!important` in the middle of a declaration value is OK. 
+ // This can never happen per spec + // (even CSS Variables forbid top-level `!`) + if token == Token::Delim('!') { + input.reset(&start); + if parse_important(input).is_ok() { + if input.is_exhausted() { + important = true; + break; + } + } + input.reset(&start); + token = input.next_including_whitespace().unwrap().clone(); + } + value.push(one_component_value_to_json(token, input)); + } else { + break; + } + } + Ok(JArray!["declaration", name, value, important,]) + } +} + +impl<'i> AtRuleParser<'i> for JsonParser { + type PreludeNoBlock = Vec<Value>; + type PreludeBlock = Vec<Value>; + type AtRule = Value; + type Error = (); + + fn parse_prelude<'t>( + &mut self, + name: CowRcStr<'i>, + input: &mut Parser<'i, 't>, + ) -> Result<AtRuleType<Vec<Value>, Vec<Value>>, ParseError<'i, ()>> { + let prelude = vec![ + "at-rule".to_json(), + name.to_json(), + Value::Array(component_values_to_json(input)), + ]; + match_ignore_ascii_case! { &*name, + "media" | "foo-with-block" => Ok(AtRuleType::WithBlock(prelude)), + "charset" => { + Err(input.new_error(BasicParseErrorKind::AtRuleInvalid(name.clone()).into())) + }, + _ => Ok(AtRuleType::WithoutBlock(prelude)), + } + } + + fn rule_without_block(&mut self, mut prelude: Vec<Value>, _: &ParserState) -> Value { + prelude.push(Value::Null); + Value::Array(prelude) + } + + fn parse_block<'t>( + &mut self, + mut prelude: Vec<Value>, + _: &ParserState, + input: &mut Parser<'i, 't>, + ) -> Result<Value, ParseError<'i, ()>> { + prelude.push(Value::Array(component_values_to_json(input))); + Ok(Value::Array(prelude)) + } +} + +impl<'i> QualifiedRuleParser<'i> for JsonParser { + type Prelude = Vec<Value>; + type QualifiedRule = Value; + type Error = (); + + fn parse_prelude<'t>( + &mut self, + input: &mut Parser<'i, 't>, + ) -> Result<Vec<Value>, ParseError<'i, ()>> { + Ok(component_values_to_json(input)) + } + + fn parse_block<'t>( + &mut self, + prelude: Vec<Value>, + _: &ParserState, + input: &mut Parser<'i, 't>, + ) -> Result<Value, 
ParseError<'i, ()>> { + Ok(JArray![ + "qualified rule", + prelude, + component_values_to_json(input), + ]) + } +} + +fn component_values_to_json(input: &mut Parser) -> Vec<Value> { + let mut values = vec![]; + while let Ok(token) = input.next_including_whitespace().map(|t| t.clone()) { + values.push(one_component_value_to_json(token, input)); + } + values +} + +fn one_component_value_to_json(token: Token, input: &mut Parser) -> Value { + fn numeric(value: f32, int_value: Option<i32>, has_sign: bool) -> Vec<Value> { + vec![ + Token::Number { + value: value, + int_value: int_value, + has_sign: has_sign, + } + .to_css_string() + .to_json(), + match int_value { + Some(i) => i.to_json(), + None => value.to_json(), + }, + match int_value { + Some(_) => "integer", + None => "number", + } + .to_json(), + ] + } + + fn nested(input: &mut Parser) -> Vec<Value> { + let result: Result<_, ParseError<()>> = + input.parse_nested_block(|input| Ok(component_values_to_json(input))); + result.unwrap() + } + + match token { + Token::Ident(value) => JArray!["ident", value], + Token::AtKeyword(value) => JArray!["at-keyword", value], + Token::Hash(value) => JArray!["hash", value, "unrestricted"], + Token::IDHash(value) => JArray!["hash", value, "id"], + Token::QuotedString(value) => JArray!["string", value], + Token::UnquotedUrl(value) => JArray!["url", value], + Token::Delim('\\') => "\\".to_json(), + Token::Delim(value) => value.to_string().to_json(), + + Token::Number { + value, + int_value, + has_sign, + } => Value::Array({ + let mut v = vec!["number".to_json()]; + v.extend(numeric(value, int_value, has_sign)); + v + }), + Token::Percentage { + unit_value, + int_value, + has_sign, + } => Value::Array({ + let mut v = vec!["percentage".to_json()]; + v.extend(numeric(unit_value * 100., int_value, has_sign)); + v + }), + Token::Dimension { + value, + int_value, + has_sign, + unit, + } => Value::Array({ + let mut v = vec!["dimension".to_json()]; + v.extend(numeric(value, int_value, 
has_sign)); + v.push(unit.to_json()); + v + }), + + Token::WhiteSpace(_) => " ".to_json(), + Token::Comment(_) => "/**/".to_json(), + Token::Colon => ":".to_json(), + Token::Semicolon => ";".to_json(), + Token::Comma => ",".to_json(), + Token::IncludeMatch => "~=".to_json(), + Token::DashMatch => "|=".to_json(), + Token::PrefixMatch => "^=".to_json(), + Token::SuffixMatch => "$=".to_json(), + Token::SubstringMatch => "*=".to_json(), + Token::CDO => "<!--".to_json(), + Token::CDC => "-->".to_json(), + + Token::Function(name) => Value::Array({ + let mut v = vec!["function".to_json(), name.to_json()]; + v.extend(nested(input)); + v + }), + Token::ParenthesisBlock => Value::Array({ + let mut v = vec!["()".to_json()]; + v.extend(nested(input)); + v + }), + Token::SquareBracketBlock => Value::Array({ + let mut v = vec!["[]".to_json()]; + v.extend(nested(input)); + v + }), + Token::CurlyBracketBlock => Value::Array({ + let mut v = vec!["{}".to_json()]; + v.extend(nested(input)); + v + }), + Token::BadUrl(_) => JArray!["error", "bad-url"], + Token::BadString(_) => JArray!["error", "bad-string"], + Token::CloseParenthesis => JArray!["error", ")"], + Token::CloseSquareBracket => JArray!["error", "]"], + Token::CloseCurlyBracket => JArray!["error", "}"], + } +} + +/// A previous version of procedural-masquerade had a bug where it +/// would normalize consecutive whitespace to a single space, +/// including in string literals. +#[test] +fn procedural_masquerade_whitespace() { + ascii_case_insensitive_phf_map! { + map -> () = { + " \t\n" => () + } + } + assert_eq!(map(" \t\n"), Some(&())); + assert_eq!(map(" "), None); + + match_ignore_ascii_case! { " \t\n", + " " => panic!("1"), + " \t\n" => {}, + _ => panic!("2"), + } + + match_ignore_ascii_case! 
{ " ", + " \t\n" => panic!("3"), + " " => {}, + _ => panic!("4"), + } +} + +#[test] +fn parse_until_before_stops_at_delimiter_or_end_of_input() { + // For all j and k, inputs[i].1[j] should parse the same as inputs[i].1[k] + // when we use delimiters inputs[i].0. + let inputs = vec![ + ( + Delimiter::Bang | Delimiter::Semicolon, + // Note that the ';extra' is fine, because the ';' acts the same as + // the end of input. + vec!["token stream;extra", "token stream!", "token stream"], + ), + (Delimiter::Bang | Delimiter::Semicolon, vec![";", "!", ""]), + ]; + for equivalent in inputs { + for (j, x) in equivalent.1.iter().enumerate() { + for y in equivalent.1[j + 1..].iter() { + let mut ix = ParserInput::new(x); + let mut ix = Parser::new(&mut ix); + + let mut iy = ParserInput::new(y); + let mut iy = Parser::new(&mut iy); + + let _ = ix.parse_until_before::<_, _, ()>(equivalent.0, |ix| { + iy.parse_until_before::<_, _, ()>(equivalent.0, |iy| { + loop { + let ox = ix.next(); + let oy = iy.next(); + assert_eq!(ox, oy); + if let Err(_) = ox { + break; + } + } + Ok(()) + }) + }); + } + } + } +} + +#[test] +fn parser_maintains_current_line() { + let mut input = ParserInput::new("ident ident;\nident ident ident;\nident"); + let mut parser = Parser::new(&mut input); + assert_eq!(parser.current_line(), "ident ident;"); + assert_eq!(parser.next(), Ok(&Token::Ident("ident".into()))); + assert_eq!(parser.next(), Ok(&Token::Ident("ident".into()))); + assert_eq!(parser.next(), Ok(&Token::Semicolon)); + + assert_eq!(parser.next(), Ok(&Token::Ident("ident".into()))); + assert_eq!(parser.current_line(), "ident ident ident;"); + assert_eq!(parser.next(), Ok(&Token::Ident("ident".into()))); + assert_eq!(parser.next(), Ok(&Token::Ident("ident".into()))); + assert_eq!(parser.next(), Ok(&Token::Semicolon)); + + assert_eq!(parser.next(), Ok(&Token::Ident("ident".into()))); + assert_eq!(parser.current_line(), "ident"); +} + +#[test] +fn parser_with_line_number_offset() { + let mut input = 
ParserInput::new_with_line_number_offset("ident\nident", 72); + let mut parser = Parser::new(&mut input); + assert_eq!( + parser.current_source_location(), + SourceLocation { + line: 72, + column: 1 + } + ); + assert_eq!( + parser.next_including_whitespace_and_comments(), + Ok(&Token::Ident("ident".into())) + ); + assert_eq!( + parser.current_source_location(), + SourceLocation { + line: 72, + column: 6 + } + ); + assert_eq!( + parser.next_including_whitespace_and_comments(), + Ok(&Token::WhiteSpace("\n".into())) + ); + assert_eq!( + parser.current_source_location(), + SourceLocation { + line: 73, + column: 1 + } + ); + assert_eq!( + parser.next_including_whitespace_and_comments(), + Ok(&Token::Ident("ident".into())) + ); + assert_eq!( + parser.current_source_location(), + SourceLocation { + line: 73, + column: 6 + } + ); +} + +#[test] +fn cdc_regression_test() { + let mut input = ParserInput::new("-->x"); + let mut parser = Parser::new(&mut input); + parser.skip_cdc_and_cdo(); + assert_eq!(parser.next(), Ok(&Token::Ident("x".into()))); + assert_eq!( + parser.next(), + Err(BasicParseError { + kind: BasicParseErrorKind::EndOfInput, + location: SourceLocation { line: 0, column: 5 } + }) + ); +} + +#[test] +fn parse_entirely_reports_first_error() { + #[derive(PartialEq, Debug)] + enum E { + Foo, + } + let mut input = ParserInput::new("ident"); + let mut parser = Parser::new(&mut input); + let result: Result<(), _> = parser.parse_entirely(|p| Err(p.new_custom_error(E::Foo))); + assert_eq!( + result, + Err(ParseError { + kind: ParseErrorKind::Custom(E::Foo), + location: SourceLocation { line: 0, column: 1 }, + }) + ); +} + +#[test] +fn parse_sourcemapping_comments() { + let tests = vec![ + ("/*# sourceMappingURL=here*/", Some("here")), + ("/*# sourceMappingURL=here */", Some("here")), + ("/*@ sourceMappingURL=here*/", Some("here")), + ( + "/*@ sourceMappingURL=there*/ /*# sourceMappingURL=here*/", + Some("here"), + ), + ("/*# sourceMappingURL=here there */", 
Some("here")), + ("/*# sourceMappingURL= here */", Some("")), + ("/*# sourceMappingURL=*/", Some("")), + ("/*# sourceMappingUR=here */", None), + ("/*! sourceMappingURL=here */", None), + ("/*# sourceMappingURL = here */", None), + ("/* # sourceMappingURL=here */", None), + ]; + + for test in tests { + let mut input = ParserInput::new(test.0); + let mut parser = Parser::new(&mut input); + while let Ok(_) = parser.next_including_whitespace() {} + assert_eq!(parser.current_source_map_url(), test.1); + } +} + +#[test] +fn parse_sourceurl_comments() { + let tests = vec![ + ("/*# sourceURL=here*/", Some("here")), + ("/*# sourceURL=here */", Some("here")), + ("/*@ sourceURL=here*/", Some("here")), + ("/*@ sourceURL=there*/ /*# sourceURL=here*/", Some("here")), + ("/*# sourceURL=here there */", Some("here")), + ("/*# sourceURL= here */", Some("")), + ("/*# sourceURL=*/", Some("")), + ("/*# sourceMappingUR=here */", None), + ("/*! sourceURL=here */", None), + ("/*# sourceURL = here */", None), + ("/* # sourceURL=here */", None), + ]; + + for test in tests { + let mut input = ParserInput::new(test.0); + let mut parser = Parser::new(&mut input); + while let Ok(_) = parser.next_including_whitespace() {} + assert_eq!(parser.current_source_url(), test.1); + } +} + +#[test] +fn roundtrip_percentage_token() { + fn test_roundtrip(value: &str) { + let mut input = ParserInput::new(value); + let mut parser = Parser::new(&mut input); + let token = parser.next().unwrap(); + assert_eq!(token.to_css_string(), value); + } + // Test simple number serialization + for i in 0..101 { + test_roundtrip(&format!("{}%", i)); + for j in 0..10 { + if j != 0 { + test_roundtrip(&format!("{}.{}%", i, j)); + } + for k in 1..10 { + test_roundtrip(&format!("{}.{}{}%", i, j, k)); + } + } + } +} + +#[test] +fn utf16_columns() { + // This particular test serves two purposes. First, it checks + // that the column number computations are correct. 
Second, it + // checks that tokenizer code paths correctly differentiate + // between the different UTF-8 encoding bytes. In particular + // different leader bytes and continuation bytes are treated + // differently, so we make sure to include all lengths in the + // tests, using the string "QΡ✈🆒". Also, remember that because + // the column is in units of UTF-16, the 4-byte sequence results + // in two columns. + let tests = vec![ + ("", 1), + ("ascii", 6), + ("/*QΡ✈🆒*/", 10), + ("'QΡ✈🆒*'", 9), + ("\"\\\"'QΡ✈🆒*'", 12), + ("\\Q\\Ρ\\✈\\🆒", 10), + ("QΡ✈🆒", 6), + ("QΡ✈🆒\\Q\\Ρ\\✈\\🆒", 15), + ("newline\r\nQΡ✈🆒", 6), + ("url(QΡ✈🆒\\Q\\Ρ\\✈\\🆒)", 20), + ("url(QΡ✈🆒)", 11), + ("url(\r\nQΡ✈🆒\\Q\\Ρ\\✈\\🆒)", 16), + ("url(\r\nQΡ✈🆒\\Q\\Ρ\\✈\\🆒", 15), + ("url(\r\nQΡ✈🆒\\Q\\Ρ\\✈\\🆒 x", 17), + ("QΡ✈🆒()", 8), + // Test that under/over-flow of current_line_start_position is + // handled properly; see the special case in consume_4byte_intro. + ("🆒", 3), + ]; + + for test in tests { + let mut input = ParserInput::new(test.0); + let mut parser = Parser::new(&mut input); + + // Read all tokens. + loop { + match parser.next() { + Err(BasicParseError { + kind: BasicParseErrorKind::EndOfInput, + .. + }) => { + break; + } + Err(_) => { + assert!(false); + } + Ok(_) => {} + }; + } + + // Check the resulting column. + assert_eq!(parser.current_source_location().column, test.1); + } +} + +#[test] +fn servo_define_css_keyword_enum() { + macro_rules! define_css_keyword_enum { + (pub enum $name:ident { $($variant:ident = $css:expr,)+ }) => { + #[derive(PartialEq, Debug)] + pub enum $name { + $($variant),+ + } + + impl $name { + pub fn from_ident(ident: &str) -> Result<$name, ()> { + match_ignore_ascii_case! { ident, + $($css => Ok($name::$variant),)+ + _ => Err(()) + } + } + } + } + } + define_css_keyword_enum! 
{ + pub enum UserZoom { + Zoom = "zoom", + Fixed = "fixed", + } + } + + assert_eq!(UserZoom::from_ident("fixed"), Ok(UserZoom::Fixed)); +} diff --git a/third_party/rust/cssparser/src/tokenizer.rs b/third_party/rust/cssparser/src/tokenizer.rs new file mode 100644 index 0000000000..62f3868362 --- /dev/null +++ b/third_party/rust/cssparser/src/tokenizer.rs @@ -0,0 +1,1397 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// https://drafts.csswg.org/css-syntax/#tokenization + +use self::Token::*; +use crate::cow_rc_str::CowRcStr; +use crate::parser::ParserState; +use matches::matches; +use std::char; +use std::i32; +use std::ops::Range; + +/// One of the pieces the CSS input is broken into. +/// +/// Some components use `Cow` in order to borrow from the original input string +/// and avoid allocating/copying when possible. +#[derive(PartialEq, Debug, Clone)] +pub enum Token<'a> { + /// A [`<ident-token>`](https://drafts.csswg.org/css-syntax/#ident-token-diagram) + Ident(CowRcStr<'a>), + + /// A [`<at-keyword-token>`](https://drafts.csswg.org/css-syntax/#at-keyword-token-diagram) + /// + /// The value does not include the `@` marker. + AtKeyword(CowRcStr<'a>), + + /// A [`<hash-token>`](https://drafts.csswg.org/css-syntax/#hash-token-diagram) with the type flag set to "unrestricted" + /// + /// The value does not include the `#` marker. + Hash(CowRcStr<'a>), + + /// A [`<hash-token>`](https://drafts.csswg.org/css-syntax/#hash-token-diagram) with the type flag set to "id" + /// + /// The value does not include the `#` marker. + IDHash(CowRcStr<'a>), // Hash that is a valid ID selector. + + /// A [`<string-token>`](https://drafts.csswg.org/css-syntax/#string-token-diagram) + /// + /// The value does not include the quotes. 
+ QuotedString(CowRcStr<'a>), + + /// A [`<url-token>`](https://drafts.csswg.org/css-syntax/#url-token-diagram) + /// + /// The value does not include the `url(` `)` markers. Note that `url( <string-token> )` is represented by a + /// `Function` token. + UnquotedUrl(CowRcStr<'a>), + + /// A `<delim-token>` + Delim(char), + + /// A [`<number-token>`](https://drafts.csswg.org/css-syntax/#number-token-diagram) + Number { + /// Whether the number had a `+` or `-` sign. + /// + /// This is used in some cases like the <An+B> micro syntax. (See the `parse_nth` function.) + has_sign: bool, + + /// The value as a float + value: f32, + + /// If the original source did not include a fractional part, the value as an integer. + int_value: Option<i32>, + }, + + /// A [`<percentage-token>`](https://drafts.csswg.org/css-syntax/#percentage-token-diagram) + Percentage { + /// Whether the number had a `+` or `-` sign. + has_sign: bool, + + /// The value as a float, divided by 100 so that the nominal range is 0.0 to 1.0. + unit_value: f32, + + /// If the original source did not include a fractional part, the value as an integer. + /// It is **not** divided by 100. + int_value: Option<i32>, + }, + + /// A [`<dimension-token>`](https://drafts.csswg.org/css-syntax/#dimension-token-diagram) + Dimension { + /// Whether the number had a `+` or `-` sign. + /// + /// This is used in some cases like the <An+B> micro syntax. (See the `parse_nth` function.) + has_sign: bool, + + /// The value as a float + value: f32, + + /// If the original source did not include a fractional part, the value as an integer. + int_value: Option<i32>, + + /// The unit, e.g. "px" in `12px` + unit: CowRcStr<'a>, + }, + + /// A [`<whitespace-token>`](https://drafts.csswg.org/css-syntax/#whitespace-token-diagram) + WhiteSpace(&'a str), + + /// A comment. + /// + /// The CSS Syntax spec does not generate tokens for comments, + /// but we do, because we can (borrowed &str makes it cheap). 
+ /// + /// The value does not include the `/*` `*/` markers. + Comment(&'a str), + + /// A `:` `<colon-token>` + Colon, // : + + /// A `;` `<semicolon-token>` + Semicolon, // ; + + /// A `,` `<comma-token>` + Comma, // , + + /// A `~=` [`<include-match-token>`](https://drafts.csswg.org/css-syntax/#include-match-token-diagram) + IncludeMatch, + + /// A `|=` [`<dash-match-token>`](https://drafts.csswg.org/css-syntax/#dash-match-token-diagram) + DashMatch, + + /// A `^=` [`<prefix-match-token>`](https://drafts.csswg.org/css-syntax/#prefix-match-token-diagram) + PrefixMatch, + + /// A `$=` [`<suffix-match-token>`](https://drafts.csswg.org/css-syntax/#suffix-match-token-diagram) + SuffixMatch, + + /// A `*=` [`<substring-match-token>`](https://drafts.csswg.org/css-syntax/#substring-match-token-diagram) + SubstringMatch, + + /// A `<!--` [`<CDO-token>`](https://drafts.csswg.org/css-syntax/#CDO-token-diagram) + CDO, + + /// A `-->` [`<CDC-token>`](https://drafts.csswg.org/css-syntax/#CDC-token-diagram) + CDC, + + /// A [`<function-token>`](https://drafts.csswg.org/css-syntax/#function-token-diagram) + /// + /// The value (name) does not include the `(` marker. + Function(CowRcStr<'a>), + + /// A `<(-token>` + ParenthesisBlock, + + /// A `<[-token>` + SquareBracketBlock, + + /// A `<{-token>` + CurlyBracketBlock, + + /// A `<bad-url-token>` + /// + /// This token always indicates a parse error. + BadUrl(CowRcStr<'a>), + + /// A `<bad-string-token>` + /// + /// This token always indicates a parse error. + BadString(CowRcStr<'a>), + + /// A `<)-token>` + /// + /// When obtained from one of the `Parser::next*` methods, + /// this token is always unmatched and indicates a parse error. + CloseParenthesis, + + /// A `<]-token>` + /// + /// When obtained from one of the `Parser::next*` methods, + /// this token is always unmatched and indicates a parse error. 
+ CloseSquareBracket, + + /// A `<}-token>` + /// + /// When obtained from one of the `Parser::next*` methods, + /// this token is always unmatched and indicates a parse error. + CloseCurlyBracket, +} + +impl<'a> Token<'a> { + /// Return whether this token represents a parse error. + /// + /// `BadUrl` and `BadString` are tokenizer-level parse errors. + /// + /// `CloseParenthesis`, `CloseSquareBracket`, and `CloseCurlyBracket` are *unmatched* + /// and therefore parse errors when returned by one of the `Parser::next*` methods. + pub fn is_parse_error(&self) -> bool { + matches!( + *self, + BadUrl(_) | BadString(_) | CloseParenthesis | CloseSquareBracket | CloseCurlyBracket + ) + } +} + +#[derive(Clone)] +pub struct Tokenizer<'a> { + input: &'a str, + /// Counted in bytes, not code points. From 0. + position: usize, + /// The position at the start of the current line; but adjusted to + /// ensure that computing the column will give the result in units + /// of UTF-16 characters. + current_line_start_position: usize, + current_line_number: u32, + var_or_env_functions: SeenStatus, + source_map_url: Option<&'a str>, + source_url: Option<&'a str>, +} + +#[derive(Copy, Clone, PartialEq, Eq)] +enum SeenStatus { + DontCare, + LookingForThem, + SeenAtLeastOne, +} + +impl<'a> Tokenizer<'a> { + #[inline] + pub fn new(input: &str) -> Tokenizer { + Tokenizer::with_first_line_number(input, 0) + } + + #[inline] + pub fn with_first_line_number(input: &str, first_line_number: u32) -> Tokenizer { + Tokenizer { + input: input, + position: 0, + current_line_start_position: 0, + current_line_number: first_line_number, + var_or_env_functions: SeenStatus::DontCare, + source_map_url: None, + source_url: None, + } + } + + #[inline] + pub fn look_for_var_or_env_functions(&mut self) { + self.var_or_env_functions = SeenStatus::LookingForThem; + } + + #[inline] + pub fn seen_var_or_env_functions(&mut self) -> bool { + let seen = self.var_or_env_functions == SeenStatus::SeenAtLeastOne; + 
self.var_or_env_functions = SeenStatus::DontCare; + seen + } + + #[inline] + pub fn see_function(&mut self, name: &str) { + if self.var_or_env_functions == SeenStatus::LookingForThem { + if name.eq_ignore_ascii_case("var") || name.eq_ignore_ascii_case("env") { + self.var_or_env_functions = SeenStatus::SeenAtLeastOne; + } + } + } + + #[inline] + pub fn next(&mut self) -> Result<Token<'a>, ()> { + next_token(self) + } + + #[inline] + pub fn position(&self) -> SourcePosition { + SourcePosition(self.position) + } + + #[inline] + pub fn current_source_location(&self) -> SourceLocation { + SourceLocation { + line: self.current_line_number, + column: (self.position - self.current_line_start_position + 1) as u32, + } + } + + #[inline] + pub fn current_source_map_url(&self) -> Option<&'a str> { + self.source_map_url + } + + #[inline] + pub fn current_source_url(&self) -> Option<&'a str> { + self.source_url + } + + #[inline] + pub fn state(&self) -> ParserState { + ParserState { + position: self.position, + current_line_start_position: self.current_line_start_position, + current_line_number: self.current_line_number, + at_start_of: None, + } + } + + #[inline] + pub fn reset(&mut self, state: &ParserState) { + self.position = state.position; + self.current_line_start_position = state.current_line_start_position; + self.current_line_number = state.current_line_number; + } + + #[inline] + pub fn slice_from(&self, start_pos: SourcePosition) -> &'a str { + &self.input[start_pos.0..self.position] + } + + #[inline] + pub fn slice(&self, range: Range<SourcePosition>) -> &'a str { + &self.input[range.start.0..range.end.0] + } + + pub fn current_source_line(&self) -> &'a str { + let current = self.position; + let start = self.input[0..current] + .rfind(|c| matches!(c, '\r' | '\n' | '\x0C')) + .map_or(0, |start| start + 1); + let end = self.input[current..] 
+ .find(|c| matches!(c, '\r' | '\n' | '\x0C')) + .map_or(self.input.len(), |end| current + end); + &self.input[start..end] + } + + #[inline] + pub fn next_byte(&self) -> Option<u8> { + if self.is_eof() { + None + } else { + Some(self.input.as_bytes()[self.position]) + } + } + + // If false, `tokenizer.next_char()` will not panic. + #[inline] + fn is_eof(&self) -> bool { + !self.has_at_least(0) + } + + // If true, the input has at least `n` bytes left *after* the current one. + // That is, `tokenizer.byte_at(n)` will not panic. + #[inline] + fn has_at_least(&self, n: usize) -> bool { + self.position + n < self.input.len() + } + + // Advance over N bytes in the input. This function can advance + // over ASCII bytes (excluding newlines), or UTF-8 sequence + // leaders (excluding leaders for 4-byte sequences). + #[inline] + pub fn advance(&mut self, n: usize) { + if cfg!(debug_assertions) { + // Each byte must either be an ASCII byte or a sequence + // leader, but not a 4-byte leader; also newlines are + // rejected. + for i in 0..n { + let b = self.byte_at(i); + debug_assert!(b.is_ascii() || (b & 0xF0 != 0xF0 && b & 0xC0 != 0x80)); + debug_assert!(b != b'\r' && b != b'\n' && b != b'\x0C'); + } + } + self.position += n + } + + // Assumes non-EOF + #[inline] + fn next_byte_unchecked(&self) -> u8 { + self.byte_at(0) + } + + #[inline] + fn byte_at(&self, offset: usize) -> u8 { + self.input.as_bytes()[self.position + offset] + } + + // Advance over a single byte; the byte must be a UTF-8 sequence + // leader for a 4-byte sequence. + #[inline] + fn consume_4byte_intro(&mut self) { + debug_assert!(self.next_byte_unchecked() & 0xF0 == 0xF0); + // This takes two UTF-16 characters to represent, so we + // actually have an undercount. + self.current_line_start_position = self.current_line_start_position.wrapping_sub(1); + self.position += 1; + } + + // Advance over a single byte; the byte must be a UTF-8 + // continuation byte. 
+ #[inline] + fn consume_continuation_byte(&mut self) { + debug_assert!(self.next_byte_unchecked() & 0xC0 == 0x80); + // Continuation bytes contribute to column overcount. Note + // that due to the special case for the 4-byte sequence intro, + // we must use wrapping add here. + self.current_line_start_position = self.current_line_start_position.wrapping_add(1); + self.position += 1; + } + + // Advance over any kind of byte, excluding newlines. + #[inline(never)] + fn consume_known_byte(&mut self, byte: u8) { + debug_assert!(byte != b'\r' && byte != b'\n' && byte != b'\x0C'); + self.position += 1; + // Continuation bytes contribute to column overcount. + if byte & 0xF0 == 0xF0 { + // This takes two UTF-16 characters to represent, so we + // actually have an undercount. + self.current_line_start_position = self.current_line_start_position.wrapping_sub(1); + } else if byte & 0xC0 == 0x80 { + // Note that due to the special case for the 4-byte + // sequence intro, we must use wrapping add here. + self.current_line_start_position = self.current_line_start_position.wrapping_add(1); + } + } + + #[inline] + fn next_char(&self) -> char { + self.input[self.position..].chars().next().unwrap() + } + + // Given that a newline has been seen, advance over the newline + // and update the state. 
    #[inline]
    fn consume_newline(&mut self) {
        let byte = self.next_byte_unchecked();
        debug_assert!(byte == b'\r' || byte == b'\n' || byte == b'\x0C');
        self.position += 1;
        // Treat "\r\n" as a single newline: also step over the '\n'.
        if byte == b'\r' && self.next_byte() == Some(b'\n') {
            self.position += 1;
        }
        // The new line starts right after the terminator just consumed.
        self.current_line_start_position = self.position;
        self.current_line_number += 1;
    }

    // True if the byte `offset` bytes past the current position exists and
    // is '\n', '\r' or form feed. Bounds-checked, so safe near end of input.
    #[inline]
    fn has_newline_at(&self, offset: usize) -> bool {
        self.position + offset < self.input.len()
            && matches!(self.byte_at(offset), b'\n' | b'\r' | b'\x0C')
    }

    // Consume and return one whole code point, adjusting the line start by
    // the difference between its UTF-8 and UTF-16 lengths.
    #[inline]
    fn consume_char(&mut self) -> char {
        let c = self.next_char();
        let len_utf8 = c.len_utf8();
        self.position += len_utf8;
        // Note that due to the special case for the 4-byte sequence
        // intro, we must use wrapping add here.
        self.current_line_start_position = self
            .current_line_start_position
            .wrapping_add(len_utf8 - c.len_utf16());
        c
    }

    // True if the remaining input begins with the bytes in `needle`.
    #[inline]
    fn starts_with(&self, needle: &[u8]) -> bool {
        self.input.as_bytes()[self.position..].starts_with(needle)
    }

    /// Skips spaces, tabs, newlines and `/* ... */` comments, stopping at
    /// the first byte that starts anything else (or at end of input).
    pub fn skip_whitespace(&mut self) {
        while !self.is_eof() {
            match_byte! { self.next_byte_unchecked(),
                b' ' | b'\t' => {
                    self.advance(1)
                },
                b'\n' | b'\x0C' | b'\r' => {
                    self.consume_newline();
                },
                b'/' => {
                    // Only "/*" opens a comment; a lone '/' ends the skip.
                    if self.starts_with(b"/*") {
                        consume_comment(self);
                    } else {
                        return
                    }
                }
                _ => {
                    return
                }
            }
        }
    }

    /// Like `skip_whitespace`, but additionally skips the `<!--` and `-->`
    /// sequences.
    pub fn skip_cdc_and_cdo(&mut self) {
        while !self.is_eof() {
            match_byte! { self.next_byte_unchecked(),
                b' ' | b'\t' => {
                    self.advance(1)
                },
                b'\n' | b'\x0C' | b'\r' => {
                    self.consume_newline();
                },
                b'/' => {
                    if self.starts_with(b"/*") {
                        consume_comment(self);
                    } else {
                        return
                    }
                }
                b'<' => {
                    if self.starts_with(b"<!--") {
                        self.advance(4)
                    } else {
                        return
                    }
                }
                b'-' => {
                    if self.starts_with(b"-->") {
                        self.advance(3)
                    } else {
                        return
                    }
                }
                _ => {
                    return
                }
            }
        }
    }
}

/// A position from the start of the input, counted in UTF-8 bytes.
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
pub struct SourcePosition(pub(crate) usize);

impl SourcePosition {
    /// Returns the current byte index in the original input.
    #[inline]
    pub fn byte_index(&self) -> usize {
        self.0
    }
}

/// The line and column number for a given position within the input.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub struct SourceLocation {
    /// The line number, starting at 0 for the first line, unless `with_first_line_number` was used.
    pub line: u32,

    /// The column number within a line, starting at 1 for the first character of the line.
    /// Column numbers are counted in UTF-16 code units.
    pub column: u32,
}

// Consume one token starting at the current position, dispatching on its
// first byte. Returns `Err(())` only at end of input.
fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
    if tokenizer.is_eof() {
        return Err(());
    }
    let b = tokenizer.next_byte_unchecked();
    let token = match_byte! { b,
        b' ' | b'\t' => {
            consume_whitespace(tokenizer, false)
        },
        b'\n' | b'\x0C' | b'\r' => {
            consume_whitespace(tokenizer, true)
        },
        b'"' => { consume_string(tokenizer, false) },
        b'#' => {
            tokenizer.advance(1);
            if is_ident_start(tokenizer) { IDHash(consume_name(tokenizer)) }
            else if !tokenizer.is_eof() && match tokenizer.next_byte_unchecked() {
                // Any other valid case here already resulted in IDHash.
                b'0'..=b'9' | b'-' => true,
                _ => false,
            } { Hash(consume_name(tokenizer)) }
            else { Delim('#') }
        },
        b'$' => {
            if tokenizer.starts_with(b"$=") { tokenizer.advance(2); SuffixMatch }
            else { tokenizer.advance(1); Delim('$') }
        },
        b'\'' => { consume_string(tokenizer, true) },
        b'(' => { tokenizer.advance(1); ParenthesisBlock },
        b')' => { tokenizer.advance(1); CloseParenthesis },
        b'*' => {
            if tokenizer.starts_with(b"*=") { tokenizer.advance(2); SubstringMatch }
            else { tokenizer.advance(1); Delim('*') }
        },
        b'+' => {
            // A sign starts a number only when followed by a digit, or by
            // '.' and a digit.
            if (
                tokenizer.has_at_least(1)
                && matches!(tokenizer.byte_at(1), b'0'..=b'9')
            ) || (
                tokenizer.has_at_least(2)
                && tokenizer.byte_at(1) == b'.'
                && matches!(tokenizer.byte_at(2), b'0'..=b'9')
            ) {
                consume_numeric(tokenizer)
            } else {
                tokenizer.advance(1);
                Delim('+')
            }
        },
        b',' => { tokenizer.advance(1); Comma },
        b'-' => {
            // Tried in order: number, "-->", identifier, plain delimiter.
            if (
                tokenizer.has_at_least(1)
                && matches!(tokenizer.byte_at(1), b'0'..=b'9')
            ) || (
                tokenizer.has_at_least(2)
                && tokenizer.byte_at(1) == b'.'
                && matches!(tokenizer.byte_at(2), b'0'..=b'9')
            ) {
                consume_numeric(tokenizer)
            } else if tokenizer.starts_with(b"-->") {
                tokenizer.advance(3);
                CDC
            } else if is_ident_start(tokenizer) {
                consume_ident_like(tokenizer)
            } else {
                tokenizer.advance(1);
                Delim('-')
            }
        },
        b'.' => {
            if tokenizer.has_at_least(1)
                && matches!(tokenizer.byte_at(1), b'0'..=b'9'
            ) {
                consume_numeric(tokenizer)
            } else {
                tokenizer.advance(1);
                Delim('.')
            }
        }
        b'/' => {
            if tokenizer.starts_with(b"/*") {
                Comment(consume_comment(tokenizer))
            } else {
                tokenizer.advance(1);
                Delim('/')
            }
        }
        b'0'..=b'9' => { consume_numeric(tokenizer) },
        b':' => { tokenizer.advance(1); Colon },
        b';' => { tokenizer.advance(1); Semicolon },
        b'<' => {
            if tokenizer.starts_with(b"<!--") {
                tokenizer.advance(4);
                CDO
            } else {
                tokenizer.advance(1);
                Delim('<')
            }
        },
        b'@' => {
            tokenizer.advance(1);
            if is_ident_start(tokenizer) { AtKeyword(consume_name(tokenizer)) }
            else { Delim('@') }
        },
        b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'\0' => { consume_ident_like(tokenizer) },
        b'[' => { tokenizer.advance(1); SquareBracketBlock },
        b'\\' => {
            // A backslash followed by a newline is not an escape.
            if !tokenizer.has_newline_at(1) { consume_ident_like(tokenizer) }
            else { tokenizer.advance(1); Delim('\\') }
        },
        b']' => { tokenizer.advance(1); CloseSquareBracket },
        b'^' => {
            if tokenizer.starts_with(b"^=") { tokenizer.advance(2); PrefixMatch }
            else { tokenizer.advance(1); Delim('^') }
        },
        b'{' => { tokenizer.advance(1); CurlyBracketBlock },
        b'|' => {
            if tokenizer.starts_with(b"|=") { tokenizer.advance(2); DashMatch }
            else { tokenizer.advance(1); Delim('|') }
        },
        b'}' => { tokenizer.advance(1); CloseCurlyBracket },
        b'~' => {
            if tokenizer.starts_with(b"~=") { tokenizer.advance(2); IncludeMatch }
            else { tokenizer.advance(1); Delim('~') }
        },
        _ => {
            // Any non-ASCII byte starts an identifier; any remaining ASCII
            // byte is a single-character delimiter.
            if !b.is_ascii() {
                consume_ident_like(tokenizer)
            } else {
                tokenizer.advance(1);
                Delim(b as char)
            }
        },
    };
    Ok(token)
}

// Consume a run of whitespace whose first byte has already been classified
// by the caller (`newline` says whether it is a line terminator) and return
// it as one `WhiteSpace` token borrowing the input.
fn consume_whitespace<'a>(tokenizer: &mut Tokenizer<'a>, newline: bool) -> Token<'a> {
    let start_position = tokenizer.position();
    if newline {
        tokenizer.consume_newline();
    } else {
        tokenizer.advance(1);
    }
    while !tokenizer.is_eof() {
        let b = tokenizer.next_byte_unchecked();
        match_byte! { b,
            b' ' | b'\t' => {
                tokenizer.advance(1);
            }
            b'\n' | b'\x0C' | b'\r' => {
                tokenizer.consume_newline();
            }
            _ => {
                break
            }
        }
    }
    WhiteSpace(tokenizer.slice_from(start_position))
}

// Check for sourceMappingURL or sourceURL comments and update the
// tokenizer appropriately.
fn check_for_source_map<'a>(tokenizer: &mut Tokenizer<'a>, contents: &'a str) {
    let directive = "# sourceMappingURL=";
    let directive_old = "@ sourceMappingURL=";

    // If there is a source map directive, extract the URL.
    if contents.starts_with(directive) || contents.starts_with(directive_old) {
        // Both spellings have the same length, so one offset serves both.
        let contents = &contents[directive.len()..];
        // The URL is everything up to the first whitespace character.
        tokenizer.source_map_url = contents
            .split(|c| c == ' ' || c == '\t' || c == '\x0C' || c == '\r' || c == '\n')
            .next()
    }

    let directive = "# sourceURL=";
    let directive_old = "@ sourceURL=";

    // If there is a source URL directive, extract the URL.
    if contents.starts_with(directive) || contents.starts_with(directive_old) {
        let contents = &contents[directive.len()..];
        tokenizer.source_url = contents
            .split(|c| c == ' ' || c == '\t' || c == '\x0C' || c == '\r' || c == '\n')
            .next()
    }
}

// Consume a comment; the tokenizer must be positioned at its opening "/*".
// Returns the text between the delimiters. An unterminated comment runs to
// the end of the input. Either way the contents are checked for source-map
// directives.
fn consume_comment<'a>(tokenizer: &mut Tokenizer<'a>) -> &'a str {
    tokenizer.advance(2); // consume "/*"
    let start_position = tokenizer.position();
    while !tokenizer.is_eof() {
        match_byte! { tokenizer.next_byte_unchecked(),
            b'*' => {
                // Remember where the contents would end if this '*' turns
                // out to be the start of the closing "*/".
                let end_position = tokenizer.position();
                tokenizer.advance(1);
                if tokenizer.next_byte() == Some(b'/') {
                    tokenizer.advance(1);
                    let contents = tokenizer.slice(start_position..end_position);
                    check_for_source_map(tokenizer, contents);
                    return contents
                }
            }
            b'\n' | b'\x0C' | b'\r' => {
                tokenizer.consume_newline();
            }
            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
            _ => {
                // ASCII or other leading byte.
                tokenizer.advance(1);
            }
        }
    }
    let contents = tokenizer.slice_from(start_position);
    check_for_source_map(tokenizer, contents);
    contents
}

// Consume a quoted string and wrap the outcome in the matching token:
// `QuotedString` on success, `BadString` on error.
fn consume_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Token<'a> {
    match consume_quoted_string(tokenizer, single_quote) {
        Ok(value) => QuotedString(value),
        Err(value) => BadString(value),
    }
}

/// Returns `Err` on syntax error (i.e. an unescaped newline); the error
/// carries the string contents gathered up to that point. Reaching the end
/// of the input before the closing quote yields `Ok`.
fn consume_quoted_string<'a>(
    tokenizer: &mut Tokenizer<'a>,
    single_quote: bool,
) -> Result<CowRcStr<'a>, CowRcStr<'a>> {
    tokenizer.advance(1); // Skip the initial quote
    // start_pos is at code point boundary, after " or '
    let start_pos = tokenizer.position();
    let mut string_bytes;
    // First loop: as long as no escape or NUL is seen, the value can be
    // returned as a slice borrowed straight from the input.
    loop {
        if tokenizer.is_eof() {
            return Ok(tokenizer.slice_from(start_pos).into());
        }
        match_byte! { tokenizer.next_byte_unchecked(),
            b'"' => {
                if !single_quote {
                    let value = tokenizer.slice_from(start_pos);
                    tokenizer.advance(1);
                    return Ok(value.into())
                }
                tokenizer.advance(1);
            }
            b'\'' => {
                if single_quote {
                    let value = tokenizer.slice_from(start_pos);
                    tokenizer.advance(1);
                    return Ok(value.into())
                }
                tokenizer.advance(1);
            }
            b'\\' | b'\0' => {
                // * The tokenizer’s input is UTF-8 since it’s `&str`.
                // * start_pos is at a code point boundary
                // * so is the current position (which is before '\\' or '\0')
                //
                // So `string_bytes` is well-formed UTF-8.
                string_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
                break
            }
            b'\n' | b'\r' | b'\x0C' => {
                return Err(tokenizer.slice_from(start_pos).into())
            },
            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
            _ => {
                // ASCII or other leading byte.
                tokenizer.advance(1);
            }
        }
    }

    // Second loop: the value now needs an owned buffer, built byte by byte.
    while !tokenizer.is_eof() {
        let b = tokenizer.next_byte_unchecked();
        match_byte! { b,
            b'\n' | b'\r' | b'\x0C' => {
                return Err(
                    // string_bytes is well-formed UTF-8, see other comments.
                    unsafe {
                        from_utf8_release_unchecked(string_bytes)
                    }.into()
                );
            }
            b'"' => {
                tokenizer.advance(1);
                if !single_quote {
                    break;
                }
            }
            b'\'' => {
                tokenizer.advance(1);
                if single_quote {
                    break;
                }
            }
            b'\\' => {
                tokenizer.advance(1);
                if !tokenizer.is_eof() {
                    match tokenizer.next_byte_unchecked() {
                        // Escaped newline
                        b'\n' | b'\x0C' | b'\r' => {
                            tokenizer.consume_newline();
                        }
                        // This pushes one well-formed code point
                        _ => consume_escape_and_write(tokenizer, &mut string_bytes)
                    }
                }
                // else: escaped EOF, do nothing.
                continue;
            }
            b'\0' => {
                tokenizer.advance(1);
                string_bytes.extend("\u{FFFD}".as_bytes());
                continue;
            }
            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
            _ => {
                // ASCII or other leading byte.
                tokenizer.advance(1);
            },
        }

        // If this byte is part of a multi-byte code point,
        // we’ll end up copying the whole code point before this loop does something else.
        string_bytes.push(b);
    }

    Ok(
        // string_bytes is well-formed UTF-8, see other comments.
        unsafe { from_utf8_release_unchecked(string_bytes) }.into(),
    )
}

// True if the input at the current position can start an identifier.
#[inline]
fn is_ident_start(tokenizer: &mut Tokenizer) -> bool {
    !tokenizer.is_eof()
        && match_byte! { tokenizer.next_byte_unchecked(),
            b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'\0' => { true },
            b'-' => {
                tokenizer.has_at_least(1) && match_byte! { tokenizer.byte_at(1),
                    b'a'..=b'z' | b'A'..=b'Z' | b'-' | b'_' | b'\0' => {
                        true
                    }
                    // NOTE(review): in this arm the byte at offset 1 is the
                    // backslash itself, so `has_newline_at(1)` is always
                    // false here. The byte following the backslash is at
                    // offset 2 — confirm whether that was the intended check.
                    b'\\' => { !tokenizer.has_newline_at(1) }
                    b => { !b.is_ascii() },
                }
            },
            b'\\' => { !tokenizer.has_newline_at(1) },
            b => { !b.is_ascii() },
        }
}

// Consume a name, then decide what it is: an `Ident`, a `Function` when a
// '(' follows immediately, or an unquoted URL when the name is `url`.
fn consume_ident_like<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
    let value = consume_name(tokenizer);
    if !tokenizer.is_eof() && tokenizer.next_byte_unchecked() == b'(' {
        tokenizer.advance(1);
        if value.eq_ignore_ascii_case("url") {
            // An `Err` from `consume_unquoted_url` means the argument is a
            // quoted string, so this is an ordinary function token.
            consume_unquoted_url(tokenizer).unwrap_or(Function(value))
        } else {
            tokenizer.see_function(&value);
            Function(value)
        }
    } else {
        Ident(value)
    }
}

// Consume a run of name bytes. The result borrows from the input unless an
// escape or NUL forces an owned, rewritten copy.
fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CowRcStr<'a> {
    // start_pos is the end of the previous token, therefore at a code point boundary
    let start_pos = tokenizer.position();
    let mut value_bytes;
    loop {
        if tokenizer.is_eof() {
            return tokenizer.slice_from(start_pos).into();
        }
        match_byte! { tokenizer.next_byte_unchecked(),
            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' => { tokenizer.advance(1) },
            b'\\' | b'\0' => {
                // * The tokenizer’s input is UTF-8 since it’s `&str`.
                // * start_pos is at a code point boundary
                // * so is the current position (which is before '\\' or '\0')
                //
                // So `value_bytes` is well-formed UTF-8.
                value_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
                break
            }
            b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
            b'\xC0'..=b'\xEF' => { tokenizer.advance(1); }
            b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
            _b => {
                return tokenizer.slice_from(start_pos).into();
            }
        }
    }

    while !tokenizer.is_eof() {
        let b = tokenizer.next_byte_unchecked();
        match_byte! { b,
            b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' => {
                tokenizer.advance(1);
                value_bytes.push(b) // ASCII
            }
            b'\\' => {
                if tokenizer.has_newline_at(1) { break }
                tokenizer.advance(1);
                // This pushes one well-formed code point
                consume_escape_and_write(tokenizer, &mut value_bytes)
            }
            b'\0' => {
                tokenizer.advance(1);
                value_bytes.extend("\u{FFFD}".as_bytes());
            },
            b'\x80'..=b'\xBF' => {
                // This byte *is* part of a multi-byte code point,
                // we’ll end up copying the whole code point before this loop does something else.
                tokenizer.consume_continuation_byte();
                value_bytes.push(b)
            }
            b'\xC0'..=b'\xEF' => {
                // This byte *is* part of a multi-byte code point,
                // we’ll end up copying the whole code point before this loop does something else.
                tokenizer.advance(1);
                value_bytes.push(b)
            }
            b'\xF0'..=b'\xFF' => {
                tokenizer.consume_4byte_intro();
                value_bytes.push(b)
            }
            _ => {
                // ASCII
                break;
            }
        }
    }
    // value_bytes is well-formed UTF-8, see other comments.
    unsafe { from_utf8_release_unchecked(value_bytes) }.into()
}

// Value of an ASCII hexadecimal digit (either case), or `None`.
fn byte_to_hex_digit(b: u8) -> Option<u32> {
    Some(match_byte! { b,
        b'0' ..= b'9' => { b - b'0' },
        b'a' ..= b'f' => { b - b'a' + 10 },
        b'A' ..= b'F' => { b - b'A' + 10 },
        _ => {
            return None
        }
    } as u32)
}

// Value of an ASCII decimal digit, or `None`.
fn byte_to_decimal_digit(b: u8) -> Option<u32> {
    if b >= b'0' && b <= b'9' {
        Some((b - b'0') as u32)
    } else {
        None
    }
}

// Consume a numeric token: `Number`, `Percentage` when followed by '%', or
// `Dimension` when followed by a unit name.
fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
    // Parse [+-]?\d*(\.\d+)?([eE][+-]?\d+)?
    // But this is always called so that there is at least one digit in \d*(\.\d+)?

    // Do all the math in f64 so that large numbers overflow to +/-inf
    // and i32::{MIN, MAX} are within range.

    let (has_sign, sign) = match tokenizer.next_byte_unchecked() {
        b'-' => (true, -1.),
        b'+' => (true, 1.),
        _ => (false, 1.),
    };
    if has_sign {
        tokenizer.advance(1);
    }

    let mut integral_part: f64 = 0.;
    while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
        integral_part = integral_part * 10. + digit as f64;
        tokenizer.advance(1);
        // `next_byte_unchecked` in the loop condition would panic at EOF.
        if tokenizer.is_eof() {
            break;
        }
    }

    let mut is_integer = true;

    let mut fractional_part: f64 = 0.;
    // A '.' belongs to the number only when a digit follows it.
    if tokenizer.has_at_least(1)
        && tokenizer.next_byte_unchecked() == b'.'
        && matches!(tokenizer.byte_at(1), b'0'..=b'9')
    {
        is_integer = false;
        tokenizer.advance(1); // Consume '.'
        let mut factor = 0.1;
        while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
            fractional_part += digit as f64 * factor;
            factor *= 0.1;
            tokenizer.advance(1);
            if tokenizer.is_eof() {
                break;
            }
        }
    }

    let mut value = sign * (integral_part + fractional_part);

    // An exponent is consumed only when 'e'/'E' is followed by a digit, or
    // by a sign and a digit; otherwise the 'e' is left for the unit.
    if tokenizer.has_at_least(1) && matches!(tokenizer.next_byte_unchecked(), b'e' | b'E') {
        if matches!(tokenizer.byte_at(1), b'0'..=b'9')
            || (tokenizer.has_at_least(2)
                && matches!(tokenizer.byte_at(1), b'+' | b'-')
                && matches!(tokenizer.byte_at(2), b'0'..=b'9'))
        {
            is_integer = false;
            tokenizer.advance(1);
            // These bindings shadow the outer pair only inside this block;
            // the token's `has_sign` below is still the leading sign.
            let (has_sign, sign) = match tokenizer.next_byte_unchecked() {
                b'-' => (true, -1.),
                b'+' => (true, 1.),
                _ => (false, 1.),
            };
            if has_sign {
                tokenizer.advance(1);
            }
            let mut exponent: f64 = 0.;
            while let Some(digit) = byte_to_decimal_digit(tokenizer.next_byte_unchecked()) {
                exponent = exponent * 10. + digit as f64;
                tokenizer.advance(1);
                if tokenizer.is_eof() {
                    break;
                }
            }
            value *= f64::powf(10., sign * exponent);
        }
    }

    // Integers additionally carry an `i32` value, clamped to the i32 range.
    let int_value = if is_integer {
        Some(if value >= i32::MAX as f64 {
            i32::MAX
        } else if value <= i32::MIN as f64 {
            i32::MIN
        } else {
            value as i32
        })
    } else {
        None
    };

    if !tokenizer.is_eof() && tokenizer.next_byte_unchecked() == b'%' {
        tokenizer.advance(1);
        return Percentage {
            unit_value: (value / 100.) as f32,
            int_value: int_value,
            has_sign: has_sign,
        };
    }
    let value = value as f32;
    if is_ident_start(tokenizer) {
        let unit = consume_name(tokenizer);
        Dimension {
            value: value,
            int_value: int_value,
            has_sign: has_sign,
            unit: unit,
        }
    } else {
        Number {
            value: value,
            int_value: int_value,
            has_sign: has_sign,
        }
    }
}

// Convert bytes to a `String`. With debug assertions the bytes are validated
// and invalid UTF-8 panics; otherwise validation is skipped.
//
// Safety: the caller must pass well-formed UTF-8.
#[inline]
unsafe fn from_utf8_release_unchecked(string_bytes: Vec<u8>) -> String {
    if cfg!(debug_assertions) {
        String::from_utf8(string_bytes).unwrap()
    } else {
        String::from_utf8_unchecked(string_bytes)
    }
}

// Consume the argument of `url(` when it is not a quoted string.
// Returns `Err(())`, without advancing, when the first byte after any
// leading whitespace is a quote.
fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
    // This is only called after "url(", so the current position is a code point boundary.
    let start_position = tokenizer.position;
    let from_start = &tokenizer.input[tokenizer.position..];
    let mut newlines = 0;
    let mut last_newline = 0;
    let mut found_printable_char = false;
    let mut iter = from_start.bytes().enumerate();
    // Scan the leading whitespace without touching the tokenizer, so that
    // nothing has been consumed if a quote turns up.
    loop {
        let (offset, b) = match iter.next() {
            Some(item) => item,
            None => {
                tokenizer.position = tokenizer.input.len();
                break;
            }
        };
        match_byte! { b,
            b' ' | b'\t' => {},
            b'\n' | b'\x0C' => {
                newlines += 1;
                last_newline = offset;
            }
            b'\r' => {
                // For "\r\n" the following '\n' does the counting.
                if from_start.as_bytes().get(offset + 1) != Some(&b'\n') {
                    newlines += 1;
                    last_newline = offset;
                }
            }
            b'"' | b'\'' => { return Err(()) },  // Do not advance
            b')' => {
                // Don't use advance, because we may be skipping
                // newlines here, and we want to avoid the assert.
                tokenizer.position += offset + 1;
                break
            }
            _ => {
                // Don't use advance, because we may be skipping
                // newlines here, and we want to avoid the assert.
                tokenizer.position += offset;
                found_printable_char = true;
                break
            }
        }
    }

    if newlines > 0 {
        tokenizer.current_line_number += newlines;
        // No need for wrapping_add here, because there's no possible
        // way to wrap.
        tokenizer.current_line_start_position = start_position + last_newline + 1;
    }

    if found_printable_char {
        // This function only consumed ASCII (whitespace) bytes,
        // so the current position is a code point boundary.
        return Ok(consume_unquoted_url_internal(tokenizer));
    } else {
        return Ok(UnquotedUrl("".into()));
    }

    // Consume the URL text itself, starting at its first non-whitespace byte.
    fn consume_unquoted_url_internal<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
        // This function is only called with start_pos at a code point boundary.
        let start_pos = tokenizer.position();
        let mut string_bytes: Vec<u8>;
        // First loop: borrow from the input until an escape or NUL is seen.
        loop {
            if tokenizer.is_eof() {
                return UnquotedUrl(tokenizer.slice_from(start_pos).into());
            }
            match_byte! { tokenizer.next_byte_unchecked(),
                b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
                    let value = tokenizer.slice_from(start_pos);
                    return consume_url_end(tokenizer, start_pos, value.into())
                }
                b')' => {
                    let value = tokenizer.slice_from(start_pos);
                    tokenizer.advance(1);
                    return UnquotedUrl(value.into())
                }
                b'\x01'..=b'\x08' | b'\x0B' | b'\x0E'..=b'\x1F' | b'\x7F'  // non-printable
                    | b'"' | b'\'' | b'(' => {
                    tokenizer.advance(1);
                    return consume_bad_url(tokenizer, start_pos)
                },
                b'\\' | b'\0' => {
                    // * The tokenizer’s input is UTF-8 since it’s `&str`.
                    // * start_pos is at a code point boundary
                    // * so is the current position (which is before '\\' or '\0')
                    //
                    // So `string_bytes` is well-formed UTF-8.
                    string_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
                    break
                }
                b'\x80'..=b'\xBF' => { tokenizer.consume_continuation_byte(); }
                b'\xF0'..=b'\xFF' => { tokenizer.consume_4byte_intro(); }
                _ => {
                    // ASCII or other leading byte.
                    tokenizer.advance(1);
                }
            }
        }
        // Second loop: build an owned, rewritten copy.
        while !tokenizer.is_eof() {
            let b = tokenizer.next_byte_unchecked();
            match_byte! { b,
                b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
                    // string_bytes is well-formed UTF-8, see other comments.
                    let string = unsafe { from_utf8_release_unchecked(string_bytes) }.into();
                    return consume_url_end(tokenizer, start_pos, string)
                }
                b')' => {
                    tokenizer.advance(1);
                    break;
                }
                b'\x01'..=b'\x08' | b'\x0B' | b'\x0E'..=b'\x1F' | b'\x7F'  // non-printable
                    | b'"' | b'\'' | b'(' => {
                    tokenizer.advance(1);
                    return consume_bad_url(tokenizer, start_pos);
                }
                b'\\' => {
                    tokenizer.advance(1);
                    if tokenizer.has_newline_at(0) {
                        return consume_bad_url(tokenizer, start_pos)
                    }

                    // This pushes one well-formed code point to string_bytes
                    consume_escape_and_write(tokenizer, &mut string_bytes)
                },
                b'\0' => {
                    tokenizer.advance(1);
                    string_bytes.extend("\u{FFFD}".as_bytes());
                }
                b'\x80'..=b'\xBF' => {
                    // We’ll end up copying the whole code point
                    // before this loop does something else.
                    tokenizer.consume_continuation_byte();
                    string_bytes.push(b);
                }
                b'\xF0'..=b'\xFF' => {
                    // We’ll end up copying the whole code point
                    // before this loop does something else.
                    tokenizer.consume_4byte_intro();
                    string_bytes.push(b);
                }
                // If this byte is part of a multi-byte code point,
                // we’ll end up copying the whole code point before this loop does something else.
                b => {
                    // ASCII or other leading byte.
                    tokenizer.advance(1);
                    string_bytes.push(b)
                }
            }
        }
        UnquotedUrl(
            // string_bytes is well-formed UTF-8, see other comments.
            unsafe { from_utf8_release_unchecked(string_bytes) }.into(),
        )
    }

    // After the URL text: only whitespace may precede the closing ')'.
    // Anything else makes the whole token a `BadUrl`.
    fn consume_url_end<'a>(
        tokenizer: &mut Tokenizer<'a>,
        start_pos: SourcePosition,
        string: CowRcStr<'a>,
    ) -> Token<'a> {
        while !tokenizer.is_eof() {
            match_byte! { tokenizer.next_byte_unchecked(),
                b')' => {
                    tokenizer.advance(1);
                    break
                }
                b' ' | b'\t' => { tokenizer.advance(1); }
                b'\n' | b'\x0C' | b'\r' => {
                    tokenizer.consume_newline();
                }
                b => {
                    tokenizer.consume_known_byte(b);
                    return consume_bad_url(tokenizer, start_pos);
                }
            }
        }
        UnquotedUrl(string)
    }

    // Skip the rest of a malformed URL and return it as a `BadUrl` whose
    // contents start at `start_pos`.
    fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>, start_pos: SourcePosition) -> Token<'a> {
        // Consume up to the closing )
        while !tokenizer.is_eof() {
            match_byte! { tokenizer.next_byte_unchecked(),
                b')' => {
                    let contents = tokenizer.slice_from(start_pos).into();
                    tokenizer.advance(1);
                    return BadUrl(contents)
                }
                b'\\' => {
                    tokenizer.advance(1);
                    if matches!(tokenizer.next_byte(), Some(b')') | Some(b'\\')) {
                        tokenizer.advance(1); // Skip an escaped ')' or '\'
                    }
                }
                b'\n' | b'\x0C' | b'\r' => {
                    tokenizer.consume_newline();
                }
                b => {
                    tokenizer.consume_known_byte(b);
                }
            }
        }
        BadUrl(tokenizer.slice_from(start_pos).into())
    }
}

// (value, number of digits up to 6)
fn consume_hex_digits<'a>(tokenizer: &mut Tokenizer<'a>) -> (u32, u32) {
    let mut value = 0;
    let mut digits = 0;
    while digits < 6 && !tokenizer.is_eof() {
        match byte_to_hex_digit(tokenizer.next_byte_unchecked()) {
            Some(digit) => {
                value = value * 16 + digit;
                digits += 1;
                tokenizer.advance(1);
            }
            None => break,
        }
    }
    (value, digits)
}

// Same constraints as consume_escape except it writes into `bytes` the result
// instead of returning it.
fn consume_escape_and_write(tokenizer: &mut Tokenizer, bytes: &mut Vec<u8>) {
    bytes.extend(
        consume_escape(tokenizer)
            .encode_utf8(&mut [0; 4])
            .as_bytes(),
    )
}

// Assumes that the U+005C REVERSE SOLIDUS (\) has already been consumed
// and that the next input character has already been verified
// to not be a newline.
fn consume_escape(tokenizer: &mut Tokenizer) -> char {
    if tokenizer.is_eof() {
        return '\u{FFFD}';
    } // Escaped EOF
    match_byte! { tokenizer.next_byte_unchecked(),
        b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
            let (c, _) = consume_hex_digits(tokenizer);
            // One whitespace character after the hex digits is consumed
            // along with the escape.
            if !tokenizer.is_eof() {
                match_byte! { tokenizer.next_byte_unchecked(),
                    b' ' | b'\t' => {
                        tokenizer.advance(1)
                    }
                    b'\n' | b'\x0C' | b'\r' => {
                        tokenizer.consume_newline();
                    }
                    _ => {}
                }
            }
            static REPLACEMENT_CHAR: char = '\u{FFFD}';
            // Zero, and any value `char::from_u32` rejects, becomes U+FFFD.
            if c != 0 {
                let c = char::from_u32(c);
                c.unwrap_or(REPLACEMENT_CHAR)
            } else {
                REPLACEMENT_CHAR
            }
        },
        b'\0' => {
            tokenizer.advance(1);
            '\u{FFFD}'
        }
        _ => { tokenizer.consume_char() }
    }
}

// ---------------------------------------------------------------------------
// File boundary in the diff. Everything above belongs to tokenizer.rs and
// everything below to unicode_range.rs. Original diff header:
//
//   diff --git a/third_party/rust/cssparser/src/unicode_range.rs b/third_party/rust/cssparser/src/unicode_range.rs
//   new file mode 100644
//   index 0000000000..b0a2017cbf
//   --- /dev/null
//   +++ b/third_party/rust/cssparser/src/unicode_range.rs
//   @@ -0,0 +1,181 @@
// ---------------------------------------------------------------------------

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

//! https://drafts.csswg.org/css-syntax/#urange

use crate::tokenizer::Token;
use crate::{BasicParseError, Parser, ToCss};
use std::char;
use std::fmt;

/// One contiguous range of code points.
///
/// Can not be empty. Can represent a single code point when start == end.
#[derive(PartialEq, Eq, Clone, Hash)]
#[repr(C)]
pub struct UnicodeRange {
    /// Inclusive start of the range. In [0, end].
    pub start: u32,

    /// Inclusive end of the range. In [0, 0x10FFFF].
    pub end: u32,
}

impl UnicodeRange {
    /// https://drafts.csswg.org/css-syntax/#urange-syntax
    ///
    /// Expects the identifier `u`, checks that the following tokens have one
    /// of the shapes listed below, then re-parses their source text as a
    /// whole. Fails with an unexpected-token error when the text is not a
    /// valid range, when `end` exceeds `char::MAX`, or when `start > end`.
    pub fn parse<'i, 't>(input: &mut Parser<'i, 't>) -> Result<Self, BasicParseError<'i>> {
        // <urange> =
        //   u '+' <ident-token> '?'* |
        //   u <dimension-token> '?'* |
        //   u <number-token> '?'* |
        //   u <number-token> <dimension-token> |
        //   u <number-token> <number-token> |
        //   u '+' '?'+

        input.expect_ident_matching("u")?;
        let after_u = input.position();
        parse_tokens(input)?;

        // This deviates from the spec in case there are CSS comments
        // between tokens in the middle of one <unicode-range>,
        // but oh well…
        let concatenated_tokens = input.slice_from(after_u);

        let range = match parse_concatenated(concatenated_tokens.as_bytes()) {
            Ok(range) => range,
            Err(()) => {
                return Err(input
                    .new_basic_unexpected_token_error(Token::Ident(concatenated_tokens.into())))
            }
        };
        if range.end > char::MAX as u32 || range.start > range.end {
            Err(input.new_basic_unexpected_token_error(Token::Ident(concatenated_tokens.into())))
        } else {
            Ok(range)
        }
    }
}

// Consume the tokens that can make up a <urange> after the leading `u`.
// Only the token shapes are checked here; the values are extracted later
// from the source text.
fn parse_tokens<'i, 't>(input: &mut Parser<'i, 't>) -> Result<(), BasicParseError<'i>> {
    match input.next_including_whitespace()?.clone() {
        Token::Delim('+') => {
            match *input.next_including_whitespace()? {
                Token::Ident(_) => {}
                Token::Delim('?') => {}
                ref t => {
                    let t = t.clone();
                    return Err(input.new_basic_unexpected_token_error(t));
                }
            }
            parse_question_marks(input)
        }
        Token::Dimension { .. } => parse_question_marks(input),
        Token::Number { .. } => {
            // The token after the number is optional: put it back unless it
            // can be part of the range.
            let after_number = input.state();
            match input.next_including_whitespace() {
                Ok(&Token::Delim('?')) => parse_question_marks(input),
                Ok(&Token::Dimension { .. }) => {}
                Ok(&Token::Number { .. }) => {}
                _ => input.reset(&after_number),
            }
        }
        t => return Err(input.new_basic_unexpected_token_error(t)),
    }
    Ok(())
}

/// Consume as many '?'
as possible +fn parse_question_marks(input: &mut Parser) { + loop { + let start = input.state(); + match input.next_including_whitespace() { + Ok(&Token::Delim('?')) => {} + _ => { + input.reset(&start); + return; + } + } + } +} + +fn parse_concatenated(text: &[u8]) -> Result<UnicodeRange, ()> { + let mut text = match text.split_first() { + Some((&b'+', text)) => text, + _ => return Err(()), + }; + let (first_hex_value, hex_digit_count) = consume_hex(&mut text); + let question_marks = consume_question_marks(&mut text); + let consumed = hex_digit_count + question_marks; + if consumed == 0 || consumed > 6 { + return Err(()); + } + + if question_marks > 0 { + if text.is_empty() { + return Ok(UnicodeRange { + start: first_hex_value << (question_marks * 4), + end: ((first_hex_value + 1) << (question_marks * 4)) - 1, + }); + } + } else if text.is_empty() { + return Ok(UnicodeRange { + start: first_hex_value, + end: first_hex_value, + }); + } else { + if let Some((&b'-', mut text)) = text.split_first() { + let (second_hex_value, hex_digit_count) = consume_hex(&mut text); + if hex_digit_count > 0 && hex_digit_count <= 6 && text.is_empty() { + return Ok(UnicodeRange { + start: first_hex_value, + end: second_hex_value, + }); + } + } + } + Err(()) +} + +fn consume_hex(text: &mut &[u8]) -> (u32, usize) { + let mut value = 0; + let mut digits = 0; + while let Some((&byte, rest)) = text.split_first() { + if let Some(digit_value) = (byte as char).to_digit(16) { + value = value * 0x10 + digit_value; + digits += 1; + *text = rest + } else { + break; + } + } + (value, digits) +} + +fn consume_question_marks(text: &mut &[u8]) -> usize { + let mut question_marks = 0; + while let Some((&b'?', rest)) = text.split_first() { + question_marks += 1; + *text = rest + } + question_marks +} + +impl fmt::Debug for UnicodeRange { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + self.to_css(formatter) + } +} + +impl ToCss for UnicodeRange { + fn to_css<W>(&self, dest: &mut W) -> 
fmt::Result + where + W: fmt::Write, + { + write!(dest, "U+{:X}", self.start)?; + if self.end != self.start { + write!(dest, "-{:X}", self.end)?; + } + Ok(()) + } +} |