diff options
Diffstat (limited to 'vendor/url/src')
-rw-r--r-- | vendor/url/src/host.rs | 10 | ||||
-rw-r--r-- | vendor/url/src/lib.rs | 114 | ||||
-rw-r--r-- | vendor/url/src/parser.rs | 80 | ||||
-rw-r--r-- | vendor/url/src/slicing.rs | 32 |
4 files changed, 207 insertions, 29 deletions
diff --git a/vendor/url/src/host.rs b/vendor/url/src/host.rs index f1921c654..9931c2f87 100644 --- a/vendor/url/src/host.rs +++ b/vendor/url/src/host.rs @@ -269,7 +269,7 @@ fn ends_in_a_number(input: &str) -> bool { } else { last }; - if !last.is_empty() && last.chars().all(|c| ('0'..='9').contains(&c)) { + if !last.is_empty() && last.as_bytes().iter().all(|c| c.is_ascii_digit()) { return true; } @@ -297,11 +297,9 @@ fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> { } let valid_number = match r { - 8 => input.chars().all(|c| ('0'..='7').contains(&c)), - 10 => input.chars().all(|c| ('0'..='9').contains(&c)), - 16 => input.chars().all(|c| { - ('0'..='9').contains(&c) || ('a'..='f').contains(&c) || ('A'..='F').contains(&c) - }), + 8 => input.as_bytes().iter().all(|c| (b'0'..=b'7').contains(c)), + 10 => input.as_bytes().iter().all(|c| c.is_ascii_digit()), + 16 => input.as_bytes().iter().all(|c| c.is_ascii_hexdigit()), _ => false, }; if !valid_number { diff --git a/vendor/url/src/lib.rs b/vendor/url/src/lib.rs index 6dc09d12f..ad3c89001 100644 --- a/vendor/url/src/lib.rs +++ b/vendor/url/src/lib.rs @@ -121,7 +121,7 @@ url = { version = "2", features = ["serde"] } */ -#![doc(html_root_url = "https://docs.rs/url/2.3.1")] +#![doc(html_root_url = "https://docs.rs/url/2.4.0")] #![cfg_attr( feature = "debugger_visualizer", feature(debugger_visualizer), @@ -322,6 +322,32 @@ impl Url { url } + /// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path + fn strip_trailing_spaces_from_opaque_path(&mut self) { + if !self.cannot_be_a_base() { + return; + } + + if self.fragment_start.is_some() { + return; + } + + if self.query_start.is_some() { + return; + } + + let trailing_space_count = self + .serialization + .chars() + .rev() + .take_while(|c| *c == ' ') + .count(); + + let start = self.serialization.len() - trailing_space_count; + + self.serialization.truncate(start); + } + /// Parse a string as an URL, with this URL as the base URL. /// /// The inverse of this is [`make_relative`]. @@ -601,7 +627,7 @@ impl Url { } assert!(self.scheme_end >= 1); - assert!(matches!(self.byte_at(0), b'a'..=b'z' | b'A'..=b'Z')); + assert!(self.byte_at(0).is_ascii_alphabetic()); assert!(self .slice(1..self.scheme_end) .chars() @@ -657,7 +683,14 @@ impl Url { assert_eq!(self.host_end, self.scheme_end + 1); assert_eq!(self.host, HostInternal::None); assert_eq!(self.port, None); - assert_eq!(self.path_start, self.scheme_end + 1); + if self.path().starts_with("//") { + // special case when first path segment is empty + assert_eq!(self.byte_at(self.scheme_end + 1), b'/'); + assert_eq!(self.byte_at(self.scheme_end + 2), b'.'); + assert_eq!(self.path_start, self.scheme_end + 3); + } else { + assert_eq!(self.path_start, self.scheme_end + 1); + } } if let Some(start) = self.query_start { assert!(start >= self.path_start); @@ -786,12 +819,35 @@ impl Url { self.slice(..self.scheme_end) } + /// Return whether the URL is special (has a special scheme) + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// assert!(Url::parse("http:///tmp/foo")?.is_special()); + /// assert!(Url::parse("file:///tmp/foo")?.is_special()); + /// assert!(!Url::parse("moz:///tmp/foo")?.is_special()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn is_special(&self) -> bool { + let scheme_type = SchemeType::from(self.scheme()); + scheme_type.is_special() + } + /// Return whether the URL has an 'authority', /// which can contain a username, password, host, and port number. /// /// URLs that do *not* are either path-only like `unix:/run/foo.socket` /// or cannot-be-a-base like `data:text/plain,Stuff`. /// + /// See also the `authority` method. + /// /// # Examples /// /// ``` @@ -817,6 +873,47 @@ impl Url { self.slice(self.scheme_end..).starts_with("://") } + /// Return the authority of this URL as an ASCII string. + /// + /// Non-ASCII domains are punycode-encoded per IDNA if this is the host + /// of a special URL, or percent encoded for non-special URLs. + /// IPv6 addresses are given between `[` and `]` brackets. + /// Ports are omitted if they match the well known port of a special URL. + /// + /// Username and password are percent-encoded. + /// + /// See also the `has_authority` method. + /// + /// # Examples + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let url = Url::parse("unix:/run/foo.socket")?; + /// assert_eq!(url.authority(), ""); + /// let url = Url::parse("file:///tmp/foo")?; + /// assert_eq!(url.authority(), ""); + /// let url = Url::parse("https://user:password@example.com/tmp/foo")?; + /// assert_eq!(url.authority(), "user:password@example.com"); + /// let url = Url::parse("irc://àlex.рф.example.com:6667/foo")?; + /// assert_eq!(url.authority(), "%C3%A0lex.%D1%80%D1%84.example.com:6667"); + /// let url = Url::parse("http://àlex.рф.example.com:80/foo")?; + /// assert_eq!(url.authority(), "xn--lex-8ka.xn--p1ai.example.com"); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + pub fn authority(&self) -> &str { + let scheme_separator_len = "://".len() as u32; + if self.has_authority() && self.path_start > self.scheme_end + scheme_separator_len { + self.slice(self.scheme_end + scheme_separator_len..self.path_start) + } else { + "" + } + } + /// Return whether this URL is a cannot-be-a-base URL, /// meaning that parsing a relative URL string with this URL as the base will return an error. /// @@ -1391,7 +1488,8 @@ impl Url { self.serialization.push('#'); self.mutate(|parser| parser.parse_fragment(parser::Input::no_trim(input))) } else { - self.fragment_start = None + self.fragment_start = None; + self.strip_trailing_spaces_from_opaque_path(); } } @@ -1454,6 +1552,9 @@ impl Url { parser::Input::trim_tab_and_newlines(input, vfn), ) }); + } else { + self.query_start = None; + self.strip_trailing_spaces_from_opaque_path(); } self.restore_already_parsed_fragment(fragment); @@ -1989,7 +2090,8 @@ impl Url { if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" { return Err(()); } - if let Some(password) = password { + let password = password.unwrap_or_default(); + if !password.is_empty() { let host_and_after = self.slice(self.host_start..).to_owned(); self.serialization.truncate(self.username_end as usize); self.serialization.push(':'); @@ -2805,7 +2907,7 @@ fn file_url_segments_to_pathbuf( // A windows drive letter must end with a slash. if bytes.len() > 2 - && matches!(bytes[bytes.len() - 2], b'a'..=b'z' | b'A'..=b'Z') + && bytes[bytes.len() - 2].is_ascii_alphabetic() && matches!(bytes[bytes.len() - 1], b':' | b'|') { bytes.push(b'/'); diff --git a/vendor/url/src/parser.rs b/vendor/url/src/parser.rs index f5438c505..765cc027c 100644 --- a/vendor/url/src/parser.rs +++ b/vendor/url/src/parser.rs @@ -157,9 +157,11 @@ impl SchemeType { pub fn is_file(&self) -> bool { matches!(*self, SchemeType::File) } +} - pub fn from(s: &str) -> Self { - match s { +impl<T: AsRef<str>> From<T> for SchemeType { + fn from(s: T) -> Self { + match s.as_ref() { "http" | "https" | "ws" | "wss" | "ftp" => SchemeType::SpecialNotFile, "file" => SchemeType::File, _ => SchemeType::NotSpecial, @@ -176,7 +178,7 @@ pub fn default_port(scheme: &str) -> Option<u16> { } } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct Input<'i> { chars: str::Chars<'i>, } @@ -474,9 +476,8 @@ impl<'a> Parser<'a> { let host = HostInternal::None; let port = None; let remaining = if let Some(input) = input.split_prefix('/') { - let path_start = self.serialization.len(); self.serialization.push('/'); - self.parse_path(scheme_type, &mut false, path_start, input) + self.parse_path(scheme_type, &mut false, path_start as usize, input) } else { self.parse_cannot_be_a_base_path(input) }; @@ -1156,7 +1157,7 @@ impl<'a> Parser<'a> { return input; } - if maybe_c != None && maybe_c != Some('/') { + if maybe_c.is_some() && maybe_c != Some('/') { self.serialization.push('/'); } // Otherwise, if c is not the EOF code point: @@ -1172,7 +1173,7 @@ impl<'a> Parser<'a> { ) -> Input<'i> { // Relative path state loop { - let segment_start = self.serialization.len(); + let mut segment_start = self.serialization.len(); let mut ends_with_slash = false; loop { let input_before_c = input.clone(); @@ -1201,6 +1202,14 @@ impl<'a> Parser<'a> { } _ => { self.check_url_code_point(c, &input); + if scheme_type.is_file() + && is_normalized_windows_drive_letter( + &self.serialization[path_start + 1..], + ) + { + self.serialization.push('/'); + segment_start += 1; + } if self.context == Context::PathSegmentSetter { if scheme_type.is_special() { self.serialization @@ -1248,7 +1257,10 @@ impl<'a> Parser<'a> { } _ => { // If url’s scheme is "file", url’s path is empty, and buffer is a Windows drive letter, then - if scheme_type.is_file() && is_windows_drive_letter(segment_before_slash) { + if scheme_type.is_file() + && segment_start == path_start + 1 + && is_windows_drive_letter(segment_before_slash) + { // Replace the second code point in buffer with U+003A (:). if let Some(c) = segment_before_slash.chars().next() { self.serialization.truncate(segment_start); @@ -1354,9 +1366,50 @@ impl<'a> Parser<'a> { host_end: u32, host: HostInternal, port: Option<u16>, - path_start: u32, + mut path_start: u32, remaining: Input<'_>, ) -> ParseResult<Url> { + // Special case for anarchist URL's with a leading empty path segment + // This prevents web+demo:/.//not-a-host/ or web+demo:/path/..//not-a-host/, + // when parsed and then serialized, from ending up as web+demo://not-a-host/ + // (they end up as web+demo:/.//not-a-host/). + // + // If url’s host is null, url does not have an opaque path, + // url’s path’s size is greater than 1, and url’s path[0] is the empty string, + // then append U+002F (/) followed by U+002E (.) to output. + let scheme_end_as_usize = scheme_end as usize; + let path_start_as_usize = path_start as usize; + if path_start_as_usize == scheme_end_as_usize + 1 { + // Anarchist URL + if self.serialization[path_start_as_usize..].starts_with("//") { + // Case 1: The base URL did not have an empty path segment, but the resulting one does + // Insert the "/." prefix + self.serialization.insert_str(path_start_as_usize, "/."); + path_start += 2; + } + assert!(!self.serialization[scheme_end_as_usize..].starts_with("://")); + } else if path_start_as_usize == scheme_end_as_usize + 3 + && &self.serialization[scheme_end_as_usize..path_start_as_usize] == ":/." + { + // Anarchist URL with leading empty path segment + // The base URL has a "/." between the host and the path + assert_eq!(self.serialization.as_bytes()[path_start_as_usize], b'/'); + if self + .serialization + .as_bytes() + .get(path_start_as_usize + 1) + .copied() + != Some(b'/') + { + // Case 2: The base URL had an empty path segment, but the resulting one does not + // Remove the "/." prefix + self.serialization + .replace_range(scheme_end_as_usize..path_start_as_usize, ":"); + path_start -= 2; + } + assert!(!self.serialization[scheme_end_as_usize..].starts_with("://")); + } + let (query_start, fragment_start) = self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?; Ok(Url { @@ -1477,7 +1530,7 @@ impl<'a> Parser<'a> { if c == '%' { let mut input = input.clone(); if !matches!((input.next(), input.next()), (Some(a), Some(b)) - if is_ascii_hex_digit(a) && is_ascii_hex_digit(b)) + if a.is_ascii_hexdigit() && b.is_ascii_hexdigit()) { vfn(SyntaxViolation::PercentDecode) } @@ -1488,11 +1541,6 @@ impl<'a> Parser<'a> { } } -#[inline] -fn is_ascii_hex_digit(c: char) -> bool { - matches!(c, 'a'..='f' | 'A'..='F' | '0'..='9') -} - // Non URL code points: // U+0000 to U+0020 (space) // " # % < > [ \ ] ^ ` { | } @@ -1534,7 +1582,7 @@ fn ascii_tab_or_new_line(ch: char) -> bool { /// https://url.spec.whatwg.org/#ascii-alpha #[inline] pub fn ascii_alpha(ch: char) -> bool { - matches!(ch, 'a'..='z' | 'A'..='Z') + ch.is_ascii_alphabetic() } #[inline] diff --git a/vendor/url/src/slicing.rs b/vendor/url/src/slicing.rs index a90337bb6..c061fee84 100644 --- a/vendor/url/src/slicing.rs +++ b/vendor/url/src/slicing.rs @@ -37,6 +37,29 @@ impl Index<Range<Position>> for Url { } } +// Counts how many base-10 digits are required to represent n in the given base +fn count_digits(n: u16) -> usize { + match n { + 0..=9 => 1, + 10..=99 => 2, + 100..=999 => 3, + 1000..=9999 => 4, + 10000..=65535 => 5, + } +} + +#[test] +fn test_count_digits() { + assert_eq!(count_digits(0), 1); + assert_eq!(count_digits(1), 1); + assert_eq!(count_digits(9), 1); + assert_eq!(count_digits(10), 2); + assert_eq!(count_digits(99), 2); + assert_eq!(count_digits(100), 3); + assert_eq!(count_digits(9999), 4); + assert_eq!(count_digits(65535), 5); +} + /// Indicates a position within a URL based on its components. /// /// A range of positions can be used for slicing `Url`: @@ -149,7 +172,14 @@ impl Url { } } - Position::AfterPort => self.path_start as usize, + Position::AfterPort => { + if let Some(port) = self.port { + debug_assert!(self.byte_at(self.host_end) == b':'); + self.host_end as usize + ":".len() + count_digits(port) + } else { + self.host_end as usize + } + } Position::BeforePath => self.path_start as usize, |