From 20431706a863f92cb37dc512fef6e48d192aaf2c Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:11:38 +0200 Subject: Merging upstream version 1.66.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/url/src/host.rs | 86 +++++++++++++++++++++++++++++------------------- vendor/url/src/lib.rs | 82 +++++++++++++++++++++++++++++++-------------- vendor/url/src/origin.rs | 3 +- vendor/url/src/parser.rs | 37 +++++++-------------- vendor/url/src/quirks.rs | 59 +++++++++++++++++++++++++++------ 5 files changed, 172 insertions(+), 95 deletions(-) (limited to 'vendor/url/src') diff --git a/vendor/url/src/host.rs b/vendor/url/src/host.rs index 953743649..f1921c654 100644 --- a/vendor/url/src/host.rs +++ b/vendor/url/src/host.rs @@ -82,7 +82,9 @@ impl Host { return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6); } let domain = percent_decode(input.as_bytes()).decode_utf8_lossy(); - let domain = idna::domain_to_ascii(&domain)?; + + let domain = Self::domain_to_ascii(&domain)?; + if domain.is_empty() { return Err(ParseError::EmptyHost); } @@ -90,9 +92,7 @@ impl Host { let is_invalid_domain_char = |c| { matches!( c, - '\0' | '\t' - | '\n' - | '\r' + '\0'..='\u{001F}' | ' ' | '#' | '%' @@ -106,12 +106,15 @@ impl Host { | '\\' | ']' | '^' + | '\u{007F}' + | '|' ) }; if domain.find(is_invalid_domain_char).is_some() { Err(ParseError::InvalidDomainCharacter) - } else if let Some(address) = parse_ipv4addr(&domain)? { + } else if ends_in_a_number(&domain) { + let address = parse_ipv4addr(&domain)?; Ok(Host::Ipv4(address)) } else { Ok(Host::Domain(domain)) @@ -145,6 +148,7 @@ impl Host { | '\\' | ']' | '^' + | '|' ) }; @@ -156,6 +160,11 @@ impl Host { )) } } + + /// convert domain with idna + fn domain_to_ascii(domain: &str) -> Result { + idna::domain_to_ascii(domain).map_err(Into::into) + } } impl> fmt::Display for Host { @@ -247,8 +256,33 @@ fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) { } } +/// +fn ends_in_a_number(input: &str) -> bool { + let mut parts = input.rsplit('.'); + let last = parts.next().unwrap(); + let last = if last.is_empty() { + if let Some(last) = parts.next() { + last + } else { + return false; + } + } else { + last + }; + if !last.is_empty() && last.chars().all(|c| ('0'..='9').contains(&c)) { + return true; + } + + parse_ipv4number(last).is_ok() +} + /// +/// Ok(None) means the input is a valid number, but it overflows a `u32`. fn parse_ipv4number(mut input: &str) -> Result, ()> { + if input.is_empty() { + return Err(()); + } + let mut r = 10; if input.starts_with("0x") || input.starts_with("0X") { input = &input[2..]; @@ -258,10 +292,10 @@ fn parse_ipv4number(mut input: &str) -> Result, ()> { r = 8; } - // At the moment we can't know the reason why from_str_radix fails - // https://github.com/rust-lang/rust/issues/22639 - // So instead we check if the input looks like a real number and only return - // an error when it's an overflow. + if input.is_empty() { + return Ok(Some(0)); + } + let valid_number = match r { 8 => input.chars().all(|c| ('0'..='7').contains(&c)), 10 => input.chars().all(|c| ('0'..='9').contains(&c)), @@ -270,50 +304,34 @@ fn parse_ipv4number(mut input: &str) -> Result, ()> { }), _ => false, }; - if !valid_number { - return Ok(None); + return Err(()); } - if input.is_empty() { - return Ok(Some(0)); - } - if input.starts_with('+') { - return Ok(None); - } match u32::from_str_radix(input, r) { - Ok(number) => Ok(Some(number)), - Err(_) => Err(()), + Ok(num) => Ok(Some(num)), + Err(_) => Ok(None), // The only possible error kind here is an integer overflow. + // The validity of the chars in the input is checked above. } } /// -fn parse_ipv4addr(input: &str) -> ParseResult> { - if input.is_empty() { - return Ok(None); - } +fn parse_ipv4addr(input: &str) -> ParseResult { let mut parts: Vec<&str> = input.split('.').collect(); if parts.last() == Some(&"") { parts.pop(); } if parts.len() > 4 { - return Ok(None); + return Err(ParseError::InvalidIpv4Address); } let mut numbers: Vec = Vec::new(); - let mut overflow = false; for part in parts { - if part.is_empty() { - return Ok(None); - } match parse_ipv4number(part) { Ok(Some(n)) => numbers.push(n), - Ok(None) => return Ok(None), - Err(()) => overflow = true, + Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow + Err(()) => return Err(ParseError::InvalidIpv4Address), }; } - if overflow { - return Err(ParseError::InvalidIpv4Address); - } let mut ipv4 = numbers.pop().expect("a non-empty list of numbers"); // Equivalent to: ipv4 >= 256 ** (4 − numbers.len()) if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) { @@ -325,7 +343,7 @@ fn parse_ipv4addr(input: &str) -> ParseResult> { for (counter, n) in numbers.iter().enumerate() { ipv4 += n << (8 * (3 - counter as u32)) } - Ok(Some(Ipv4Addr::from(ipv4))) + Ok(Ipv4Addr::from(ipv4)) } /// diff --git a/vendor/url/src/lib.rs b/vendor/url/src/lib.rs index 42793cf48..6dc09d12f 100644 --- a/vendor/url/src/lib.rs +++ b/vendor/url/src/lib.rs @@ -118,12 +118,16 @@ See [serde documentation](https://serde.rs) for more information. ```toml url = { version = "2", features = ["serde"] } ``` + */ -#![doc(html_root_url = "https://docs.rs/url/2.2.2")] +#![doc(html_root_url = "https://docs.rs/url/2.3.1")] +#![cfg_attr( + feature = "debugger_visualizer", + feature(debugger_visualizer), + debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis") +)] -#[macro_use] -extern crate matches; pub use form_urlencoded; #[cfg(feature = "serde")] @@ -460,7 +464,7 @@ impl Url { } // Add the filename if they are not the same - if base_filename != url_filename { + if !relative.is_empty() || base_filename != url_filename { // If the URIs filename is empty this means that it was a directory // so we'll have to append a '/'. // @@ -1234,14 +1238,9 @@ impl Url { /// # } /// # run().unwrap(); /// ``` - #[allow(clippy::manual_strip)] // introduced in 1.45, MSRV is 1.36 pub fn path_segments(&self) -> Option> { let path = self.path(); - if path.starts_with('/') { - Some(path[1..].split('/')) - } else { - None - } + path.strip_prefix('/').map(|remainder| remainder.split('/')) } /// Return this URL’s query string, if any, as a percent-encoded ASCII string. @@ -1304,7 +1303,7 @@ impl Url { /// # Ok(()) /// # } /// # run().unwrap(); - /// + /// ``` #[inline] pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> { @@ -1351,7 +1350,7 @@ impl Url { } fn mutate) -> R, R>(&mut self, f: F) -> R { - let mut parser = Parser::for_setter(mem::replace(&mut self.serialization, String::new())); + let mut parser = Parser::for_setter(mem::take(&mut self.serialization)); let result = f(&mut parser); self.serialization = parser.serialization; result @@ -1541,6 +1540,19 @@ impl Url { /// url.set_path("data/report.csv"); /// assert_eq!(url.as_str(), "https://example.com/data/report.csv"); /// assert_eq!(url.path(), "/data/report.csv"); + /// + /// // `set_path` percent-encodes the given string if it's not already percent-encoded. + /// let mut url = Url::parse("https://example.com")?; + /// url.set_path("api/some comments"); + /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments"); + /// assert_eq!(url.path(), "/api/some%20comments"); + /// + /// // `set_path` will not double percent-encode the string if it's already percent-encoded. + /// let mut url = Url::parse("https://example.com")?; + /// url.set_path("api/some%20comments"); + /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments"); + /// assert_eq!(url.path(), "/api/some%20comments"); + /// /// # Ok(()) /// # } /// # run().unwrap(); @@ -1792,8 +1804,10 @@ impl Url { return Err(ParseError::SetHostOnCannotBeABaseUrl); } + let scheme_type = SchemeType::from(self.scheme()); + if let Some(host) = host { - if host.is_empty() && SchemeType::from(self.scheme()).is_special() { + if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() { return Err(ParseError::EmptyHost); } let mut host_substr = host; @@ -1817,15 +1831,20 @@ impl Url { self.set_host_internal(Host::parse_opaque(host_substr)?, None); } } else if self.has_host() { - let scheme_type = SchemeType::from(self.scheme()); - if scheme_type.is_special() { + if scheme_type.is_special() && !scheme_type.is_file() { return Err(ParseError::EmptyHost); } else if self.serialization.len() == self.path_start as usize { self.serialization.push('/'); } debug_assert!(self.byte_at(self.scheme_end) == b':'); debug_assert!(self.byte_at(self.path_start) == b'/'); - let new_path_start = self.scheme_end + 1; + + let new_path_start = if scheme_type.is_file() { + self.scheme_end + 3 + } else { + self.scheme_end + 1 + }; + self.serialization .drain(new_path_start as usize..self.path_start as usize); let offset = self.path_start - new_path_start; @@ -2127,7 +2146,7 @@ impl Url { /// /// # Examples /// - /// Change the URL’s scheme from `https` to `foo`: + /// Change the URL’s scheme from `https` to `http`: /// /// ``` /// use url::Url; @@ -2298,7 +2317,7 @@ impl Url { /// # run().unwrap(); /// # } /// ``` - #[cfg(any(unix, windows, target_os = "redox"))] + #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] #[allow(clippy::result_unit_err)] pub fn from_file_path>(path: P) -> Result { let mut serialization = "file://".to_owned(); @@ -2335,7 +2354,7 @@ impl Url { /// /// Note that `std::path` does not consider trailing slashes significant /// and usually does not include them (e.g. in `Path::parent()`). - #[cfg(any(unix, windows, target_os = "redox"))] + #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] #[allow(clippy::result_unit_err)] pub fn from_directory_path>(path: P) -> Result { let mut url = Url::from_file_path(path)?; @@ -2452,7 +2471,7 @@ impl Url { /// (That is, if the percent-decoded path contains a NUL byte or, /// for a Windows path, is not UTF-8.) #[inline] - #[cfg(any(unix, windows, target_os = "redox"))] + #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] #[allow(clippy::result_unit_err)] pub fn to_file_path(&self) -> Result { if let Some(segments) = self.path_segments() { @@ -2511,7 +2530,7 @@ impl fmt::Display for Url { } } -/// String converstion. +/// String conversion. impl From for String { fn from(value: Url) -> String { value.serialization @@ -2656,12 +2675,15 @@ impl<'de> serde::Deserialize<'de> for Url { } } -#[cfg(any(unix, target_os = "redox"))] +#[cfg(any(unix, target_os = "redox", target_os = "wasi"))] fn path_to_file_url_segments( path: &Path, serialization: &mut String, ) -> Result<(u32, HostInternal), ()> { + #[cfg(any(unix, target_os = "redox"))] use std::os::unix::prelude::OsStrExt; + #[cfg(target_os = "wasi")] + use std::os::wasi::prelude::OsStrExt; if !path.is_absolute() { return Err(()); } @@ -2706,6 +2728,7 @@ fn path_to_file_url_segments_windows( let host_start = serialization.len() + 1; let host_end; let host_internal; + match components.next() { Some(Component::Prefix(ref p)) => match p.kind() { Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => { @@ -2726,7 +2749,6 @@ fn path_to_file_url_segments_windows( } _ => return Err(()), }, - _ => return Err(()), } @@ -2735,12 +2757,15 @@ fn path_to_file_url_segments_windows( if component == Component::RootDir { continue; } + path_only_has_prefix = false; // FIXME: somehow work with non-unicode? let component = component.as_os_str().to_str().ok_or(())?; + serialization.push('/'); serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT)); } + // A windows drive letter must end with a slash. if serialization.len() > host_start && parser::is_windows_drive_letter(&serialization[host_start..]) @@ -2748,16 +2773,20 @@ fn path_to_file_url_segments_windows( { serialization.push('/'); } + Ok((host_end, host_internal)) } -#[cfg(any(unix, target_os = "redox"))] +#[cfg(any(unix, target_os = "redox", target_os = "wasi"))] fn file_url_segments_to_pathbuf( host: Option<&str>, segments: str::Split<'_, char>, ) -> Result { use std::ffi::OsStr; + #[cfg(any(unix, target_os = "redox"))] use std::os::unix::prelude::OsStrExt; + #[cfg(target_os = "wasi")] + use std::os::wasi::prelude::OsStrExt; if host.is_some() { return Err(()); @@ -2768,10 +2797,12 @@ fn file_url_segments_to_pathbuf( } else { Vec::new() }; + for segment in segments { bytes.push(b'/'); bytes.extend(percent_decode(segment.as_bytes())); } + // A windows drive letter must end with a slash. if bytes.len() > 2 && matches!(bytes[bytes.len() - 2], b'a'..=b'z' | b'A'..=b'Z') @@ -2779,12 +2810,15 @@ fn file_url_segments_to_pathbuf( { bytes.push(b'/'); } + let os_str = OsStr::from_bytes(&bytes); let path = PathBuf::from(os_str); + debug_assert!( path.is_absolute(), "to_file_path() failed to produce an absolute Path" ); + Ok(path) } diff --git a/vendor/url/src/origin.rs b/vendor/url/src/origin.rs index be2d948b8..81193f510 100644 --- a/vendor/url/src/origin.rs +++ b/vendor/url/src/origin.rs @@ -9,7 +9,6 @@ use crate::host::Host; use crate::parser::default_port; use crate::Url; -use idna::domain_to_unicode; use std::sync::atomic::{AtomicUsize, Ordering}; pub fn url_origin(url: &Url) -> Origin { @@ -93,7 +92,7 @@ impl Origin { Origin::Tuple(ref scheme, ref host, port) => { let host = match *host { Host::Domain(ref domain) => { - let (domain, _errors) = domain_to_unicode(domain); + let (domain, _errors) = idna::domain_to_unicode(domain); Host::Domain(domain) } _ => host.clone(), diff --git a/vendor/url/src/parser.rs b/vendor/url/src/parser.rs index 57be11052..f5438c505 100644 --- a/vendor/url/src/parser.rs +++ b/vendor/url/src/parser.rs @@ -52,15 +52,12 @@ macro_rules! simple_enum_error { /// /// This may be extended in the future so exhaustive matching is /// discouraged with an unused variant. - #[allow(clippy::manual_non_exhaustive)] // introduced in 1.40, MSRV is 1.36 #[derive(PartialEq, Eq, Clone, Copy, Debug)] + #[non_exhaustive] pub enum ParseError { $( $name, )+ - /// Unused variant enable non-exhaustive matching - #[doc(hidden)] - __FutureProof, } impl fmt::Display for ParseError { @@ -69,9 +66,6 @@ macro_rules! simple_enum_error { $( ParseError::$name => fmt.write_str($description), )+ - ParseError::__FutureProof => { - unreachable!("Don't abuse the FutureProof!"); - } } } } @@ -105,15 +99,12 @@ macro_rules! syntax_violation_enum { /// /// This may be extended in the future so exhaustive matching is /// discouraged with an unused variant. - #[allow(clippy::manual_non_exhaustive)] // introduced in 1.40, MSRV is 1.36 #[derive(PartialEq, Eq, Clone, Copy, Debug)] + #[non_exhaustive] pub enum SyntaxViolation { $( $name, )+ - /// Unused variant enable non-exhaustive matching - #[doc(hidden)] - __FutureProof, } impl SyntaxViolation { @@ -122,9 +113,6 @@ macro_rules! syntax_violation_enum { $( SyntaxViolation::$name => $description, )+ - SyntaxViolation::__FutureProof => { - unreachable!("Don't abuse the FutureProof!"); - } } } } @@ -154,7 +142,7 @@ impl fmt::Display for SyntaxViolation { } } -#[derive(Copy, Clone, PartialEq)] +#[derive(Copy, Clone, PartialEq, Eq)] pub enum SchemeType { File, SpecialNotFile, @@ -1227,13 +1215,11 @@ impl<'a> Parser<'a> { } } } - // Going from &str to String to &str to please the 1.33.0 borrow checker - let before_slash_string = if ends_with_slash { - self.serialization[segment_start..self.serialization.len() - 1].to_owned() + let segment_before_slash = if ends_with_slash { + &self.serialization[segment_start..self.serialization.len() - 1] } else { - self.serialization[segment_start..self.serialization.len()].to_owned() + &self.serialization[segment_start..self.serialization.len()] }; - let segment_before_slash: &str = &before_slash_string; match segment_before_slash { // If buffer is a double-dot path segment, shorten url’s path, ".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e" @@ -1292,7 +1278,7 @@ impl<'a> Parser<'a> { //FIXME: log violation let path = self.serialization.split_off(path_start); self.serialization.push('/'); - self.serialization.push_str(&path.trim_start_matches('/')); + self.serialization.push_str(path.trim_start_matches('/')); } input @@ -1423,7 +1409,8 @@ impl<'a> Parser<'a> { scheme_end: u32, mut input: Input<'i>, ) -> Option> { - let mut query = String::new(); // FIXME: use a streaming decoder instead + let len = input.chars.as_str().len(); + let mut query = String::with_capacity(len); // FIXME: use a streaming decoder instead let mut remaining = None; while let Some(c) = input.next() { if c == '#' && self.context == Context::UrlParser { @@ -1563,17 +1550,17 @@ fn is_normalized_windows_drive_letter(segment: &str) -> bool { is_windows_drive_letter(segment) && segment.as_bytes()[1] == b':' } -/// Wether the scheme is file:, the path has a single segment, and that segment +/// Whether the scheme is file:, the path has a single segment, and that segment /// is a Windows drive letter #[inline] pub fn is_windows_drive_letter(segment: &str) -> bool { segment.len() == 2 && starts_with_windows_drive_letter(segment) } -/// Wether path starts with a root slash +/// Whether path starts with a root slash /// and a windows drive letter eg: "/c:" or "/a:/" fn path_starts_with_windows_drive_letter(s: &str) -> bool { - if let Some(c) = s.as_bytes().get(0) { + if let Some(c) = s.as_bytes().first() { matches!(c, b'/' | b'\\' | b'?' | b'#') && starts_with_windows_drive_letter(&s[1..]) } else { false diff --git a/vendor/url/src/quirks.rs b/vendor/url/src/quirks.rs index 0dbc6eb44..0674ebb62 100644 --- a/vendor/url/src/quirks.rs +++ b/vendor/url/src/quirks.rs @@ -14,6 +14,49 @@ use crate::parser::{default_port, Context, Input, Parser, SchemeType}; use crate::{Host, ParseError, Position, Url}; +/// Internal components / offsets of a URL. +/// +/// https://user@pass:example.com:1234/foo/bar?baz#quux +/// | | | | ^^^^| | | +/// | | | | | | | `----- fragment_start +/// | | | | | | `--------- query_start +/// | | | | | `----------------- path_start +/// | | | | `--------------------- port +/// | | | `----------------------- host_end +/// | | `---------------------------------- host_start +/// | `--------------------------------------- username_end +/// `---------------------------------------------- scheme_end +#[derive(Copy, Clone)] +#[cfg(feature = "expose_internals")] +pub struct InternalComponents { + pub scheme_end: u32, + pub username_end: u32, + pub host_start: u32, + pub host_end: u32, + pub port: Option, + pub path_start: u32, + pub query_start: Option, + pub fragment_start: Option, +} + +/// Internal component / parsed offsets of the URL. +/// +/// This can be useful for implementing efficient serialization +/// for the URL. +#[cfg(feature = "expose_internals")] +pub fn internal_components(url: &Url) -> InternalComponents { + InternalComponents { + scheme_end: url.scheme_end, + username_end: url.username_end, + host_start: url.host_start, + host_end: url.host_end, + port: url.port, + path_start: url.path_start, + query_start: url.query_start, + fragment_start: url.fragment_start, + } +} + /// https://url.spec.whatwg.org/#dom-url-domaintoascii pub fn domain_to_ascii(domain: &str) -> String { match Host::parse(domain) { @@ -138,14 +181,10 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> { } } // Make sure we won't set an empty host to a url with a username or a port - if host == Host::Domain("".to_string()) { - if !username(&url).is_empty() { - return Err(()); - } else if let Some(Some(_)) = opt_port { - return Err(()); - } else if url.port().is_some() { - return Err(()); - } + if host == Host::Domain("".to_string()) + && (!username(url).is_empty() || matches!(opt_port, Some(Some(_))) || url.port().is_some()) + { + return Err(()); } url.set_host_internal(host, opt_port); Ok(()) @@ -177,10 +216,10 @@ pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> { // Empty host on special not file url if SchemeType::from(url.scheme()) == SchemeType::SpecialNotFile // Port with an empty host - ||!port(&url).is_empty() + ||!port(url).is_empty() // Empty host that includes credentials || !url.username().is_empty() - || !url.password().unwrap_or(&"").is_empty() + || !url.password().unwrap_or("").is_empty() { return Err(()); } -- cgit v1.2.3