summary | refs | log | tree | commit | diff | stats
path: root/vendor/url/src
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 03:57:31 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-30 03:57:31 +0000
commit: dc0db358abe19481e475e10c32149b53370f1a1c (patch)
tree: ab8ce99c4b255ce46f99ef402c27916055b899ee /vendor/url/src
parent: Releasing progress-linux version 1.71.1+dfsg1-2~progress7.99u1. (diff)
download: rustc-dc0db358abe19481e475e10c32149b53370f1a1c.tar.xz
rustc-dc0db358abe19481e475e10c32149b53370f1a1c.zip
Merging upstream version 1.72.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/url/src')
-rw-r--r-- vendor/url/src/host.rs 10
-rw-r--r-- vendor/url/src/lib.rs 114
-rw-r--r-- vendor/url/src/parser.rs 80
-rw-r--r-- vendor/url/src/slicing.rs 32
4 files changed, 207 insertions, 29 deletions
diff --git a/vendor/url/src/host.rs b/vendor/url/src/host.rs
index f1921c654..9931c2f87 100644
--- a/vendor/url/src/host.rs
+++ b/vendor/url/src/host.rs
@@ -269,7 +269,7 @@ fn ends_in_a_number(input: &str) -> bool {
} else {
last
};
- if !last.is_empty() && last.chars().all(|c| ('0'..='9').contains(&c)) {
+ if !last.is_empty() && last.as_bytes().iter().all(|c| c.is_ascii_digit()) {
return true;
}
@@ -297,11 +297,9 @@ fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
}
let valid_number = match r {
- 8 => input.chars().all(|c| ('0'..='7').contains(&c)),
- 10 => input.chars().all(|c| ('0'..='9').contains(&c)),
- 16 => input.chars().all(|c| {
- ('0'..='9').contains(&c) || ('a'..='f').contains(&c) || ('A'..='F').contains(&c)
- }),
+ 8 => input.as_bytes().iter().all(|c| (b'0'..=b'7').contains(c)),
+ 10 => input.as_bytes().iter().all(|c| c.is_ascii_digit()),
+ 16 => input.as_bytes().iter().all(|c| c.is_ascii_hexdigit()),
_ => false,
};
if !valid_number {
diff --git a/vendor/url/src/lib.rs b/vendor/url/src/lib.rs
index 6dc09d12f..ad3c89001 100644
--- a/vendor/url/src/lib.rs
+++ b/vendor/url/src/lib.rs
@@ -121,7 +121,7 @@ url = { version = "2", features = ["serde"] }
*/
-#![doc(html_root_url = "https://docs.rs/url/2.3.1")]
+#![doc(html_root_url = "https://docs.rs/url/2.4.0")]
#![cfg_attr(
feature = "debugger_visualizer",
feature(debugger_visualizer),
@@ -322,6 +322,32 @@ impl Url {
url
}
+ /// Strip trailing spaces from an opaque path, per <https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path>
+ fn strip_trailing_spaces_from_opaque_path(&mut self) {
+ if !self.cannot_be_a_base() {
+ return;
+ }
+
+ if self.fragment_start.is_some() {
+ return;
+ }
+
+ if self.query_start.is_some() {
+ return;
+ }
+
+ let trailing_space_count = self
+ .serialization
+ .chars()
+ .rev()
+ .take_while(|c| *c == ' ')
+ .count();
+
+ let start = self.serialization.len() - trailing_space_count;
+
+ self.serialization.truncate(start);
+ }
+
/// Parse a string as an URL, with this URL as the base URL.
///
/// The inverse of this is [`make_relative`].
@@ -601,7 +627,7 @@ impl Url {
}
assert!(self.scheme_end >= 1);
- assert!(matches!(self.byte_at(0), b'a'..=b'z' | b'A'..=b'Z'));
+ assert!(self.byte_at(0).is_ascii_alphabetic());
assert!(self
.slice(1..self.scheme_end)
.chars()
@@ -657,7 +683,14 @@ impl Url {
assert_eq!(self.host_end, self.scheme_end + 1);
assert_eq!(self.host, HostInternal::None);
assert_eq!(self.port, None);
- assert_eq!(self.path_start, self.scheme_end + 1);
+ if self.path().starts_with("//") {
+ // special case when first path segment is empty
+ assert_eq!(self.byte_at(self.scheme_end + 1), b'/');
+ assert_eq!(self.byte_at(self.scheme_end + 2), b'.');
+ assert_eq!(self.path_start, self.scheme_end + 3);
+ } else {
+ assert_eq!(self.path_start, self.scheme_end + 1);
+ }
}
if let Some(start) = self.query_start {
assert!(start >= self.path_start);
@@ -786,12 +819,35 @@ impl Url {
self.slice(..self.scheme_end)
}
+ /// Return whether the URL is special (has a special scheme)
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// assert!(Url::parse("http:///tmp/foo")?.is_special());
+ /// assert!(Url::parse("file:///tmp/foo")?.is_special());
+ /// assert!(!Url::parse("moz:///tmp/foo")?.is_special());
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn is_special(&self) -> bool {
+ let scheme_type = SchemeType::from(self.scheme());
+ scheme_type.is_special()
+ }
+
/// Return whether the URL has an 'authority',
/// which can contain a username, password, host, and port number.
///
/// URLs that do *not* are either path-only like `unix:/run/foo.socket`
/// or cannot-be-a-base like `data:text/plain,Stuff`.
///
+ /// See also the `authority` method.
+ ///
/// # Examples
///
/// ```
@@ -817,6 +873,47 @@ impl Url {
self.slice(self.scheme_end..).starts_with("://")
}
+ /// Return the authority of this URL as an ASCII string.
+ ///
+ /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
+ /// of a special URL, or percent encoded for non-special URLs.
+ /// IPv6 addresses are given between `[` and `]` brackets.
+ /// Ports are omitted if they match the well known port of a special URL.
+ ///
+ /// Username and password are percent-encoded.
+ ///
+ /// See also the `has_authority` method.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("unix:/run/foo.socket")?;
+ /// assert_eq!(url.authority(), "");
+ /// let url = Url::parse("file:///tmp/foo")?;
+ /// assert_eq!(url.authority(), "");
+ /// let url = Url::parse("https://user:password@example.com/tmp/foo")?;
+ /// assert_eq!(url.authority(), "user:password@example.com");
+ /// let url = Url::parse("irc://àlex.рф.example.com:6667/foo")?;
+ /// assert_eq!(url.authority(), "%C3%A0lex.%D1%80%D1%84.example.com:6667");
+ /// let url = Url::parse("http://àlex.рф.example.com:80/foo")?;
+ /// assert_eq!(url.authority(), "xn--lex-8ka.xn--p1ai.example.com");
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn authority(&self) -> &str {
+ let scheme_separator_len = "://".len() as u32;
+ if self.has_authority() && self.path_start > self.scheme_end + scheme_separator_len {
+ self.slice(self.scheme_end + scheme_separator_len..self.path_start)
+ } else {
+ ""
+ }
+ }
+
/// Return whether this URL is a cannot-be-a-base URL,
/// meaning that parsing a relative URL string with this URL as the base will return an error.
///
@@ -1391,7 +1488,8 @@ impl Url {
self.serialization.push('#');
self.mutate(|parser| parser.parse_fragment(parser::Input::no_trim(input)))
} else {
- self.fragment_start = None
+ self.fragment_start = None;
+ self.strip_trailing_spaces_from_opaque_path();
}
}
@@ -1454,6 +1552,9 @@ impl Url {
parser::Input::trim_tab_and_newlines(input, vfn),
)
});
+ } else {
+ self.query_start = None;
+ self.strip_trailing_spaces_from_opaque_path();
}
self.restore_already_parsed_fragment(fragment);
@@ -1989,7 +2090,8 @@ impl Url {
if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
return Err(());
}
- if let Some(password) = password {
+ let password = password.unwrap_or_default();
+ if !password.is_empty() {
let host_and_after = self.slice(self.host_start..).to_owned();
self.serialization.truncate(self.username_end as usize);
self.serialization.push(':');
@@ -2805,7 +2907,7 @@ fn file_url_segments_to_pathbuf(
// A windows drive letter must end with a slash.
if bytes.len() > 2
- && matches!(bytes[bytes.len() - 2], b'a'..=b'z' | b'A'..=b'Z')
+ && bytes[bytes.len() - 2].is_ascii_alphabetic()
&& matches!(bytes[bytes.len() - 1], b':' | b'|')
{
bytes.push(b'/');
diff --git a/vendor/url/src/parser.rs b/vendor/url/src/parser.rs
index f5438c505..765cc027c 100644
--- a/vendor/url/src/parser.rs
+++ b/vendor/url/src/parser.rs
@@ -157,9 +157,11 @@ impl SchemeType {
pub fn is_file(&self) -> bool {
matches!(*self, SchemeType::File)
}
+}
- pub fn from(s: &str) -> Self {
- match s {
+impl<T: AsRef<str>> From<T> for SchemeType {
+ fn from(s: T) -> Self {
+ match s.as_ref() {
"http" | "https" | "ws" | "wss" | "ftp" => SchemeType::SpecialNotFile,
"file" => SchemeType::File,
_ => SchemeType::NotSpecial,
@@ -176,7 +178,7 @@ pub fn default_port(scheme: &str) -> Option<u16> {
}
}
-#[derive(Clone)]
+#[derive(Clone, Debug)]
pub struct Input<'i> {
chars: str::Chars<'i>,
}
@@ -474,9 +476,8 @@ impl<'a> Parser<'a> {
let host = HostInternal::None;
let port = None;
let remaining = if let Some(input) = input.split_prefix('/') {
- let path_start = self.serialization.len();
self.serialization.push('/');
- self.parse_path(scheme_type, &mut false, path_start, input)
+ self.parse_path(scheme_type, &mut false, path_start as usize, input)
} else {
self.parse_cannot_be_a_base_path(input)
};
@@ -1156,7 +1157,7 @@ impl<'a> Parser<'a> {
return input;
}
- if maybe_c != None && maybe_c != Some('/') {
+ if maybe_c.is_some() && maybe_c != Some('/') {
self.serialization.push('/');
}
// Otherwise, if c is not the EOF code point:
@@ -1172,7 +1173,7 @@ impl<'a> Parser<'a> {
) -> Input<'i> {
// Relative path state
loop {
- let segment_start = self.serialization.len();
+ let mut segment_start = self.serialization.len();
let mut ends_with_slash = false;
loop {
let input_before_c = input.clone();
@@ -1201,6 +1202,14 @@ impl<'a> Parser<'a> {
}
_ => {
self.check_url_code_point(c, &input);
+ if scheme_type.is_file()
+ && is_normalized_windows_drive_letter(
+ &self.serialization[path_start + 1..],
+ )
+ {
+ self.serialization.push('/');
+ segment_start += 1;
+ }
if self.context == Context::PathSegmentSetter {
if scheme_type.is_special() {
self.serialization
@@ -1248,7 +1257,10 @@ impl<'a> Parser<'a> {
}
_ => {
// If url’s scheme is "file", url’s path is empty, and buffer is a Windows drive letter, then
- if scheme_type.is_file() && is_windows_drive_letter(segment_before_slash) {
+ if scheme_type.is_file()
+ && segment_start == path_start + 1
+ && is_windows_drive_letter(segment_before_slash)
+ {
// Replace the second code point in buffer with U+003A (:).
if let Some(c) = segment_before_slash.chars().next() {
self.serialization.truncate(segment_start);
@@ -1354,9 +1366,50 @@ impl<'a> Parser<'a> {
host_end: u32,
host: HostInternal,
port: Option<u16>,
- path_start: u32,
+ mut path_start: u32,
remaining: Input<'_>,
) -> ParseResult<Url> {
+ // Special case for anarchist URLs with a leading empty path segment
+ // This prevents web+demo:/.//not-a-host/ or web+demo:/path/..//not-a-host/,
+ // when parsed and then serialized, from ending up as web+demo://not-a-host/
+ // (they end up as web+demo:/.//not-a-host/).
+ //
+ // If url’s host is null, url does not have an opaque path,
+ // url’s path’s size is greater than 1, and url’s path[0] is the empty string,
+ // then append U+002F (/) followed by U+002E (.) to output.
+ let scheme_end_as_usize = scheme_end as usize;
+ let path_start_as_usize = path_start as usize;
+ if path_start_as_usize == scheme_end_as_usize + 1 {
+ // Anarchist URL
+ if self.serialization[path_start_as_usize..].starts_with("//") {
+ // Case 1: The base URL did not have an empty path segment, but the resulting one does
+ // Insert the "/." prefix
+ self.serialization.insert_str(path_start_as_usize, "/.");
+ path_start += 2;
+ }
+ assert!(!self.serialization[scheme_end_as_usize..].starts_with("://"));
+ } else if path_start_as_usize == scheme_end_as_usize + 3
+ && &self.serialization[scheme_end_as_usize..path_start_as_usize] == ":/."
+ {
+ // Anarchist URL with leading empty path segment
+ // The base URL has a "/." between the scheme's ":" and the path (the host is null)
+ assert_eq!(self.serialization.as_bytes()[path_start_as_usize], b'/');
+ if self
+ .serialization
+ .as_bytes()
+ .get(path_start_as_usize + 1)
+ .copied()
+ != Some(b'/')
+ {
+ // Case 2: The base URL had an empty path segment, but the resulting one does not
+ // Remove the "/." prefix
+ self.serialization
+ .replace_range(scheme_end_as_usize..path_start_as_usize, ":");
+ path_start -= 2;
+ }
+ assert!(!self.serialization[scheme_end_as_usize..].starts_with("://"));
+ }
+
let (query_start, fragment_start) =
self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
Ok(Url {
@@ -1477,7 +1530,7 @@ impl<'a> Parser<'a> {
if c == '%' {
let mut input = input.clone();
if !matches!((input.next(), input.next()), (Some(a), Some(b))
- if is_ascii_hex_digit(a) && is_ascii_hex_digit(b))
+ if a.is_ascii_hexdigit() && b.is_ascii_hexdigit())
{
vfn(SyntaxViolation::PercentDecode)
}
@@ -1488,11 +1541,6 @@ impl<'a> Parser<'a> {
}
}
-#[inline]
-fn is_ascii_hex_digit(c: char) -> bool {
- matches!(c, 'a'..='f' | 'A'..='F' | '0'..='9')
-}
-
// Non URL code points:
// U+0000 to U+0020 (space)
// " # % < > [ \ ] ^ ` { | }
@@ -1534,7 +1582,7 @@ fn ascii_tab_or_new_line(ch: char) -> bool {
/// https://url.spec.whatwg.org/#ascii-alpha
#[inline]
pub fn ascii_alpha(ch: char) -> bool {
- matches!(ch, 'a'..='z' | 'A'..='Z')
+ ch.is_ascii_alphabetic()
}
#[inline]
diff --git a/vendor/url/src/slicing.rs b/vendor/url/src/slicing.rs
index a90337bb6..c061fee84 100644
--- a/vendor/url/src/slicing.rs
+++ b/vendor/url/src/slicing.rs
@@ -37,6 +37,29 @@ impl Index<Range<Position>> for Url {
}
}
+// Counts how many base-10 digits are required to represent n
+fn count_digits(n: u16) -> usize {
+ match n {
+ 0..=9 => 1,
+ 10..=99 => 2,
+ 100..=999 => 3,
+ 1000..=9999 => 4,
+ 10000..=65535 => 5,
+ }
+}
+
+#[test]
+fn test_count_digits() {
+ assert_eq!(count_digits(0), 1);
+ assert_eq!(count_digits(1), 1);
+ assert_eq!(count_digits(9), 1);
+ assert_eq!(count_digits(10), 2);
+ assert_eq!(count_digits(99), 2);
+ assert_eq!(count_digits(100), 3);
+ assert_eq!(count_digits(9999), 4);
+ assert_eq!(count_digits(65535), 5);
+}
+
/// Indicates a position within a URL based on its components.
///
/// A range of positions can be used for slicing `Url`:
@@ -149,7 +172,14 @@ impl Url {
}
}
- Position::AfterPort => self.path_start as usize,
+ Position::AfterPort => {
+ if let Some(port) = self.port {
+ debug_assert!(self.byte_at(self.host_end) == b':');
+ self.host_end as usize + ":".len() + count_digits(port)
+ } else {
+ self.host_end as usize
+ }
+ }
Position::BeforePath => self.path_start as usize,