summaryrefslogtreecommitdiffstats
path: root/third_party/rust/url/src
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/url/src
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/url/src')
-rw-r--r--third_party/rust/url/src/form_urlencoded.rs347
-rw-r--r--third_party/rust/url/src/host.rs454
-rw-r--r--third_party/rust/url/src/lib.rs2553
-rw-r--r--third_party/rust/url/src/origin.rs113
-rw-r--r--third_party/rust/url/src/parser.rs1426
-rw-r--r--third_party/rust/url/src/path_segments.rs229
-rw-r--r--third_party/rust/url/src/query_encoding.rs35
-rw-r--r--third_party/rust/url/src/quirks.rs226
-rw-r--r--third_party/rust/url/src/slicing.rs187
9 files changed, 5570 insertions, 0 deletions
diff --git a/third_party/rust/url/src/form_urlencoded.rs b/third_party/rust/url/src/form_urlencoded.rs
new file mode 100644
index 0000000000..b97f35d25a
--- /dev/null
+++ b/third_party/rust/url/src/form_urlencoded.rs
@@ -0,0 +1,347 @@
+// Copyright 2013-2016 The rust-url developers.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Parser and serializer for the [`application/x-www-form-urlencoded` syntax](
+//! http://url.spec.whatwg.org/#application/x-www-form-urlencoded),
+//! as used by HTML forms.
+//!
+//! Converts between a string (such as an URL’s query string)
+//! and a sequence of (name, value) pairs.
+
+use percent_encoding::{percent_decode, percent_encode_byte};
+use query_encoding::{self, decode_utf8_lossy, EncodingOverride};
+use std::borrow::{Borrow, Cow};
+use std::str;
+
+/// Convert a byte string in the `application/x-www-form-urlencoded` syntax
+/// into an iterator of (name, value) pairs.
+///
+/// Use `parse(input.as_bytes())` to parse a `&str` string.
+///
+/// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be
+/// converted to `[("#first", "%try%")]`.
+///
+/// This function only wraps `input`; the splitting and decoding happen as the
+/// returned iterator is advanced.
+#[inline]
+pub fn parse(input: &[u8]) -> Parse {
+ Parse { input }
+}
+/// The return type of `parse()`.
+///
+/// An iterator over the (name, value) pairs found in the input bytes.
+#[derive(Copy, Clone)]
+pub struct Parse<'a> {
+ // Bytes not yet consumed; `next()` replaces this with whatever follows the pair it returns.
+ input: &'a [u8],
+}
+
+impl<'a> Iterator for Parse<'a> {
+    type Item = (Cow<'a, str>, Cow<'a, str>);
+
+    /// Yield the next non-empty `&`-separated sequence as a decoded (name, value) pair.
+    ///
+    /// The sequence is split at its first `=`; a sequence without `=` gets an empty value.
+    fn next(&mut self) -> Option<Self::Item> {
+        while !self.input.is_empty() {
+            let remaining = self.input;
+            // Cut off everything up to the next `&`; what follows it is kept for later calls.
+            let (sequence, rest) = match remaining.iter().position(|&b| b == b'&') {
+                Some(at) => (&remaining[..at], &remaining[at + 1..]),
+                None => (remaining, &[][..]),
+            };
+            self.input = rest;
+            if sequence.is_empty() {
+                // Consecutive or leading/trailing `&` produce no pair.
+                continue;
+            }
+            let (name, value) = match sequence.iter().position(|&b| b == b'=') {
+                Some(at) => (&sequence[..at], &sequence[at + 1..]),
+                None => (sequence, &[][..]),
+            };
+            return Some((decode(name), decode(value)));
+        }
+        None
+    }
+}
+
+/// Decode one name or value: replace `+` with spaces, percent-decode the result,
+/// then hand the bytes to `decode_utf8_lossy` to obtain a string.
+fn decode(input: &[u8]) -> Cow<str> {
+ let replaced = replace_plus(input);
+ decode_utf8_lossy(match percent_decode(&replaced).into() {
+ Cow::Owned(vec) => Cow::Owned(vec),
+ // A borrowed result points into the local `replaced`; pass `replaced` itself on
+ // instead (presumably the bytes are identical in this case; confirm against
+ // `percent_decode`), so the value can still borrow from `input`.
+ Cow::Borrowed(_) => replaced,
+ })
+}
+
+/// Return `input` with every `+` byte turned into a space.
+///
+/// The input is borrowed unchanged when it contains no `+`;
+/// otherwise a modified copy is returned.
+fn replace_plus(input: &[u8]) -> Cow<[u8]> {
+    if !input.contains(&b'+') {
+        return Cow::Borrowed(input);
+    }
+    let replaced: Vec<u8> = input
+        .iter()
+        .map(|&byte| if byte == b'+' { b' ' } else { byte })
+        .collect();
+    Cow::Owned(replaced)
+}
+
+impl<'a> Parse<'a> {
+ /// Return a new iterator that yields pairs of `String` instead of pairs of `Cow<str>`.
+ ///
+ /// The returned iterator wraps `self`; each pair is converted as it is yielded.
+ pub fn into_owned(self) -> ParseIntoOwned<'a> {
+ ParseIntoOwned { inner: self }
+ }
+}
+
+/// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow<str>`.
+///
+/// Created by `Parse::into_owned`.
+pub struct ParseIntoOwned<'a> {
+ // The borrowing iterator whose items are converted to owned strings.
+ inner: Parse<'a>,
+}
+
+impl<'a> Iterator for ParseIntoOwned<'a> {
+    type Item = (String, String);
+
+    /// Advance the wrapped `Parse` iterator and turn both halves of the pair into `String`s.
+    fn next(&mut self) -> Option<Self::Item> {
+        let (name, value) = self.inner.next()?;
+        Some((name.into_owned(), value.into_owned()))
+    }
+}
+
+/// The [`application/x-www-form-urlencoded` byte serializer](
+/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
+///
+/// Return an iterator of `&str` slices.
+///
+/// This function only wraps `input`; the encoding happens as the iterator is advanced.
+pub fn byte_serialize(input: &[u8]) -> ByteSerialize {
+ ByteSerialize { bytes: input }
+}
+
+/// Return value of `byte_serialize()`.
+#[derive(Debug)]
+pub struct ByteSerialize<'a> {
+ // Bytes that still have to be serialized.
+ bytes: &'a [u8],
+}
+
+/// Whether `byte` is copied to the output as-is by the byte serializer:
+/// ASCII letters and digits, plus `*`, `-`, `.` and `_`.
+fn byte_serialized_unchanged(byte: u8) -> bool {
+    match byte {
+        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => true,
+        b'*' | b'-' | b'.' | b'_' => true,
+        _ => false,
+    }
+}
+
+impl<'a> Iterator for ByteSerialize<'a> {
+    type Item = &'a str;
+
+    /// Yield the next chunk of output: `"+"` for a space, the result of
+    /// `percent_encode_byte` for any other byte that must be changed, or the longest
+    /// run of bytes that are emitted unchanged.
+    fn next(&mut self) -> Option<&'a str> {
+        let input = self.bytes;
+        let first = *input.first()?;
+        if !byte_serialized_unchanged(first) {
+            // Bytes that need encoding are emitted one at a time.
+            self.bytes = &input[1..];
+            return Some(match first {
+                b' ' => "+",
+                other => percent_encode_byte(other),
+            });
+        }
+        // `first` is unchanged, so the run is at least one byte long.
+        let run_len = input
+            .iter()
+            .position(|&b| !byte_serialized_unchanged(b))
+            .unwrap_or(input.len());
+        let (run, rest) = input.split_at(run_len);
+        self.bytes = rest;
+        // SAFETY: every byte in `run` passed `byte_serialized_unchanged`, which only
+        // accepts ASCII bytes, so `run` is valid UTF-8.
+        Some(unsafe { str::from_utf8_unchecked(run) })
+    }
+
+    /// At least one chunk if any input remains, and at most one chunk per remaining byte.
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        match self.bytes.len() {
+            0 => (0, Some(0)),
+            len => (1, Some(len)),
+        }
+    }
+}
+
+/// The [`application/x-www-form-urlencoded` serializer](
+/// https://url.spec.whatwg.org/#concept-urlencoded-serializer).
+pub struct Serializer<'a, T: Target> {
+ // `None` once `finish()` has taken the target.
+ target: Option<T>,
+ // Index into the target string where this serializer's output begins;
+ // `clear()` truncates back to it.
+ start_position: usize,
+ // Optional encoding applied to names and values before percent-encoding.
+ encoding: EncodingOverride<'a>,
+}
+
+/// A destination that a `Serializer` can append its output to.
+pub trait Target {
+ /// Borrow the `String` that serialized pairs are appended to.
+ fn as_mut_string(&mut self) -> &mut String;
+ /// Consume the target and produce the value returned by `Serializer::finish`.
+ fn finish(self) -> Self::Finished;
+ /// The type produced by `finish`.
+ type Finished;
+}
+
+// An owned `String` is its own buffer and is handed back unchanged by `finish`.
+impl Target for String {
+ fn as_mut_string(&mut self) -> &mut String {
+ self
+ }
+ fn finish(self) -> Self {
+ self
+ }
+ type Finished = Self;
+}
+
+// A borrowed `String` is written to in place; `finish` returns the same borrow.
+impl<'a> Target for &'a mut String {
+ fn as_mut_string(&mut self) -> &mut String {
+ // Reborrow through the outer `&mut` to get at the `String` itself.
+ &mut **self
+ }
+ fn finish(self) -> Self {
+ self
+ }
+ type Finished = Self;
+}
+
+// `as_mut_string` here exposes the internal serialization of an `Url`,
+// which should not be exposed to users.
+// We achieve that by not giving users direct access to `UrlQuery`:
+// * Its fields are private
+// (and so can not be constructed with struct literal syntax outside of this crate),
+// * It has no constructor
+// * It is only visible (on the type level) to users in the return type of
+// `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
+// * `Serializer` keeps its target in a private field
+// * Unlike in other `Target` impls, `UrlQuery::finish` does not return `Self`.
+impl<'a> Target for ::UrlQuery<'a> {
+ fn as_mut_string(&mut self) -> &mut String {
+ // Panics if `url` has already been taken by `finish`.
+ &mut self.url.as_mut().unwrap().serialization
+ }
+
+ fn finish(mut self) -> &'a mut ::Url {
+ let url = self.url.take().unwrap();
+ // Hand the stored fragment (if any) back to the URL before returning it.
+ url.restore_already_parsed_fragment(self.fragment.take());
+ url
+ }
+
+ type Finished = &'a mut ::Url;
+}
+
+impl<'a, T: Target> Serializer<'a, T> {
+    /// Create a new `application/x-www-form-urlencoded` serializer for the given target.
+    ///
+    /// If the target is non-empty,
+    /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
+    pub fn new(target: T) -> Self {
+        Self::for_suffix(target, 0)
+    }
+
+    /// Create a new `application/x-www-form-urlencoded` serializer
+    /// for a suffix of the given target.
+    ///
+    /// If that suffix is non-empty,
+    /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
+    ///
+    /// Panics if `start_position` is not a valid slicing index into the target string.
+    pub fn for_suffix(mut target: T, start_position: usize) -> Self {
+        // Slicing is done purely for its bounds check: panic if out of bounds.
+        let _ = &target.as_mut_string()[start_position..];
+        Serializer {
+            target: Some(target),
+            start_position,
+            encoding: None,
+        }
+    }
+
+    /// Remove any existing name/value pair.
+    ///
+    /// Panics if called after `.finish()`.
+    pub fn clear(&mut self) -> &mut Self {
+        let start = self.start_position;
+        string(&mut self.target).truncate(start);
+        self
+    }
+
+    /// Set the character encoding to be used for names and values before percent-encoding.
+    pub fn encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self {
+        self.encoding = new;
+        self
+    }
+
+    /// Serialize and append a name/value pair.
+    ///
+    /// Panics if called after `.finish()`.
+    pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self {
+        let output = string(&mut self.target);
+        append_pair(output, self.start_position, self.encoding, name, value);
+        self
+    }
+
+    /// Serialize and append a number of name/value pairs.
+    ///
+    /// This simply calls `append_pair` repeatedly.
+    /// This can be more convenient, so the user doesn’t need to introduce a block
+    /// to limit the scope of `Serializer`’s borrow of its string.
+    ///
+    /// Panics if called after `.finish()`.
+    pub fn extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self
+    where
+        I: IntoIterator,
+        I::Item: Borrow<(K, V)>,
+        K: AsRef<str>,
+        V: AsRef<str>,
+    {
+        // Resolve the output string once, before the iterator is touched.
+        let output = string(&mut self.target);
+        for item in iter {
+            let pair: &(K, V) = item.borrow();
+            append_pair(
+                output,
+                self.start_position,
+                self.encoding,
+                pair.0.as_ref(),
+                pair.1.as_ref(),
+            );
+        }
+        self
+    }
+
+    /// If this serializer was constructed with a string, take and return that string.
+    ///
+    /// ```rust
+    /// use url::form_urlencoded;
+    /// let encoded: String = form_urlencoded::Serializer::new(String::new())
+    ///     .append_pair("foo", "bar & baz")
+    ///     .append_pair("saison", "Été+hiver")
+    ///     .finish();
+    /// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver");
+    /// ```
+    ///
+    /// Panics if called more than once.
+    pub fn finish(&mut self) -> T::Finished {
+        let target = self
+            .target
+            .take()
+            .expect("url::form_urlencoded::Serializer double finish");
+        target.finish()
+    }
+}
+
+/// Push a `&` onto `string` when it already holds content past `start_position`,
+/// i.e. when a pair has been written before.
+fn append_separator_if_needed(string: &mut String, start_position: usize) {
+    let has_earlier_content = start_position < string.len();
+    if has_earlier_content {
+        string.push('&');
+    }
+}
+
+/// Borrow the output string of a serializer's target.
+///
+/// Panics if the target is `None`, i.e. it was already taken by `finish`.
+fn string<T: Target>(target: &mut Option<T>) -> &mut String {
+    let live_target = target
+        .as_mut()
+        .expect("url::form_urlencoded::Serializer finished");
+    live_target.as_mut_string()
+}
+
+/// Append `name=value` to `string`, both halves encoded with `append_encoded`,
+/// preceded by `&` when `string` already has content after `start_position`.
+fn append_pair(
+ string: &mut String,
+ start_position: usize,
+ encoding: EncodingOverride,
+ name: &str,
+ value: &str,
+) {
+ append_separator_if_needed(string, start_position);
+ append_encoded(name, string, encoding);
+ string.push('=');
+ append_encoded(value, string, encoding);
+}
+
+/// Encode `s` with `query_encoding::encode` (honouring `encoding`), run the bytes
+/// through `byte_serialize`, and append the resulting chunks to `string`.
+fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride) {
+ string.extend(byte_serialize(&query_encoding::encode(encoding, s.into())))
+}
diff --git a/third_party/rust/url/src/host.rs b/third_party/rust/url/src/host.rs
new file mode 100644
index 0000000000..9afc6d8e74
--- /dev/null
+++ b/third_party/rust/url/src/host.rs
@@ -0,0 +1,454 @@
+// Copyright 2013-2016 The rust-url developers.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use idna;
+use parser::{ParseError, ParseResult};
+use percent_encoding::{percent_decode, utf8_percent_encode, CONTROLS};
+#[cfg(feature = "serde")]
+use serde::{Deserialize, Serialize};
+use std::cmp;
+use std::fmt::{self, Formatter};
+use std::net::{Ipv4Addr, Ipv6Addr};
+
+/// Crate-private, `Copy` summary of a host: the kind of host, plus the address for
+/// IP hosts. Unlike `Host`, the `Domain` variant carries no string.
+#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub(crate) enum HostInternal {
+ None,
+ Domain,
+ Ipv4(Ipv4Addr),
+ Ipv6(Ipv6Addr),
+}
+
+impl<S> From<Host<S>> for HostInternal {
+ /// Keep the kind of host and any IP address; the domain string is dropped.
+ fn from(host: Host<S>) -> HostInternal {
+ match host {
+ Host::Domain(_) => HostInternal::Domain,
+ Host::Ipv4(address) => HostInternal::Ipv4(address),
+ Host::Ipv6(address) => HostInternal::Ipv6(address),
+ }
+ }
+}
+
+/// The host name of an URL.
+///
+/// `S` is the string type used for domain names; it defaults to `String`.
+#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
+#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
+pub enum Host<S = String> {
+ /// A DNS domain name, as '.' dot-separated labels.
+ /// Non-ASCII labels are encoded in punycode per IDNA if this is the host of
+ /// a special URL, or percent encoded for non-special URLs. Hosts for
+ /// non-special URLs are also called opaque hosts.
+ Domain(S),
+
+ /// An IPv4 address.
+ /// `Url::host_str` returns the serialization of this address,
+ /// as four decimal integers separated by `.` dots.
+ Ipv4(Ipv4Addr),
+
+ /// An IPv6 address.
+ /// `Url::host_str` returns the serialization of that address between `[` and `]` brackets,
+ /// in the format per [RFC 5952 *A Recommendation
+ /// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
+ /// lowercase hexadecimal with maximal `::` compression.
+ Ipv6(Ipv6Addr),
+}
+
+impl<'a> Host<&'a str> {
+ /// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
+ ///
+ /// Only the `Domain` variant allocates; IP addresses are copied as-is.
+ pub fn to_owned(&self) -> Host<String> {
+ match *self {
+ Host::Domain(domain) => Host::Domain(domain.to_owned()),
+ Host::Ipv4(address) => Host::Ipv4(address),
+ Host::Ipv6(address) => Host::Ipv6(address),
+ }
+ }
+}
+
+impl Host<String> {
+    /// Parse a host: either an IPv6 address in [] square brackets, or a domain.
+    ///
+    /// The input is percent-decoded and passed through `idna::domain_to_ascii`;
+    /// a result that parses as an IPv4 address is returned as `Host::Ipv4`.
+    ///
+    /// <https://url.spec.whatwg.org/#host-parsing>
+    pub fn parse(input: &str) -> Result<Self, ParseError> {
+        if input.starts_with('[') {
+            return if input.ends_with(']') {
+                parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6)
+            } else {
+                Err(ParseError::InvalidIpv6Address)
+            };
+        }
+        let decoded = percent_decode(input.as_bytes()).decode_utf8_lossy();
+        let domain = idna::domain_to_ascii(&decoded)?;
+        // Code points that may not appear in a domain after IDNA processing.
+        let is_forbidden = |c: char| {
+            matches!(
+                c,
+                '\0' | '\t'
+                    | '\n'
+                    | '\r'
+                    | ' '
+                    | '#'
+                    | '%'
+                    | '/'
+                    | ':'
+                    | '?'
+                    | '@'
+                    | '['
+                    | '\\'
+                    | ']'
+            )
+        };
+        if domain.contains(is_forbidden) {
+            return Err(ParseError::InvalidDomainCharacter);
+        }
+        match parse_ipv4addr(&domain)? {
+            Some(address) => Ok(Host::Ipv4(address)),
+            None => Ok(Host::Domain(domain.into())),
+        }
+    }
+
+    // Parse an opaque host: a bracketed IPv6 address, or any other string, which is
+    // checked for forbidden code points and then percent-encoded with the `CONTROLS` set.
+    //
+    // <https://url.spec.whatwg.org/#concept-opaque-host-parser>
+    pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
+        if input.starts_with('[') {
+            return if input.ends_with(']') {
+                parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6)
+            } else {
+                Err(ParseError::InvalidIpv6Address)
+            };
+        }
+        // Same set as in `parse`, except that `%` is allowed here.
+        let is_forbidden = |c: char| {
+            matches!(
+                c,
+                '\0' | '\t'
+                    | '\n'
+                    | '\r'
+                    | ' '
+                    | '#'
+                    | '/'
+                    | ':'
+                    | '?'
+                    | '@'
+                    | '['
+                    | '\\'
+                    | ']'
+            )
+        };
+        if input.contains(is_forbidden) {
+            return Err(ParseError::InvalidDomainCharacter);
+        }
+        Ok(Host::Domain(
+            utf8_percent_encode(input, CONTROLS).to_string(),
+        ))
+    }
+}
+
+impl<S: AsRef<str>> fmt::Display for Host<S> {
+ /// Write the domain string, the IPv4 address as formatted by `Ipv4Addr`,
+ /// or the IPv6 address as written by `write_ipv6` wrapped in `[` and `]`.
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ match *self {
+ Host::Domain(ref domain) => domain.as_ref().fmt(f),
+ Host::Ipv4(ref addr) => addr.fmt(f),
+ Host::Ipv6(ref addr) => {
+ f.write_str("[")?;
+ write_ipv6(addr, f)?;
+ f.write_str("]")
+ }
+ }
+ }
+}
+
+/// Write `addr` as colon-separated lowercase hexadecimal pieces, replacing the run of
+/// zero pieces selected by `longest_zero_sequence` (if any) with `::`.
+fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter) -> fmt::Result {
+ let segments = addr.segments();
+ // `compress_start` is -1 when nothing is compressed, so it never equals `i` below.
+ let (compress_start, compress_end) = longest_zero_sequence(&segments);
+ let mut i = 0;
+ while i < 8 {
+ if i == compress_start {
+ f.write_str(":")?;
+ // At the very start no previous piece has written a trailing ':',
+ // so both colons of "::" must be written here.
+ if i == 0 {
+ f.write_str(":")?;
+ }
+ if compress_end < 8 {
+ // Skip over the compressed pieces.
+ i = compress_end;
+ } else {
+ // The compressed run reaches the end of the address.
+ break;
+ }
+ }
+ write!(f, "{:x}", segments[i as usize])?;
+ if i < 7 {
+ f.write_str(":")?;
+ }
+ i += 1;
+ }
+ Ok(())
+}
+
+// Find the run of zero pieces that should be compressed to `::`.
+//
+// Returns `(start, end)` with `end` exclusive for the longest run of at least two
+// zero pieces (the earliest one on ties), or `(-1, -2)` when there is no such run.
+//
+// https://url.spec.whatwg.org/#concept-ipv6-serializer step 2 and 3
+fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
+    let mut best_start: isize = -1;
+    let mut best_len: isize = -1;
+    let mut run_start: Option<isize> = None;
+    // Index 8 is a sentinel position so that a run touching the end gets closed too.
+    for index in 0..=8isize {
+        let is_zero = index < 8 && pieces[index as usize] == 0;
+        match (is_zero, run_start) {
+            (true, None) => run_start = Some(index),
+            (false, Some(begin)) => {
+                let run_len = index - begin;
+                // Strictly greater: the first of several equally long runs wins.
+                if run_len > best_len {
+                    best_start = begin;
+                    best_len = run_len;
+                }
+                run_start = None;
+            }
+            _ => {}
+        }
+    }
+    // https://url.spec.whatwg.org/#concept-ipv6-serializer
+    // step 3: ignore lone zeroes
+    if best_len < 2 {
+        (-1, -2)
+    } else {
+        (best_start, best_start + best_len)
+    }
+}
+
+/// Parse one dot-separated part of an IPv4 address as a number.
+///
+/// A `0x`/`0X` prefix selects hexadecimal, a leading `0` followed by at least one
+/// more character selects octal, and anything else is decimal.
+///
+/// Returns:
+/// * `Ok(Some(n))` for a valid number; an empty digit string (e.g. `""` or `"0x"`) is 0,
+/// * `Ok(None)` when the part contains a character that is not a digit in its radix,
+/// * `Err(())` when the digits are valid but the value does not fit in a `u32`.
+///
+/// <https://url.spec.whatwg.org/#ipv4-number-parser>
+fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
+    let mut r = 10;
+    if input.starts_with("0x") || input.starts_with("0X") {
+        input = &input[2..];
+        r = 16;
+    } else if input.len() >= 2 && input.starts_with('0') {
+        input = &input[1..];
+        r = 8;
+    }
+
+    // At the moment we can't know the reason why from_str_radix fails
+    // https://github.com/rust-lang/rust/issues/22639
+    // So instead we check if the input looks like a real number and only return
+    // an error when it's an overflow.
+    // This check also rejects a leading '+', which `from_str_radix` would accept.
+    if !input.chars().all(|c| c.is_digit(r)) {
+        return Ok(None);
+    }
+
+    if input.is_empty() {
+        return Ok(Some(0));
+    }
+    // Only valid digits remain, so the sole possible failure is overflow.
+    u32::from_str_radix(input, r).map(Some).map_err(|_| ())
+}
+
+/// Try to interpret `input` as an IPv4 address.
+///
+/// Returns `Ok(None)` when `input` does not look like an IPv4 address (empty input,
+/// more than four parts, an empty part, or a part that is not a number), and
+/// `Err(ParseError::InvalidIpv4Address)` when every part is a number but one of
+/// them is out of range.
+///
+/// <https://url.spec.whatwg.org/#concept-ipv4-parser>
+fn parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>> {
+    if input.is_empty() {
+        return Ok(None);
+    }
+    let mut labels: Vec<&str> = input.split('.').collect();
+    // A single trailing dot is allowed and ignored.
+    if let Some(&"") = labels.last() {
+        labels.pop();
+    }
+    if labels.len() > 4 {
+        return Ok(None);
+    }
+    let mut numbers: Vec<u32> = Vec::new();
+    // An overflowing part is only reported once all parts are known to be numbers.
+    let mut saw_overflow = false;
+    for label in labels {
+        if label == "" {
+            return Ok(None);
+        }
+        match parse_ipv4number(label) {
+            Ok(Some(number)) => numbers.push(number),
+            Ok(None) => return Ok(None),
+            Err(()) => saw_overflow = true,
+        }
+    }
+    if saw_overflow {
+        return Err(ParseError::InvalidIpv4Address);
+    }
+    let (&last, leading) = numbers
+        .split_last()
+        .expect("a non-empty list of numbers");
+    // Equivalent to: last >= 256 ** (4 − leading.len())
+    if last > u32::max_value() >> (8 * leading.len() as u32) {
+        return Err(ParseError::InvalidIpv4Address);
+    }
+    if leading.iter().any(|&number| number > 255) {
+        return Err(ParseError::InvalidIpv4Address);
+    }
+    // Each leading part fills one byte from the top; the last part fills the rest.
+    let ipv4 = leading
+        .iter()
+        .enumerate()
+        .fold(last, |acc, (position, &number)| {
+            acc + (number << (8 * (3 - position as u32)))
+        });
+    Ok(Some(Ipv4Addr::from(ipv4)))
+}
+
+/// Parse the text of an IPv6 address (without the surrounding brackets) into an
+/// `Ipv6Addr`. Any malformed input yields `ParseError::InvalidIpv6Address`.
+///
+/// <https://url.spec.whatwg.org/#concept-ipv6-parser>
+fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
+ let input = input.as_bytes();
+ let len = input.len();
+ let mut is_ip_v4 = false;
+ // The eight 16-bit pieces of the address.
+ let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
+ // Index of the piece that is filled next.
+ let mut piece_pointer = 0;
+ // Piece index at which a "::" was seen, if any.
+ let mut compress_pointer = None;
+ // Current position in `input`.
+ let mut i = 0;
+
+ if len < 2 {
+ return Err(ParseError::InvalidIpv6Address);
+ }
+
+ // A leading ':' is only valid as the first half of "::".
+ if input[0] == b':' {
+ if input[1] != b':' {
+ return Err(ParseError::InvalidIpv6Address);
+ }
+ i = 2;
+ piece_pointer = 1;
+ compress_pointer = Some(1);
+ }
+
+ while i < len {
+ if piece_pointer == 8 {
+ return Err(ParseError::InvalidIpv6Address);
+ }
+ // A ':' at the start of a piece is the second colon of "::"; only one is allowed.
+ if input[i] == b':' {
+ if compress_pointer.is_some() {
+ return Err(ParseError::InvalidIpv6Address);
+ }
+ i += 1;
+ piece_pointer += 1;
+ compress_pointer = Some(piece_pointer);
+ continue;
+ }
+ // Read up to four hexadecimal digits.
+ let start = i;
+ let end = cmp::min(len, start + 4);
+ let mut value = 0u16;
+ while i < end {
+ match (input[i] as char).to_digit(16) {
+ Some(digit) => {
+ value = value * 0x10 + digit as u16;
+ i += 1;
+ }
+ None => break,
+ }
+ }
+ if i < len {
+ match input[i] {
+ // A '.' means the digits just read start an embedded IPv4 address:
+ // rewind to their start and switch to the IPv4 branch below.
+ b'.' => {
+ if i == start {
+ return Err(ParseError::InvalidIpv6Address);
+ }
+ i = start;
+ if piece_pointer > 6 {
+ return Err(ParseError::InvalidIpv6Address);
+ }
+ is_ip_v4 = true;
+ }
+ // A separator must be followed by more input.
+ b':' => {
+ i += 1;
+ if i == len {
+ return Err(ParseError::InvalidIpv6Address);
+ }
+ }
+ _ => return Err(ParseError::InvalidIpv6Address),
+ }
+ }
+ if is_ip_v4 {
+ break;
+ }
+ pieces[piece_pointer] = value;
+ piece_pointer += 1;
+ }
+
+ // Embedded IPv4 address: four decimal numbers that fill two pieces.
+ if is_ip_v4 {
+ if piece_pointer > 6 {
+ return Err(ParseError::InvalidIpv6Address);
+ }
+ let mut numbers_seen = 0;
+ while i < len {
+ // Every number after the first must be preceded by a '.'.
+ if numbers_seen > 0 {
+ if numbers_seen < 4 && (i < len && input[i] == b'.') {
+ i += 1
+ } else {
+ return Err(ParseError::InvalidIpv6Address);
+ }
+ }
+
+ let mut ipv4_piece = None;
+ while i < len {
+ let digit = match input[i] {
+ c @ b'0'..=b'9' => c - b'0',
+ _ => break,
+ };
+ match ipv4_piece {
+ None => ipv4_piece = Some(digit as u16),
+ Some(0) => return Err(ParseError::InvalidIpv6Address), // No leading zero
+ Some(ref mut v) => {
+ *v = *v * 10 + digit as u16;
+ if *v > 255 {
+ return Err(ParseError::InvalidIpv6Address);
+ }
+ }
+ }
+ i += 1;
+ }
+
+ // Two numbers are packed into each piece, high byte first.
+ pieces[piece_pointer] = if let Some(v) = ipv4_piece {
+ pieces[piece_pointer] * 0x100 + v
+ } else {
+ return Err(ParseError::InvalidIpv6Address);
+ };
+ numbers_seen += 1;
+
+ if numbers_seen == 2 || numbers_seen == 4 {
+ piece_pointer += 1;
+ }
+ }
+
+ if numbers_seen != 4 {
+ return Err(ParseError::InvalidIpv6Address);
+ }
+ }
+
+ if i < len {
+ return Err(ParseError::InvalidIpv6Address);
+ }
+
+ match compress_pointer {
+ // Move the pieces parsed after "::" to the end of the array; the zero
+ // pieces they are swapped with form the compressed gap.
+ Some(compress_pointer) => {
+ let mut swaps = piece_pointer - compress_pointer;
+ piece_pointer = 7;
+ while swaps > 0 {
+ pieces.swap(piece_pointer, compress_pointer + swaps - 1);
+ swaps -= 1;
+ piece_pointer -= 1;
+ }
+ }
+ // Without "::" all eight pieces must have been given.
+ _ => {
+ if piece_pointer != 8 {
+ return Err(ParseError::InvalidIpv6Address);
+ }
+ }
+ }
+ Ok(Ipv6Addr::new(
+ pieces[0], pieces[1], pieces[2], pieces[3], pieces[4], pieces[5], pieces[6], pieces[7],
+ ))
+}
diff --git a/third_party/rust/url/src/lib.rs b/third_party/rust/url/src/lib.rs
new file mode 100644
index 0000000000..d60935c29d
--- /dev/null
+++ b/third_party/rust/url/src/lib.rs
@@ -0,0 +1,2553 @@
+// Copyright 2013-2015 The rust-url developers.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/*!
+
+rust-url is an implementation of the [URL Standard](http://url.spec.whatwg.org/)
+for the [Rust](http://rust-lang.org/) programming language.
+
+
+# URL parsing and data structures
+
+First, URL parsing may fail for various reasons and therefore returns a `Result`.
+
+```
+use url::{Url, ParseError};
+
+assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address))
+```
+
+Let’s parse a valid URL and look at its components.
+
+```
+use url::{Url, Host, Position};
+# use url::ParseError;
+# fn run() -> Result<(), ParseError> {
+let issue_list_url = Url::parse(
+ "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
+)?;
+
+
+assert!(issue_list_url.scheme() == "https");
+assert!(issue_list_url.username() == "");
+assert!(issue_list_url.password() == None);
+assert!(issue_list_url.host_str() == Some("github.com"));
+assert!(issue_list_url.host() == Some(Host::Domain("github.com")));
+assert!(issue_list_url.port() == None);
+assert!(issue_list_url.path() == "/rust-lang/rust/issues");
+assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) ==
+ Some(vec!["rust-lang", "rust", "issues"]));
+assert!(issue_list_url.query() == Some("labels=E-easy&state=open"));
+assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open");
+assert!(issue_list_url.fragment() == None);
+assert!(!issue_list_url.cannot_be_a_base());
+# Ok(())
+# }
+# run().unwrap();
+```
+
+Some URLs are said to be *cannot-be-a-base*:
+they don’t have a username, password, host, or port,
+and their "path" is an arbitrary string rather than slash-separated segments:
+
+```
+use url::Url;
+# use url::ParseError;
+
+# fn run() -> Result<(), ParseError> {
+let data_url = Url::parse("data:text/plain,Hello?World#")?;
+
+assert!(data_url.cannot_be_a_base());
+assert!(data_url.scheme() == "data");
+assert!(data_url.path() == "text/plain,Hello");
+assert!(data_url.path_segments().is_none());
+assert!(data_url.query() == Some("World"));
+assert!(data_url.fragment() == Some(""));
+# Ok(())
+# }
+# run().unwrap();
+```
+
+
+# Base URL
+
+Many contexts allow URL *references* that can be relative to a *base URL*:
+
+```html
+<link rel="stylesheet" href="../main.css">
+```
+
+Since parsed URLs are absolute, giving a base is required for parsing relative URLs:
+
+```
+use url::{Url, ParseError};
+
+assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase))
+```
+
+Use the `join` method on an `Url` to use it as a base URL:
+
+```
+use url::Url;
+# use url::ParseError;
+
+# fn run() -> Result<(), ParseError> {
+let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?;
+let css_url = this_document.join("../main.css")?;
+assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
+# Ok(())
+# }
+# run().unwrap();
+```
+*/
+
+#![doc(html_root_url = "https://docs.rs/url/2.0.0")]
+
+#[macro_use]
+extern crate matches;
+extern crate idna;
+extern crate percent_encoding;
+#[cfg(feature = "serde")]
+extern crate serde;
+
+use host::HostInternal;
+use parser::{to_u32, Context, Parser, SchemeType, PATH_SEGMENT, USERINFO};
+use percent_encoding::{percent_decode, percent_encode, utf8_percent_encode};
+use std::borrow::Borrow;
+use std::cmp;
+#[cfg(feature = "serde")]
+use std::error::Error;
+use std::fmt::{self, Write};
+use std::hash;
+use std::io;
+use std::mem;
+use std::net::{IpAddr, SocketAddr, ToSocketAddrs};
+use std::ops::{Range, RangeFrom, RangeTo};
+use std::path::{Path, PathBuf};
+use std::str;
+
+pub use host::Host;
+pub use origin::{OpaqueOrigin, Origin};
+pub use parser::{ParseError, SyntaxViolation};
+pub use path_segments::PathSegmentsMut;
+pub use query_encoding::EncodingOverride;
+pub use slicing::Position;
+
+mod host;
+mod origin;
+mod parser;
+mod path_segments;
+mod query_encoding;
+mod slicing;
+
+pub mod form_urlencoded;
+#[doc(hidden)]
+pub mod quirks;
+
+/// A parsed URL record.
+///
+/// The URL is stored as a single serialized string plus the positions of its components.
+#[derive(Clone)]
+pub struct Url {
+ /// Syntax in pseudo-BNF:
+ ///
+ /// url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
+ /// non-hierarchical = non-hierarchical-path
+ /// non-hierarchical-path = /* Does not start with "/" */
+ /// hierarchical = authority? hierarchical-path
+ /// authority = "//" userinfo? host [ ":" port ]?
+ /// userinfo = username [ ":" password ]? "@"
+ /// hierarchical-path = [ "/" path-segment ]+
+ serialization: String,
+
+ // Components
+ // NOTE(review): the `u32` fields below are presumably byte offsets into
+ // `serialization` — confirm against the parser.
+ scheme_end: u32, // Before ':'
+ username_end: u32, // Before ':' (if a password is given) or '@' (if not)
+ host_start: u32,
+ host_end: u32,
+ host: HostInternal,
+ port: Option<u16>,
+ path_start: u32, // Before initial '/', if any
+ query_start: Option<u32>, // Before '?', unlike Position::QueryStart
+ fragment_start: Option<u32>, // Before '#', unlike Position::FragmentStart
+}
+
+/// Full configuration for the URL parser.
+///
+/// Obtained from `Url::options()` and adjusted with the builder methods below.
+#[derive(Copy, Clone)]
+pub struct ParseOptions<'a> {
+ // Base URL handed to the parser, if any.
+ base_url: Option<&'a Url>,
+ // Character encoding override for query strings.
+ encoding_override: EncodingOverride<'a>,
+ // Callback invoked for non-fatal syntax violations, if any.
+ violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
+}
+
+impl<'a> ParseOptions<'a> {
+ /// Change the base URL
+ ///
+ /// Passing `None` removes a previously set base URL.
+ pub fn base_url(mut self, new: Option<&'a Url>) -> Self {
+ self.base_url = new;
+ self
+ }
+
+ /// Override the character encoding of query strings.
+ /// This is a legacy concept only relevant for HTML.
+ pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self {
+ self.encoding_override = new;
+ self
+ }
+
+ /// Call the provided function or closure for a non-fatal `SyntaxViolation`
+ /// when it occurs during parsing. Note that since the provided function is
+ /// `Fn`, the caller might need to utilize _interior mutability_, such as with
+ /// a `RefCell`, to collect the violations.
+ ///
+ /// ## Example
+ /// ```
+ /// use std::cell::RefCell;
+ /// use url::{Url, SyntaxViolation};
+ /// # use url::ParseError;
+ /// # fn run() -> Result<(), url::ParseError> {
+ /// let violations = RefCell::new(Vec::new());
+ /// let url = Url::options()
+ /// .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v)))
+ /// .parse("https:////example.com")?;
+ /// assert_eq!(url.as_str(), "https://example.com/");
+ /// assert_eq!(violations.into_inner(),
+ /// vec!(SyntaxViolation::ExpectedDoubleSlash));
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self {
+ self.violation_fn = new;
+ self
+ }
+
+ /// Parse an URL string with the configuration so far.
+ pub fn parse(self, input: &str) -> Result<Url, ::ParseError> {
+ Parser {
+ // Pre-size the output buffer to the length of the input.
+ serialization: String::with_capacity(input.len()),
+ base_url: self.base_url,
+ query_encoding_override: self.encoding_override,
+ violation_fn: self.violation_fn,
+ context: Context::UrlParser,
+ }
+ .parse_url(input)
+ }
+}
+
+impl Url {
+ /// Parse an absolute URL from a string.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("https://example.net")?;
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+ /// # Errors
+ ///
+ /// If the function can not parse an absolute URL from the given string,
+ /// a [`ParseError`] variant will be returned.
+ ///
+ /// [`ParseError`]: enum.ParseError.html
+ #[inline]
+ pub fn parse(input: &str) -> Result<Url, ::ParseError> {
+ // Default options: no base URL, no encoding override, no violation callback.
+ Url::options().parse(input)
+ }
+
+ /// Parse an absolute URL from a string and add params to its query string.
+ ///
+ /// Existing params are not removed.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse_with_params("https://example.net?dont=clobberme",
+ /// &[("lang", "rust"), ("browser", "servo")])?;
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+ /// # Errors
+ ///
+ /// If the function can not parse an absolute URL from the given string,
+ /// a [`ParseError`] variant will be returned.
+ ///
+ /// [`ParseError`]: enum.ParseError.html
+    #[inline]
+    pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, ::ParseError>
+    where
+        I: IntoIterator,
+        I::Item: Borrow<(K, V)>,
+        K: AsRef<str>,
+        V: AsRef<str>,
+    {
+        // On a parse error the parameters are never touched.
+        let mut url = Url::options().parse(input)?;
+        url.query_pairs_mut().extend_pairs(iter);
+        Ok(url)
+    }
+
+ /// Parse a string as an URL, with this URL as the base URL.
+ ///
+ /// Note: a trailing slash is significant.
+ /// Without it, the last path component is considered to be a “file” name
+ /// to be removed to get at the “directory” that is used as the base:
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let base = Url::parse("https://example.net/a/b.html")?;
+ /// let url = base.join("c.png")?;
+ /// assert_eq!(url.as_str(), "https://example.net/a/c.png"); // Not /a/b.html/c.png
+ ///
+ /// let base = Url::parse("https://example.net/a/b/")?;
+ /// let url = base.join("c.png")?;
+ /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png");
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+ /// # Errors
+ ///
+ /// If the function can not parse an URL from the given string
+ /// with this URL as the base URL, a [`ParseError`] variant will be returned.
+ ///
+ /// [`ParseError`]: enum.ParseError.html
+ #[inline]
+ pub fn join(&self, input: &str) -> Result<Url, ::ParseError> {
+     // Default options, except that `self` is used as the base URL.
+     let relative_to_self = Url::options().base_url(Some(self));
+     relative_to_self.parse(input)
+ }
+
+ /// Return a default `ParseOptions` that can fully configure the URL parser.
+ ///
+ /// # Examples
+ ///
+ /// Get default `ParseOptions`, then change base url
+ ///
+ /// ```rust
+ /// use url::Url;
+ /// # use url::ParseError;
+ /// # fn run() -> Result<(), ParseError> {
+ /// let options = Url::options();
+ /// let api = Url::parse("https://api.example.com")?;
+ /// let base_url = options.base_url(Some(&api));
+ /// let version_url = base_url.parse("version.json")?;
+ /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json");
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn options<'a>() -> ParseOptions<'a> {
+     // Every setting starts out unset.
+     ParseOptions {
+         violation_fn: None,
+         encoding_override: None,
+         base_url: None,
+     }
+ }
+
+ /// Return the serialization of this URL.
+ ///
+ /// This is fast since that serialization is already stored in the `Url` struct.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url_str = "https://example.net/";
+ /// let url = Url::parse(url_str)?;
+ /// assert_eq!(url.as_str(), url_str);
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ #[inline]
+ pub fn as_str(&self) -> &str {
+     // Borrow the stored serialization; nothing is recomputed.
+     self.serialization.as_str()
+ }
+
+ /// Return the serialization of this URL.
+ ///
+ /// This consumes the `Url` and takes ownership of the `String` stored in it.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url_str = "https://example.net/";
+ /// let url = Url::parse(url_str)?;
+ /// assert_eq!(url.into_string(), url_str);
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ #[inline]
+ pub fn into_string(self) -> String {
+     // Move the stored serialization out; the remaining fields are dropped.
+     let Url { serialization, .. } = self;
+     serialization
+ }
+
+ /// For internal testing, not part of the public API.
+ ///
+ /// Methods of the `Url` struct assume a number of invariants.
+ /// This checks each of these invariants and returns `Err` describing the first one that is not met
+ /// (the two `expect` calls in the body can still panic). This is for testing rust-url itself.
+ #[doc(hidden)]
+ pub fn check_invariants(&self) -> Result<(), String> {
+ macro_rules! assert { // local shadow of `assert!`: returns `Err(..)` from this function instead of panicking
+ ($x: expr) => {
+ if !$x {
+ return Err(format!(
+ "!( {} ) for URL {:?}",
+ stringify!($x),
+ self.serialization
+ ));
+ }
+ };
+ }
+
+ macro_rules! assert_eq { // local shadow of `assert_eq!`: returns `Err(..)` with both values instead of panicking
+ ($a: expr, $b: expr) => {
+ {
+ let a = $a;
+ let b = $b;
+ if a != b {
+ return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
+ a, b, stringify!($a), stringify!($b),
+ self.serialization))
+ }
+ }
+ }
+ }
+
+ assert!(self.scheme_end >= 1); // the scheme is at least one byte long
+ assert!(matches!(self.byte_at(0), b'a'..=b'z' | b'A'..=b'Z'));
+ assert!(self
+ .slice(1..self.scheme_end)
+ .chars()
+ .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')));
+ assert_eq!(self.byte_at(self.scheme_end), b':'); // the scheme is followed by ':'
+
+ if self.slice(self.scheme_end + 1..).starts_with("//") {
+ // URL with authority
+ match self.byte_at(self.username_end) { // the byte right after the username decides the layout
+ b':' => {
+ assert!(self.host_start >= self.username_end + 2);
+ assert_eq!(self.byte_at(self.host_start - 1), b'@');
+ }
+ b'@' => assert!(self.host_start == self.username_end + 1),
+ _ => assert_eq!(self.username_end, self.scheme_end + 3),
+ }
+ assert!(self.host_start >= self.username_end);
+ assert!(self.host_end >= self.host_start);
+ let host_str = self.slice(self.host_start..self.host_end);
+ match self.host { // the serialized host text must agree with the parsed host
+ HostInternal::None => assert_eq!(host_str, ""),
+ HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
+ HostInternal::Ipv6(address) => {
+ let h: Host<String> = Host::Ipv6(address);
+ assert_eq!(host_str, h.to_string())
+ }
+ HostInternal::Domain => {
+ if SchemeType::from(self.scheme()).is_special() {
+ assert!(!host_str.is_empty())
+ }
+ }
+ }
+ if self.path_start == self.host_end { // nothing between host and path: no port
+ assert_eq!(self.port, None);
+ } else {
+ assert_eq!(self.byte_at(self.host_end), b':');
+ let port_str = self.slice(self.host_end + 1..self.path_start);
+ assert_eq!(
+ self.port,
+ Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
+ );
+ }
+ assert_eq!(self.byte_at(self.path_start), b'/');
+ } else {
+ // Anarchist URL (no authority)
+ assert_eq!(self.username_end, self.scheme_end + 1);
+ assert_eq!(self.host_start, self.scheme_end + 1);
+ assert_eq!(self.host_end, self.scheme_end + 1);
+ assert_eq!(self.host, HostInternal::None);
+ assert_eq!(self.port, None);
+ assert_eq!(self.path_start, self.scheme_end + 1);
+ }
+ if let Some(start) = self.query_start {
+ assert!(start > self.path_start);
+ assert_eq!(self.byte_at(start), b'?');
+ }
+ if let Some(start) = self.fragment_start {
+ assert!(start > self.path_start);
+ assert_eq!(self.byte_at(start), b'#');
+ }
+ if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
+ assert!(fragment_start > query_start); // the query comes before the fragment
+ }
+
+ let other = Url::parse(self.as_str()).expect("Failed to parse myself?"); // re-parse our own serialization and compare field by field
+ assert_eq!(&self.serialization, &other.serialization);
+ assert_eq!(self.scheme_end, other.scheme_end);
+ assert_eq!(self.username_end, other.username_end);
+ assert_eq!(self.host_start, other.host_start);
+ assert_eq!(self.host_end, other.host_end);
+ assert!(
+ self.host == other.host ||
+ // XXX No host round-trips to empty host.
+ // See https://github.com/whatwg/url/issues/79
+ (self.host_str(), other.host_str()) == (None, Some(""))
+ );
+ assert_eq!(self.port, other.port);
+ assert_eq!(self.path_start, other.path_start);
+ assert_eq!(self.query_start, other.query_start);
+ assert_eq!(self.fragment_start, other.fragment_start);
+ Ok(())
+ }
+
+ /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>)
+ ///
+ /// Note: this returns an opaque origin for `file:` URLs, which causes
+ /// `url.origin() != url.origin()`.
+ ///
+ /// # Examples
+ ///
+ /// URL with `ftp` scheme:
+ ///
+ /// ```rust
+ /// use url::{Host, Origin, Url};
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("ftp://example.com/foo")?;
+ /// assert_eq!(url.origin(),
+ /// Origin::Tuple("ftp".into(),
+ /// Host::Domain("example.com".into()),
+ /// 21));
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+ /// URL with `blob` scheme:
+ ///
+ /// ```rust
+ /// use url::{Host, Origin, Url};
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("blob:https://example.com/foo")?;
+ /// assert_eq!(url.origin(),
+ /// Origin::Tuple("https".into(),
+ /// Host::Domain("example.com".into()),
+ /// 443));
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+ /// URL with `file` scheme:
+ ///
+ /// ```rust
+ /// use url::{Host, Origin, Url};
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("file:///tmp/foo")?;
+ /// assert!(!url.origin().is_tuple());
+ ///
+ /// let other_url = Url::parse("file:///tmp/foo")?;
+ /// assert!(url.origin() != other_url.origin());
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+ /// URL with other scheme:
+ ///
+ /// ```rust
+ /// use url::{Host, Origin, Url};
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("foo:bar")?;
+ /// assert!(!url.origin().is_tuple());
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ #[inline]
+ pub fn origin(&self) -> Origin {
+ origin::url_origin(self) // the computation is delegated to the `origin` module
+ }
+
+ /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("file:///tmp/foo")?;
+ /// assert_eq!(url.scheme(), "file");
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ #[inline]
+ pub fn scheme(&self) -> &str {
+     // Everything before `scheme_end`, so the ':' delimiter is excluded.
+     let end = self.scheme_end;
+     self.slice(..end)
+ }
+
+ /// Return whether the URL has an 'authority',
+ /// which can contain a username, password, host, and port number.
+ ///
+ /// URLs that do *not* are either path-only like `unix:/run/foo.socket`
+ /// or cannot-be-a-base like `data:text/plain,Stuff`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("ftp://rms@example.com")?;
+ /// assert!(url.has_authority());
+ ///
+ /// let url = Url::parse("unix:/run/foo.socket")?;
+ /// assert!(!url.has_authority());
+ ///
+ /// let url = Url::parse("data:text/plain,Stuff")?;
+ /// assert!(!url.has_authority());
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ #[inline]
+ pub fn has_authority(&self) -> bool {
+     let colon = self.scheme_end;
+     debug_assert!(self.byte_at(colon) == b':');
+     // An authority is announced by "//" directly after the scheme's ':'.
+     let from_colon = self.slice(colon..);
+     from_colon.starts_with("://")
+ }
+
+ /// Return whether this URL is a cannot-be-a-base URL,
+ /// meaning that parsing a relative URL string with this URL as the base will return an error.
+ ///
+ /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash,
+ /// as is typically the case of `data:` and `mailto:` URLs.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("ftp://rms@example.com")?;
+ /// assert!(!url.cannot_be_a_base());
+ ///
+ /// let url = Url::parse("unix:/run/foo.socket")?;
+ /// assert!(!url.cannot_be_a_base());
+ ///
+ /// let url = Url::parse("data:text/plain,Stuff")?;
+ /// assert!(url.cannot_be_a_base());
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ #[inline]
+ pub fn cannot_be_a_base(&self) -> bool {
+     // A URL can act as a base only when its path begins with '/'.
+     let from_path = self.slice(self.path_start..);
+     let rooted = from_path.starts_with('/');
+     !rooted
+ }
+
+ /// Return the username for this URL (typically the empty string)
+ /// as a percent-encoded ASCII string.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("ftp://rms@example.com")?;
+ /// assert_eq!(url.username(), "rms");
+ ///
+ /// let url = Url::parse("ftp://:secret123@example.com")?;
+ /// assert_eq!(url.username(), "");
+ ///
+ /// let url = Url::parse("https://example.com")?;
+ /// assert_eq!(url.username(), "");
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn username(&self) -> &str {
+     // Without an authority there is no room for a username.
+     if !self.has_authority() {
+         return "";
+     }
+     // The username sits between "://" and `username_end`.
+     let start = self.scheme_end + ("://".len() as u32);
+     self.slice(start..self.username_end)
+ }
+
+ /// Return the password for this URL, if any, as a percent-encoded ASCII string.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("ftp://rms:secret123@example.com")?;
+ /// assert_eq!(url.password(), Some("secret123"));
+ ///
+ /// let url = Url::parse("ftp://:secret123@example.com")?;
+ /// assert_eq!(url.password(), Some("secret123"));
+ ///
+ /// let url = Url::parse("ftp://rms@example.com")?;
+ /// assert_eq!(url.password(), None);
+ ///
+ /// let url = Url::parse("https://example.com")?;
+ /// assert_eq!(url.password(), None);
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn password(&self) -> Option<&str> {
+     // This ':' is not the one marking a port number since a host can not be empty.
+     // (Except for file: URLs, which do not have port numbers.)
+     if !self.has_authority() || self.byte_at(self.username_end) != b':' {
+         return None;
+     }
+     // The password runs from just after ':' up to the '@' that precedes the host.
+     debug_assert!(self.byte_at(self.host_start - 1) == b'@');
+     let password_start = self.username_end + 1;
+     let password_end = self.host_start - 1;
+     Some(self.slice(password_start..password_end))
+ }
+
+ /// Equivalent to `url.host().is_some()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("ftp://rms@example.com")?;
+ /// assert!(url.has_host());
+ ///
+ /// let url = Url::parse("unix:/run/foo.socket")?;
+ /// assert!(!url.has_host());
+ ///
+ /// let url = Url::parse("data:text/plain,Stuff")?;
+ /// assert!(!url.has_host());
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn has_host(&self) -> bool {
+     // Every variant except `None` counts as having a host.
+     match self.host {
+         HostInternal::None => false,
+         _ => true,
+     }
+ }
+
+ /// Return the string representation of the host (domain or IP address) for this URL, if any.
+ ///
+ /// Non-ASCII domains are punycode-encoded per IDNA.
+ /// IPv6 addresses are given between `[` and `]` brackets.
+ ///
+ /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
+ /// don’t have a host.
+ ///
+ /// See also the `host` method.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("https://127.0.0.1/index.html")?;
+ /// assert_eq!(url.host_str(), Some("127.0.0.1"));
+ ///
+ /// let url = Url::parse("ftp://rms@example.com")?;
+ /// assert_eq!(url.host_str(), Some("example.com"));
+ ///
+ /// let url = Url::parse("unix:/run/foo.socket")?;
+ /// assert_eq!(url.host_str(), None);
+ ///
+ /// let url = Url::parse("data:text/plain,Stuff")?;
+ /// assert_eq!(url.host_str(), None);
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn host_str(&self) -> Option<&str> {
+     match self.host {
+         HostInternal::None => None,
+         // Any kind of host is returned as its serialized text.
+         _ => Some(self.slice(self.host_start..self.host_end)),
+     }
+ }
+
+ /// Return the parsed representation of the host for this URL.
+ /// Non-ASCII domain labels are punycode-encoded per IDNA.
+ ///
+ /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
+ /// don’t have a host.
+ ///
+ /// See also the `host_str` method.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("https://127.0.0.1/index.html")?;
+ /// assert!(url.host().is_some());
+ ///
+ /// let url = Url::parse("ftp://rms@example.com")?;
+ /// assert!(url.host().is_some());
+ ///
+ /// let url = Url::parse("unix:/run/foo.socket")?;
+ /// assert!(url.host().is_none());
+ ///
+ /// let url = Url::parse("data:text/plain,Stuff")?;
+ /// assert!(url.host().is_none());
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn host(&self) -> Option<Host<&str>> {
+     let parsed = match self.host {
+         HostInternal::None => return None,
+         // IP addresses are stored parsed and returned as-is.
+         HostInternal::Ipv6(v6) => Host::Ipv6(v6),
+         HostInternal::Ipv4(v4) => Host::Ipv4(v4),
+         // Domains are borrowed from the serialization.
+         HostInternal::Domain => Host::Domain(self.slice(self.host_start..self.host_end)),
+     };
+     Some(parsed)
+ }
+
+ /// If this URL has a host and it is a domain name (not an IP address), return it.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("https://127.0.0.1/")?;
+ /// assert_eq!(url.domain(), None);
+ ///
+ /// let url = Url::parse("mailto:rms@example.net")?;
+ /// assert_eq!(url.domain(), None);
+ ///
+ /// let url = Url::parse("https://example.com/")?;
+ /// assert_eq!(url.domain(), Some("example.com"));
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn domain(&self) -> Option<&str> {
+     // Only a domain host qualifies; IP addresses and "no host" yield `None`.
+     if let HostInternal::Domain = self.host {
+         Some(self.slice(self.host_start..self.host_end))
+     } else {
+         None
+     }
+ }
+
+ /// Return the port number for this URL, if any.
+ ///
+ /// Note that default port numbers are never reflected by the serialization,
+ /// use the `port_or_known_default()` method if you want a default port number returned.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("https://example.com")?;
+ /// assert_eq!(url.port(), None);
+ ///
+ /// let url = Url::parse("https://example.com:443/")?;
+ /// assert_eq!(url.port(), None);
+ ///
+ /// let url = Url::parse("ssh://example.com:22")?;
+ /// assert_eq!(url.port(), Some(22));
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ #[inline]
+ pub fn port(&self) -> Option<u16> {
+ self.port // the stored field, returned as-is; no scheme default is substituted here
+ }
+
+ /// Return the port number for this URL, or the default port number if it is known.
+ ///
+ /// This method only knows the default port number
+ /// of the `http`, `https`, `ws`, `wss`, `ftp`, and `gopher` schemes.
+ ///
+ /// For URLs in these schemes, this method always returns `Some(_)`.
+ /// For other schemes, it is the same as `Url::port()`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("foo://example.com")?;
+ /// assert_eq!(url.port_or_known_default(), None);
+ ///
+ /// let url = Url::parse("foo://example.com:1456")?;
+ /// assert_eq!(url.port_or_known_default(), Some(1456));
+ ///
+ /// let url = Url::parse("https://example.com")?;
+ /// assert_eq!(url.port_or_known_default(), Some(443));
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ #[inline]
+ pub fn port_or_known_default(&self) -> Option<u16> {
+     match self.port {
+         explicit @ Some(_) => explicit,
+         // No stored port: ask the parser for this scheme's default, if it knows one.
+         None => parser::default_port(self.scheme()),
+     }
+ }
+
+ /// Resolve a URL’s host and port number to `SocketAddr`.
+ ///
+ /// If the URL has the default port number of a scheme that is unknown to this library,
+ /// `default_port_number` provides an opportunity to provide the actual port number.
+ /// In non-example code this should be implemented either simply as `|| None`,
+ /// or by matching on the URL’s `.scheme()`.
+ ///
+ /// If the host is a domain, it is resolved using the standard library’s DNS support.
+ ///
+ /// # Examples
+ ///
+ /// ```no_run
+ /// let url = url::Url::parse("https://example.net/").unwrap();
+ /// let addrs = url.socket_addrs(|| None).unwrap();
+ /// std::net::TcpStream::connect(&*addrs)
+ /// # ;
+ /// ```
+ ///
+ /// ```
+ /// /// With application-specific known default port numbers
+ /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> {
+ /// url.socket_addrs(|| match url.scheme() {
+ /// "socks5" | "socks5h" => Some(1080),
+ /// _ => None,
+ /// })
+ /// }
+ /// ```
+ pub fn socket_addrs(
+     &self,
+     default_port_number: impl Fn() -> Option<u16>,
+ ) -> io::Result<Vec<SocketAddr>> {
+     // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>`
+     // causes borrowck issues because the return value borrows `default_port_number`:
+     //
+     // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters
+     //
+     // > This RFC proposes that *all* type parameters are considered in scope
+     // > for `impl Trait` in return position
+
+     // Both "missing" cases are reported as `InvalidData` I/O errors.
+     let invalid_data = |message: &str| io::Error::new(io::ErrorKind::InvalidData, message);
+
+     let host = self
+         .host()
+         .ok_or_else(|| invalid_data("No host name in the URL"))?;
+     // Stored port first, then the scheme's known default, then the caller's fallback.
+     let port = self
+         .port_or_known_default()
+         .or_else(default_port_number)
+         .ok_or_else(|| invalid_data("No port number in the URL"))?;
+
+     let addrs: Vec<SocketAddr> = match host {
+         // Domains go through the standard library's resolver.
+         Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(),
+         Host::Ipv4(ip) => vec![SocketAddr::from((ip, port))],
+         Host::Ipv6(ip) => vec![SocketAddr::from((ip, port))],
+     };
+     Ok(addrs)
+ }
+
+ /// Return the path for this URL, as a percent-encoded ASCII string.
+ /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'.
+ /// For other URLs, this starts with a '/' slash
+ /// and continues with slash-separated path segments.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use url::{Url, ParseError};
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("https://example.com/api/versions?page=2")?;
+ /// assert_eq!(url.path(), "/api/versions");
+ ///
+ /// let url = Url::parse("https://example.com")?;
+ /// assert_eq!(url.path(), "/");
+ ///
+ /// let url = Url::parse("https://example.com/countries/việt nam")?;
+ /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam");
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn path(&self) -> &str {
+     // The path ends at the query's '?' if there is one, otherwise at the
+     // fragment's '#', otherwise at the end of the serialization.
+     let path_end = self.query_start.or(self.fragment_start);
+     match path_end {
+         Some(end) => self.slice(self.path_start..end),
+         None => self.slice(self.path_start..),
+     }
+ }
+
+ /// Unless this URL is cannot-be-a-base,
+ /// return an iterator of '/' slash-separated path segments,
+ /// each as a percent-encoded ASCII string.
+ ///
+ /// Return `None` for cannot-be-a-base URLs.
+ ///
+ /// When `Some` is returned, the iterator always contains at least one string
+ /// (which may be empty).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use std::error::Error;
+ ///
+ /// # fn run() -> Result<(), Box<Error>> {
+ /// let url = Url::parse("https://example.com/foo/bar")?;
+ /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
+ /// assert_eq!(path_segments.next(), Some("foo"));
+ /// assert_eq!(path_segments.next(), Some("bar"));
+ /// assert_eq!(path_segments.next(), None);
+ ///
+ /// let url = Url::parse("https://example.com")?;
+ /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
+ /// assert_eq!(path_segments.next(), Some(""));
+ /// assert_eq!(path_segments.next(), None);
+ ///
+ /// let url = Url::parse("data:text/plain,HelloWorld")?;
+ /// assert!(url.path_segments().is_none());
+ ///
+ /// let url = Url::parse("https://example.com/countries/việt nam")?;
+ /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
+ /// assert_eq!(path_segments.next(), Some("countries"));
+ /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam"));
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn path_segments(&self) -> Option<str::Split<char>> {
+     let path = self.path();
+     // A path without a leading '/' has no segments to iterate.
+     if !path.starts_with('/') {
+         return None;
+     }
+     // Skip the leading '/' so the first segment is not an extra empty string.
+     let without_leading_slash = &path[1..];
+     Some(without_leading_slash.split('/'))
+ }
+
+ /// Return this URL’s query string, if any, as a percent-encoded ASCII string.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("https://example.com/products?page=2")?;
+ /// let query = url.query();
+ /// assert_eq!(query, Some("page=2"));
+ ///
+ /// let url = Url::parse("https://example.com/products")?;
+ /// let query = url.query();
+ /// assert!(query.is_none());
+ ///
+ /// let url = Url::parse("https://example.com/?country=español")?;
+ /// let query = url.query();
+ /// assert_eq!(query, Some("country=espa%C3%B1ol"));
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn query(&self) -> Option<&str> {
+     // No recorded '?' position means no query at all.
+     let question_mark = self.query_start?;
+     debug_assert!(self.byte_at(question_mark) == b'?');
+
+     // The query runs from just after '?' to the fragment's '#', or to the end.
+     let first = question_mark + 1;
+     let text = match self.fragment_start {
+         Some(hash) => self.slice(first..hash),
+         None => self.slice(first..),
+     };
+     Some(text)
+ }
+
+ /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded`
+ /// and return an iterator of (key, value) pairs.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use std::borrow::Cow;
+ ///
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?;
+ /// let mut pairs = url.query_pairs();
+ ///
+ /// assert_eq!(pairs.count(), 2);
+ ///
+ /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2"))));
+ /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))));
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ #[inline]
+ pub fn query_pairs(&self) -> form_urlencoded::Parse {
+     // A missing query is parsed like an empty one.
+     let raw_query = match self.query() {
+         Some(text) => text,
+         None => "",
+     };
+     form_urlencoded::parse(raw_query.as_bytes())
+ }
+
+ /// Return this URL’s fragment identifier, if any.
+ ///
+ /// A fragment is the part of the URL after the `#` symbol.
+ /// The fragment is optional and, if present, contains a fragment identifier
+ /// that identifies a secondary resource, such as a section heading
+ /// of a document.
+ ///
+ /// In HTML, the fragment identifier is usually the id attribute of an element
+ /// that is scrolled to on load. Browsers typically will not send the fragment portion
+ /// of a URL to the server.
+ ///
+ /// **Note:** the parser did *not* percent-encode this component,
+ /// but the input may have been percent-encoded already.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let url = Url::parse("https://example.com/data.csv#row=4")?;
+ ///
+ /// assert_eq!(url.fragment(), Some("row=4"));
+ ///
+ /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?;
+ ///
+ /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn fragment(&self) -> Option<&str> {
+     match self.fragment_start {
+         Some(hash) => {
+             debug_assert!(self.byte_at(hash) == b'#');
+             // Everything after the '#', to the end of the serialization.
+             Some(self.slice(hash + 1..))
+         }
+         None => None,
+     }
+ }
+
+ fn mutate<F: FnOnce(&mut Parser) -> R, R>(&mut self, f: F) -> R {
+     // Hand our buffer to a setter parser, leaving an empty `String` in its place.
+     let buffer = mem::replace(&mut self.serialization, String::new());
+     let mut setter = Parser::for_setter(buffer);
+     let outcome = f(&mut setter);
+     // Take the (possibly modified) buffer back.
+     self.serialization = setter.serialization;
+     outcome
+ }
+
+ /// Change this URL’s fragment identifier.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let mut url = Url::parse("https://example.com/data.csv")?;
+ /// assert_eq!(url.as_str(), "https://example.com/data.csv");
+ ///
+ /// url.set_fragment(Some("cell=4,1-6,2"));
+ /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2");
+ /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
+ ///
+ /// url.set_fragment(None);
+ /// assert_eq!(url.as_str(), "https://example.com/data.csv");
+ /// assert!(url.fragment().is_none());
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn set_fragment(&mut self, fragment: Option<&str>) {
+     // Cut off the current fragment, '#' included, if there is one.
+     if let Some(hash) = self.fragment_start {
+         debug_assert!(self.byte_at(hash) == b'#');
+         self.serialization.truncate(hash as usize);
+     }
+     match fragment {
+         // No replacement: the URL ends up without a fragment.
+         None => self.fragment_start = None,
+         // Record where the new '#' goes, then let the parser write the fragment after it.
+         Some(input) => {
+             self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
+             self.serialization.push('#');
+             self.mutate(|parser| parser.parse_fragment(parser::Input::new(input)))
+         }
+     }
+ }
+
+ fn take_fragment(&mut self) -> Option<String> {
+     // `take()` clears `fragment_start`; with no fragment there is nothing to detach.
+     let hash = self.fragment_start.take()?;
+     debug_assert!(self.byte_at(hash) == b'#');
+     // Copy the text after '#', then cut the buffer just before the '#'.
+     let detached = self.slice(hash + 1..).to_owned();
+     self.serialization.truncate(hash as usize);
+     Some(detached)
+ }
+
+ fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) {
+     // Nothing was detached, so nothing needs to be put back.
+     let text = match fragment {
+         Some(text) => text,
+         None => return,
+     };
+     assert!(self.fragment_start.is_none());
+     // Append '#' and the saved text verbatim; it is not parsed again.
+     self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
+     self.serialization.push('#');
+     self.serialization.push_str(&text);
+ }
+
+ /// Change this URL’s query string.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let mut url = Url::parse("https://example.com/products")?;
+ /// assert_eq!(url.as_str(), "https://example.com/products");
+ ///
+ /// url.set_query(Some("page=2"));
+ /// assert_eq!(url.as_str(), "https://example.com/products?page=2");
+ /// assert_eq!(url.query(), Some("page=2"));
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn set_query(&mut self, query: Option<&str>) {
+ let fragment = self.take_fragment(); // detach any "#fragment" so the query is the tail of the buffer
+
+ // Remove any previous query
+ if let Some(start) = self.query_start.take() {
+ debug_assert!(self.byte_at(start) == b'?');
+ self.serialization.truncate(start as usize);
+ }
+ // Write the new query, if any
+ if let Some(input) = query {
+ self.query_start = Some(to_u32(self.serialization.len()).unwrap()); // position of the '?' pushed on the next line
+ self.serialization.push('?');
+ let scheme_type = SchemeType::from(self.scheme()); // read before `mutate` swaps the serialization out
+ let scheme_end = self.scheme_end;
+ self.mutate(|parser| {
+ parser.parse_query(scheme_type, scheme_end, parser::Input::new(input))
+ });
+ }
+
+ self.restore_already_parsed_fragment(fragment); // re-append the fragment detached at the top
+ }
+
+ /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs
+ /// in `application/x-www-form-urlencoded` syntax.
+ ///
+ /// The return value has a method-chaining API:
+ ///
+ /// ```rust
+ /// # use url::{Url, ParseError};
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let mut url = Url::parse("https://example.net?lang=fr#nav")?;
+ /// assert_eq!(url.query(), Some("lang=fr"));
+ ///
+ /// url.query_pairs_mut().append_pair("foo", "bar");
+ /// assert_eq!(url.query(), Some("lang=fr&foo=bar"));
+ /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav");
+ ///
+ /// url.query_pairs_mut()
+ /// .clear()
+ /// .append_pair("foo", "bar & baz")
+ /// .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
+ /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
+ /// assert_eq!(url.as_str(),
+ /// "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+ /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`,
+ /// not `url.set_query(None)`.
+ ///
+ /// The state of `Url` is unspecified if this return value is leaked without being dropped.
+ pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<UrlQuery> {
+     // Detach the fragment so the query is the tail of the serialization.
+     let fragment = self.take_fragment();
+
+     let query_start = match self.query_start {
+         Some(start) => {
+             debug_assert!(self.byte_at(start) == b'?');
+             start as usize
+         }
+         // No query yet: start an empty one at the end of the buffer.
+         None => {
+             let end = self.serialization.len();
+             self.query_start = Some(to_u32(end).unwrap());
+             self.serialization.push('?');
+             end
+         }
+     };
+
+     let target = UrlQuery {
+         url: Some(self),
+         fragment,
+     };
+     // The serializer's suffix starts just after the '?'.
+     form_urlencoded::Serializer::for_suffix(target, query_start + "?".len())
+ }
+
+ fn take_after_path(&mut self) -> String {
+     // The path ends at the query if there is one, otherwise at the fragment.
+     let split_at = match self.query_start.or(self.fragment_start) {
+         Some(index) => index,
+         None => return String::new(),
+     };
+     // Copy out the tail, then cut it off the buffer.
+     let tail = self.slice(split_at..).to_owned();
+     self.serialization.truncate(split_at as usize);
+     tail
+ }
+
+ /// Change this URL’s path.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let mut url = Url::parse("https://example.com")?;
+ /// url.set_path("api/comments");
+ /// assert_eq!(url.as_str(), "https://example.com/api/comments");
+ /// assert_eq!(url.path(), "/api/comments");
+ ///
+ /// let mut url = Url::parse("https://example.com/api")?;
+ /// url.set_path("data/report.csv");
+ /// assert_eq!(url.as_str(), "https://example.com/data/report.csv");
+ /// assert_eq!(url.path(), "/data/report.csv");
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn set_path(&mut self, mut path: &str) {
+ let after_path = self.take_after_path(); // detach query and fragment so the path is the tail of the buffer
+ let old_after_path_pos = to_u32(self.serialization.len()).unwrap(); // where the detached tail used to start
+ let cannot_be_a_base = self.cannot_be_a_base(); // computed before the old path is truncated away
+ let scheme_type = SchemeType::from(self.scheme());
+ self.serialization.truncate(self.path_start as usize); // drop the old path
+ self.mutate(|parser| {
+ if cannot_be_a_base {
+ if path.starts_with('/') {
+ parser.serialization.push_str("%2F"); // a leading '/' is written percent-encoded and removed from the input
+ path = &path[1..];
+ }
+ parser.parse_cannot_be_a_base_path(parser::Input::new(path));
+ } else {
+ let mut has_host = true; // FIXME
+ parser.parse_path_start(scheme_type, &mut has_host, parser::Input::new(path));
+ }
+ });
+ self.restore_after_path(old_after_path_pos, &after_path); // shift the stored indices and re-append query/fragment
+ }
+
+ /// Return an object with methods to manipulate this URL’s path segments.
+ ///
+ /// Return `Err(())` if this URL is cannot-be-a-base.
+    pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut, ()> {
+        // Segment editing is only offered for URLs that can be a base.
+        if self.cannot_be_a_base() {
+            return Err(());
+        }
+        Ok(path_segments::new(self))
+    }
+
+    /// Re-append a suffix (query and/or fragment) to the end of the serialization.
+    ///
+    /// `old_after_path_position` is the offset the suffix used to start at.
+    /// `query_start` and `fragment_start`, when set, are shifted by the difference
+    /// between that offset and the current end of the serialization.
+    fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) {
+        let new_after_path_position = to_u32(self.serialization.len()).unwrap();
+        let shifted = self
+            .query_start
+            .iter_mut()
+            .chain(self.fragment_start.iter_mut());
+        for index in shifted {
+            // Subtract first, then add, exactly as positions are relocated elsewhere.
+            *index -= old_after_path_position;
+            *index += new_after_path_position;
+        }
+        self.serialization.push_str(after_path)
+    }
+
+ /// Change this URL’s port number.
+ ///
+ /// Note that default port numbers are not reflected in the serialization.
+ ///
+ /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme;
+ /// do nothing and return `Err`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use std::error::Error;
+ ///
+ /// # fn run() -> Result<(), Box<Error>> {
+ /// let mut url = Url::parse("ssh://example.net:2048/")?;
+ ///
+ /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?;
+ /// assert_eq!(url.as_str(), "ssh://example.net:4096/");
+ ///
+ /// url.set_port(None).map_err(|_| "cannot be base")?;
+ /// assert_eq!(url.as_str(), "ssh://example.net/");
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+ /// Known default port numbers are not reflected:
+ ///
+ /// ```rust
+ /// use url::Url;
+ /// # use std::error::Error;
+ ///
+ /// # fn run() -> Result<(), Box<Error>> {
+ /// let mut url = Url::parse("https://example.org/")?;
+ ///
+ /// url.set_port(Some(443)).map_err(|_| "cannot be base")?;
+ /// assert!(url.port().is_none());
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+ /// Cannot set port for cannot-be-a-base URLs:
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let mut url = Url::parse("mailto:rms@example.net")?;
+ ///
+ /// let result = url.set_port(Some(80));
+ /// assert!(result.is_err());
+ ///
+ /// let result = url.set_port(None);
+ /// assert!(result.is_err());
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+    pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> {
+        // has_host implies !cannot_be_a_base
+        let host_missing = !self.has_host() || self.host() == Some(Host::Domain(""));
+        if host_missing || self.scheme() == "file" {
+            return Err(());
+        }
+        // The scheme's known default port is never written to the serialization.
+        if port.is_some() && port == parser::default_port(self.scheme()) {
+            port = None
+        }
+        self.set_port_internal(port);
+        Ok(())
+    }
+
+    /// Rewrite the serialized port and record it in `self.port`.
+    ///
+    /// `None` removes any serialized port; `Some(n)` writes `:n` right after the host.
+    /// `path_start`, `query_start` and `fragment_start` are shifted to match.
+    /// No default-port normalization is performed here.
+    fn set_port_internal(&mut self, port: Option<u16>) {
+        // Nothing to rewrite when the port does not actually change.
+        if self.port != port {
+            match port {
+                None => {
+                    // Remove the `:port` text sitting between the host and the path.
+                    let removed = self.path_start - self.host_end;
+                    self.serialization
+                        .drain(self.host_end as usize..self.path_start as usize);
+                    self.path_start = self.host_end;
+                    let later = self
+                        .query_start
+                        .iter_mut()
+                        .chain(self.fragment_start.iter_mut());
+                    for index in later {
+                        *index -= removed
+                    }
+                }
+                Some(new) => {
+                    // Cut at the end of the host, write the new port, re-append the rest.
+                    let tail = self.slice(self.path_start..).to_owned();
+                    self.serialization.truncate(self.host_end as usize);
+                    write!(&mut self.serialization, ":{}", new).unwrap();
+                    let previous_path_start = self.path_start;
+                    let shifted_path_start = to_u32(self.serialization.len()).unwrap();
+                    self.path_start = shifted_path_start;
+                    let later = self
+                        .query_start
+                        .iter_mut()
+                        .chain(self.fragment_start.iter_mut());
+                    for index in later {
+                        *index -= previous_path_start;
+                        *index += shifted_path_start;
+                    }
+                    self.serialization.push_str(&tail);
+                }
+            }
+        }
+        self.port = port;
+    }
+
+ /// Change this URL’s host.
+ ///
+ /// Removing the host (calling this with `None`)
+ /// will also remove any username, password, and port number.
+ ///
+ /// # Examples
+ ///
+ /// Change host:
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let mut url = Url::parse("https://example.net")?;
+ /// let result = url.set_host(Some("rust-lang.org"));
+ /// assert!(result.is_ok());
+ /// assert_eq!(url.as_str(), "https://rust-lang.org/");
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+ /// Remove host:
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let mut url = Url::parse("foo://example.net")?;
+ /// let result = url.set_host(None);
+ /// assert!(result.is_ok());
+ /// assert_eq!(url.as_str(), "foo:/");
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+ /// Cannot remove host for 'special' schemes (e.g. `http`):
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let mut url = Url::parse("https://example.net")?;
+ /// let result = url.set_host(None);
+ /// assert!(result.is_err());
+ /// assert_eq!(url.as_str(), "https://example.net/");
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+ /// Cannot change or remove host for cannot-be-a-base URLs:
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let mut url = Url::parse("mailto:rms@example.net")?;
+ ///
+ /// let result = url.set_host(Some("rust-lang.org"));
+ /// assert!(result.is_err());
+ /// assert_eq!(url.as_str(), "mailto:rms@example.net");
+ ///
+ /// let result = url.set_host(None);
+ /// assert!(result.is_err());
+ /// assert_eq!(url.as_str(), "mailto:rms@example.net");
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+ /// # Errors
+ ///
+ /// If this URL is cannot-be-a-base or there is an error parsing the given `host`,
+ /// a [`ParseError`] variant will be returned.
+ ///
+ /// [`ParseError`]: enum.ParseError.html
+    pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> {
+        if self.cannot_be_a_base() {
+            return Err(ParseError::SetHostOnCannotBeABaseUrl);
+        }
+
+        // The scheme does not change in this method, so classify it once.
+        let is_special = SchemeType::from(self.scheme()).is_special();
+
+        if let Some(host) = host {
+            if is_special {
+                // Special schemes never accept an empty host.
+                if host.is_empty() {
+                    return Err(ParseError::EmptyHost);
+                }
+                self.set_host_internal(Host::parse(host)?, None)
+            } else {
+                // Other schemes keep the host as an opaque string.
+                self.set_host_internal(Host::parse_opaque(host)?, None)
+            }
+        } else if self.has_host() {
+            // Removing the host is not allowed for special schemes.
+            if is_special {
+                return Err(ParseError::EmptyHost);
+            }
+            debug_assert!(self.byte_at(self.scheme_end) == b':');
+            debug_assert!(self.byte_at(self.path_start) == b'/');
+            // Drop the whole authority (`//user:pass@host:port`): the path now
+            // starts right after the scheme's `:`.
+            let new_path_start = self.scheme_end + 1;
+            self.serialization
+                .drain(new_path_start as usize..self.path_start as usize);
+            let offset = self.path_start - new_path_start;
+            self.path_start = new_path_start;
+            self.username_end = new_path_start;
+            self.host_start = new_path_start;
+            self.host_end = new_path_start;
+            self.port = None;
+            // Keep the parsed host in sync with the serialization, which no longer
+            // contains a host. Without this the stale previous host stays recorded.
+            self.host = HostInternal::None;
+            if let Some(ref mut index) = self.query_start {
+                *index -= offset
+            }
+            if let Some(ref mut index) = self.fragment_start {
+                *index -= offset
+            }
+        }
+        Ok(())
+    }
+
+    /// Write `host` into the serialization in place of the current host and record it.
+    ///
+    /// `opt_new_port`: `None` means leave the port unchanged, `Some(None)` means remove
+    /// any port number, `Some(Some(n))` means set the port to `n`.
+    ///
+    /// If the URL had no authority, `//` is inserted first. `path_start`, `query_start`
+    /// and `fragment_start` are shifted to follow the rewritten text.
+    fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
+        // When the port is rewritten too, the preserved tail starts at the path;
+        // otherwise it starts right after the old host and so carries the existing port.
+        let old_suffix_pos = match opt_new_port {
+            Some(_) => self.path_start,
+            None => self.host_end,
+        };
+        let tail = self.slice(old_suffix_pos..).to_owned();
+        self.serialization.truncate(self.host_start as usize);
+        if !self.has_authority() {
+            debug_assert!(self.slice(self.scheme_end..self.host_start) == ":");
+            debug_assert!(self.username_end == self.host_start);
+            self.serialization.push_str("//");
+            self.username_end += 2;
+            self.host_start += 2;
+        }
+        write!(&mut self.serialization, "{}", host).unwrap();
+        self.host_end = to_u32(self.serialization.len()).unwrap();
+        self.host = host.into();
+
+        if let Some(new_port) = opt_new_port {
+            self.port = new_port;
+            if let Some(port) = new_port {
+                write!(&mut self.serialization, ":{}", port).unwrap();
+            }
+        }
+        let new_suffix_pos = to_u32(self.serialization.len()).unwrap();
+        self.serialization.push_str(&tail);
+
+        // Everything from the tail onwards moved by the same amount.
+        let relocate = |index: u32| index - old_suffix_pos + new_suffix_pos;
+        self.path_start = relocate(self.path_start);
+        self.query_start = self.query_start.map(&relocate);
+        self.fragment_start = self.fragment_start.map(&relocate);
+    }
+
+ /// Change this URL’s host to the given IP address.
+ ///
+ /// If this URL is cannot-be-a-base, do nothing and return `Err`.
+ ///
+ /// Compared to `Url::set_host`, this skips the host parser.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use url::{Url, ParseError};
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let mut url = Url::parse("http://example.com")?;
+ /// url.set_ip_host("127.0.0.1".parse().unwrap());
+ /// assert_eq!(url.host_str(), Some("127.0.0.1"));
+ /// assert_eq!(url.as_str(), "http://127.0.0.1/");
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+ /// Cannot change the host of a cannot-be-a-base URL (such as a `mailto:` URL) to an IP address:
+ ///
+ /// ```rust
+ /// use url::{Url, ParseError};
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let mut url = Url::parse("mailto:rms@example.com")?;
+ /// let result = url.set_ip_host("127.0.0.1".parse().unwrap());
+ ///
+ /// assert_eq!(url.as_str(), "mailto:rms@example.com");
+ /// assert!(result.is_err());
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+    pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> {
+        if self.cannot_be_a_base() {
+            return Err(());
+        }
+
+        // Each IP address family maps directly onto a host variant.
+        let host = match address {
+            IpAddr::V4(v4) => Host::Ipv4(v4),
+            IpAddr::V6(v6) => Host::Ipv6(v6),
+        };
+        self.set_host_internal(host, None);
+        Ok(())
+    }
+
+ /// Change this URL’s password.
+ ///
+ /// If this URL is cannot-be-a-base, does not have a (non-empty) host, or has the `file` scheme,
+ /// do nothing and return `Err`.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// use url::{Url, ParseError};
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let mut url = Url::parse("mailto:rmz@example.com")?;
+ /// let result = url.set_password(Some("secret_password"));
+ /// assert!(result.is_err());
+ ///
+ /// let mut url = Url::parse("ftp://user1:secret1@example.com")?;
+ /// let result = url.set_password(Some("secret_password"));
+ /// assert_eq!(url.password(), Some("secret_password"));
+ ///
+ /// let mut url = Url::parse("ftp://user2:@example.com")?;
+ /// let result = url.set_password(Some("secret2"));
+ /// assert!(result.is_ok());
+ /// assert_eq!(url.password(), Some("secret2"));
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+    pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> {
+        // has_host implies !cannot_be_a_base
+        if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
+            return Err(());
+        }
+        match password {
+            Some(password) => {
+                // Rebuild `:password@` right after the username, then re-append
+                // the host and everything behind it.
+                let tail = self.slice(self.host_start..).to_owned();
+                self.serialization.truncate(self.username_end as usize);
+                self.serialization.push(':');
+                self.serialization
+                    .extend(utf8_percent_encode(password, USERINFO));
+                self.serialization.push('@');
+
+                let old_host_start = self.host_start;
+                let new_host_start = to_u32(self.serialization.len()).unwrap();
+                let shift = |index: u32| index - old_host_start + new_host_start;
+                self.host_start = new_host_start;
+                self.host_end = shift(self.host_end);
+                self.path_start = shift(self.path_start);
+                self.query_start = self.query_start.map(&shift);
+                self.fragment_start = self.fragment_start.map(&shift);
+
+                self.serialization.push_str(&tail);
+            }
+            None if self.byte_at(self.username_end) == b':' => {
+                // If there is a password to remove
+                let has_username_or_password = self.byte_at(self.host_start - 1) == b'@';
+                debug_assert!(has_username_or_password);
+                let username_start = self.scheme_end + 3;
+                let start = self.username_end; // Remove the ':'
+                let end = if username_start == self.username_end {
+                    // Empty username: remove the '@' as well
+                    self.host_start
+                } else {
+                    // Keep the '@' to separate the username from the host
+                    self.host_start - 1
+                };
+                self.serialization.drain(start as usize..end as usize);
+                let offset = end - start;
+                self.host_start -= offset;
+                self.host_end -= offset;
+                self.path_start -= offset;
+                let later = self
+                    .query_start
+                    .iter_mut()
+                    .chain(self.fragment_start.iter_mut());
+                for index in later {
+                    *index -= offset
+                }
+            }
+            // No password requested and none present: nothing to do.
+            None => {}
+        }
+        Ok(())
+    }
+
+ /// Change this URL’s username.
+ ///
+ /// If this URL is cannot-be-a-base, does not have a (non-empty) host, or has the `file` scheme,
+ /// do nothing and return `Err`.
+ ///
+ /// # Examples
+ ///
+ /// Cannot set a username on a cannot-be-a-base URL (such as a `mailto:` URL):
+ ///
+ /// ```rust
+ /// use url::{Url, ParseError};
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let mut url = Url::parse("mailto:rmz@example.com")?;
+ /// let result = url.set_username("user1");
+ /// assert_eq!(url.as_str(), "mailto:rmz@example.com");
+ /// assert!(result.is_err());
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+ /// Setup username to user1
+ ///
+ /// ```rust
+ /// use url::{Url, ParseError};
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let mut url = Url::parse("ftp://:secre1@example.com/")?;
+ /// let result = url.set_username("user1");
+ /// assert!(result.is_ok());
+ /// assert_eq!(url.username(), "user1");
+ /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/");
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ pub fn set_username(&mut self, username: &str) -> Result<(), ()> {
+ // has_host implies !cannot_be_a_base
+ if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
+ return Err(());
+ }
+ // The username begins right after the "://" that follows the scheme.
+ let username_start = self.scheme_end + 3;
+ debug_assert!(self.slice(self.scheme_end..username_start) == "://");
+ // Nothing to do when the serialized username already equals the requested text.
+ if self.slice(username_start..self.username_end) == username {
+ return Ok(());
+ }
+ // Save everything after the old username, then write the new (percent-encoded) one.
+ let after_username = self.slice(self.username_end..).to_owned();
+ self.serialization.truncate(username_start as usize);
+ self.serialization
+ .extend(utf8_percent_encode(username, USERINFO));
+
+ // `removed_bytes` / `added_bytes` are the old and new offsets of the end of the
+ // username; each gets +1 below when an '@' separator is dropped or inserted.
+ let mut removed_bytes = self.username_end;
+ self.username_end = to_u32(self.serialization.len()).unwrap();
+ let mut added_bytes = self.username_end;
+
+ let new_username_is_empty = self.username_end == username_start;
+ match (new_username_is_empty, after_username.chars().next()) {
+ // Username removed and no password follows: the '@' separator goes away too.
+ (true, Some('@')) => {
+ removed_bytes += 1;
+ self.serialization.push_str(&after_username[1..]);
+ }
+ // A separator ('@' or ':password@') is already in place, or none is needed
+ // because the new username is empty: re-append the saved text unchanged.
+ (false, Some('@')) | (_, Some(':')) | (true, _) => {
+ self.serialization.push_str(&after_username);
+ }
+ // Non-empty username where there was no userinfo before: insert the '@'.
+ (false, _) => {
+ added_bytes += 1;
+ self.serialization.push('@');
+ self.serialization.push_str(&after_username);
+ }
+ }
+
+ // Shift every later component by the change in userinfo length.
+ let adjust = |index: &mut u32| {
+ *index -= removed_bytes;
+ *index += added_bytes;
+ };
+ adjust(&mut self.host_start);
+ adjust(&mut self.host_end);
+ adjust(&mut self.path_start);
+ if let Some(ref mut index) = self.query_start {
+ adjust(index)
+ }
+ if let Some(ref mut index) = self.fragment_start {
+ adjust(index)
+ }
+ Ok(())
+ }
+
+ /// Change this URL’s scheme.
+ ///
+ /// Do nothing and return `Err` if:
+ ///
+ /// * The new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+`
+ /// * This URL is cannot-be-a-base and the new scheme is one of
+ /// `http`, `https`, `ws`, `wss`, `ftp`, or `gopher`
+ ///
+ /// # Examples
+ ///
+ /// Change the URL’s scheme from `https` to `foo`:
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let mut url = Url::parse("https://example.net")?;
+ /// let result = url.set_scheme("foo");
+ /// assert_eq!(url.as_str(), "foo://example.net/");
+ /// assert!(result.is_ok());
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+ ///
+ /// Cannot change URL’s scheme from `https` to `foõ`:
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let mut url = Url::parse("https://example.net")?;
+ /// let result = url.set_scheme("foõ");
+ /// assert_eq!(url.as_str(), "https://example.net/");
+ /// assert!(result.is_err());
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+ ///
+ /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`:
+ ///
+ /// ```
+ /// use url::Url;
+ /// # use url::ParseError;
+ ///
+ /// # fn run() -> Result<(), ParseError> {
+ /// let mut url = Url::parse("mailto:rms@example.net")?;
+ /// let result = url.set_scheme("https");
+ /// assert_eq!(url.as_str(), "mailto:rms@example.net");
+ /// assert!(result.is_err());
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// ```
+    pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
+        // Parse the candidate into a fresh buffer so that `self` stays untouched on error.
+        let mut scheme_parser = Parser::for_setter(String::new());
+        let remaining = scheme_parser.parse_scheme(parser::Input::new(scheme))?;
+        // Anything left over means `scheme` was not just a scheme.
+        if !remaining.is_empty() {
+            return Err(());
+        }
+        // A URL without a host cannot switch to a special scheme.
+        if !self.has_host() && SchemeType::from(&scheme_parser.serialization).is_special() {
+            return Err(());
+        }
+        let old_scheme_end = self.scheme_end;
+        let new_scheme_end = to_u32(scheme_parser.serialization.len()).unwrap();
+        let shift = |index: u32| index - old_scheme_end + new_scheme_end;
+
+        self.scheme_end = new_scheme_end;
+        self.username_end = shift(self.username_end);
+        self.host_start = shift(self.host_start);
+        self.host_end = shift(self.host_end);
+        self.path_start = shift(self.path_start);
+        self.query_start = self.query_start.map(&shift);
+        self.fragment_start = self.fragment_start.map(&shift);
+
+        // New scheme followed by the rest of the old serialization (from its ':' onwards).
+        let rest = self.slice(old_scheme_end..);
+        scheme_parser.serialization.push_str(rest);
+        self.serialization = scheme_parser.serialization;
+        Ok(())
+    }
+
+ /// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
+ ///
+ /// This returns `Err` if the given path is not absolute or,
+ /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
+ ///
+ /// # Examples
+ ///
+ /// On Unix-like platforms:
+ ///
+ /// ```
+ /// # if cfg!(unix) {
+ /// use url::Url;
+ ///
+ /// # fn run() -> Result<(), ()> {
+ /// let url = Url::from_file_path("/tmp/foo.txt")?;
+ /// assert_eq!(url.as_str(), "file:///tmp/foo.txt");
+ ///
+ /// let url = Url::from_file_path("../foo.txt");
+ /// assert!(url.is_err());
+ ///
+ /// let url = Url::from_file_path("https://google.com/");
+ /// assert!(url.is_err());
+ /// # Ok(())
+ /// # }
+ /// # run().unwrap();
+ /// # }
+ /// ```
+    #[cfg(any(unix, windows, target_os = "redox"))]
+    pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
+        let mut serialization = String::from("file://");
+        // The host (if any) is written immediately after the `file://` prefix.
+        let host_start = serialization.len() as u32;
+        let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?;
+        let scheme_end = "file".len() as u32;
+        let url = Url {
+            serialization,
+            scheme_end,
+            // No userinfo is ever produced here.
+            username_end: host_start,
+            host_start,
+            host_end,
+            host,
+            port: None,
+            // The path follows the host directly.
+            path_start: host_end,
+            query_start: None,
+            fragment_start: None,
+        };
+        Ok(url)
+    }
+
+ /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme.
+ ///
+ /// This returns `Err` if the given path is not absolute or,
+ /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
+ ///
+ /// Compared to `from_file_path`, this ensures that the URL’s path has a trailing slash
+ /// so that the entire path is considered when using this URL as a base URL.
+ ///
+ /// For example:
+ ///
+ /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))`
+ /// as the base URL is `file:///var/www/index.html`
+ /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))`
+ /// as the base URL is `file:///var/index.html`, which might not be what was intended.
+ ///
+ /// Note that `std::path` does not consider trailing slashes significant
+ /// and usually does not include them (e.g. in `Path::parent()`).
+    #[cfg(any(unix, windows, target_os = "redox"))]
+    pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
+        Url::from_file_path(path).map(|mut url| {
+            // The path runs to the end of the serialization, so appending there is enough.
+            if !url.serialization.ends_with('/') {
+                url.serialization.push('/')
+            }
+            url
+        })
+    }
+
+ /// Serialize with Serde using the internal representation of the `Url` struct.
+ ///
+ /// The corresponding `deserialize_internal` method sacrifices some invariant-checking
+ /// for speed, compared to the `Deserialize` trait impl.
+ ///
+ /// This method is only available if the `serde` Cargo feature is enabled.
+    #[cfg(feature = "serde")]
+    #[deny(unused)]
+    pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        use serde::Serialize;
+        // Exhaustive destructuring: adding or removing a field of `Url` stops this
+        // from compiling until the method is updated to match.
+        let &Url {
+            ref serialization,
+            ref scheme_end,
+            ref username_end,
+            ref host_start,
+            ref host_end,
+            ref host,
+            ref port,
+            ref path_start,
+            ref query_start,
+            ref fragment_start,
+        } = self;
+        // Fields are emitted as one tuple, in declaration order.
+        let fields = (
+            serialization,
+            scheme_end,
+            username_end,
+            host_start,
+            host_end,
+            host,
+            port,
+            path_start,
+            query_start,
+            fragment_start,
+        );
+        fields.serialize(serializer)
+    }
+
+ /// Deserialize with Serde using the internal representation of the `Url` struct,
+ /// as written by the corresponding `serialize_internal` method.
+ ///
+ /// This method sacrifices some invariant-checking for speed, compared to the
+ /// `Deserialize` trait impl: invariants are only checked when debug assertions are enabled.
+ ///
+ /// This method is only available if the `serde` Cargo feature is enabled.
+    #[cfg(feature = "serde")]
+    #[deny(unused)]
+    pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        use serde::de::{Deserialize, Error, Unexpected};
+        // The fields arrive as one tuple, in the same order `serialize_internal` writes them.
+        let (
+            serialization,
+            scheme_end,
+            username_end,
+            host_start,
+            host_end,
+            host,
+            port,
+            path_start,
+            query_start,
+            fragment_start,
+        ) = Deserialize::deserialize(deserializer)?;
+        let url = Url {
+            serialization,
+            scheme_end,
+            username_end,
+            host_start,
+            host_end,
+            host,
+            port,
+            path_start,
+            query_start,
+            fragment_start,
+        };
+        // Invariants are only verified when debug assertions are enabled.
+        if cfg!(debug_assertions) {
+            if let Err(reason) = url.check_invariants() {
+                let reason: &str = &reason;
+                return Err(Error::invalid_value(Unexpected::Other("value"), &reason));
+            }
+        }
+        Ok(url)
+    }
+
+ /// Assuming the URL is in the `file` scheme or similar,
+ /// convert its path to an absolute `std::path::Path`.
+ ///
+ /// **Note:** This does not actually check the URL’s `scheme`,
+ /// and may give nonsensical results for other schemes.
+ /// It is the user’s responsibility to check the URL’s scheme before calling this.
+ ///
+ /// ```
+ /// # use url::Url;
+ /// # let url = Url::parse("file:///etc/passwd").unwrap();
+ /// let path = url.to_file_path();
+ /// ```
+ ///
+ /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
+ /// `file:` URLs may have a non-local host),
+ /// or if `Path::new_opt()` returns `None`.
+ /// (That is, if the percent-decoded path contains a NUL byte or,
+ /// for a Windows path, is not UTF-8.)
+    #[inline]
+    #[cfg(any(unix, windows, target_os = "redox"))]
+    pub fn to_file_path(&self) -> Result<PathBuf, ()> {
+        // No path segments means no file path can be built.
+        let segments = self.path_segments().ok_or(())?;
+        let host = match self.host() {
+            // An absent host and `localhost` both mean the local machine.
+            None | Some(Host::Domain("localhost")) => None,
+            // On Windows, a `file:` URL may name a remote host.
+            Some(_) if cfg!(windows) && self.scheme() == "file" => {
+                Some(self.slice(self.host_start..self.host_end))
+            }
+            _ => return Err(()),
+        };
+        file_url_segments_to_pathbuf(host, segments)
+    }
+
+ // Private helper methods:
+
+    /// Borrow the part of the serialization covered by `range`.
+    ///
+    /// `range` is a `u32` range (`a..b`, `a..` or `..b`) of byte offsets.
+    #[inline]
+    fn slice<R>(&self, range: R) -> &str
+    where
+        R: RangeArg,
+    {
+        RangeArg::slice_of(&range, self.serialization.as_str())
+    }
+
+    /// Return the byte of the serialization at offset `i`.
+    ///
+    /// Panics if `i` is out of bounds.
+    #[inline]
+    fn byte_at(&self, i: u32) -> u8 {
+        let bytes = self.serialization.as_bytes();
+        bytes[i as usize]
+    }
+}
+
+/// Parse a string as an URL, without a base URL or encoding override.
+///
+/// Equivalent to calling `Url::parse` on the input.
+impl str::FromStr for Url {
+    type Err = ParseError;
+
+    #[inline]
+    fn from_str(input: &str) -> Result<Self, Self::Err> {
+        Self::parse(input)
+    }
+}
+
+/// Display the serialization of this URL.
+///
+/// Formatting is delegated to the serialized string, so formatter flags apply to it.
+impl fmt::Display for Url {
+    #[inline]
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        <str as fmt::Display>::fmt(&self.serialization, formatter)
+    }
+}
+
+/// Debug the serialization of this URL.
+///
+/// Only the serialized string is shown, not the individual component offsets.
+impl fmt::Debug for Url {
+    #[inline]
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        <str as fmt::Debug>::fmt(&self.serialization, formatter)
+    }
+}
+
+/// URLs compare like their serialization.
+///
+/// Marker impl with no methods of its own; the comparison itself is provided by
+/// the `PartialEq` impl for `Url`.
+impl Eq for Url {}
+
+/// URLs compare like their serialization.
+///
+/// Two URLs are equal exactly when their serialized strings are equal.
+impl PartialEq for Url {
+    #[inline]
+    fn eq(&self, other: &Self) -> bool {
+        self.serialization.as_str() == other.serialization.as_str()
+    }
+}
+
+/// URLs compare like their serialization.
+///
+/// The ordering is the ordering of the serialized strings.
+impl Ord for Url {
+    #[inline]
+    fn cmp(&self, other: &Self) -> cmp::Ordering {
+        Ord::cmp(&self.serialization, &other.serialization)
+    }
+}
+
+/// URLs compare like their serialization.
+///
+/// The ordering is total, so this always returns `Some` of the `Ord` result.
+impl PartialOrd for Url {
+    #[inline]
+    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+/// URLs hash like their serialization.
+///
+/// Only the serialized string is fed to the hasher.
+impl hash::Hash for Url {
+    #[inline]
+    fn hash<H>(&self, state: &mut H)
+    where
+        H: hash::Hasher,
+    {
+        <String as hash::Hash>::hash(&self.serialization, state)
+    }
+}
+
+/// Return the serialization of this URL.
+///
+/// The returned string slice borrows from the `Url`.
+impl AsRef<str> for Url {
+    #[inline]
+    fn as_ref(&self) -> &str {
+        self.serialization.as_str()
+    }
+}
+
+/// A `u32`-indexed range that can cut a sub-slice out of a string.
+///
+/// Bounds are byte offsets; slicing panics as ordinary `str` indexing does when a
+/// bound is out of range or not on a character boundary.
+trait RangeArg {
+    /// Return the part of `s` selected by this range.
+    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
+}
+
+/// `start..end`
+impl RangeArg for Range<u32> {
+    #[inline]
+    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
+        let (from, to) = (self.start as usize, self.end as usize);
+        &s[from..to]
+    }
+}
+
+/// `start..`
+impl RangeArg for RangeFrom<u32> {
+    #[inline]
+    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
+        let from = self.start as usize;
+        &s[from..]
+    }
+}
+
+/// `..end`
+impl RangeArg for RangeTo<u32> {
+    #[inline]
+    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
+        let to = self.end as usize;
+        &s[..to]
+    }
+}
+
+/// Serializes this URL into a `serde` stream.
+///
+/// The URL is written as a single string (its serialization).
+///
+/// This implementation is only available if the `serde` Cargo feature is enabled.
+#[cfg(feature = "serde")]
+impl serde::Serialize for Url {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        let text: &str = self.as_str();
+        serializer.serialize_str(text)
+    }
+}
+
+/// Deserializes this URL from a `serde` stream.
+///
+/// The input must be a string; it is parsed with `Url::parse`.
+///
+/// This implementation is only available if the `serde` Cargo feature is enabled.
+#[cfg(feature = "serde")]
+impl<'de> serde::Deserialize<'de> for Url {
+    fn deserialize<D>(deserializer: D) -> Result<Url, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        use serde::de::{Error, Unexpected, Visitor};
+
+        /// Visitor that accepts a string and parses it as an URL.
+        struct UrlVisitor;
+
+        impl<'de> Visitor<'de> for UrlVisitor {
+            type Value = Url;
+
+            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+                formatter.write_str("a string representing an URL")
+            }
+
+            fn visit_str<E>(self, s: &str) -> Result<Self::Value, E>
+            where
+                E: Error,
+            {
+                match Url::parse(s) {
+                    Ok(url) => Ok(url),
+                    // Report the offending input together with the parser's description.
+                    Err(err) => Err(Error::invalid_value(
+                        Unexpected::Str(s),
+                        &err.description(),
+                    )),
+                }
+            }
+        }
+
+        deserializer.deserialize_str(UrlVisitor)
+    }
+}
+
+/// Append the percent-encoded components of an absolute Unix path to `serialization`.
+///
+/// Returns the offset where the path starts (recorded as `host_end`) together with
+/// `HostInternal::None`, or `Err(())` if `path` is not absolute.
+#[cfg(any(unix, target_os = "redox"))]
+fn path_to_file_url_segments(
+    path: &Path,
+    serialization: &mut String,
+) -> Result<(u32, HostInternal), ()> {
+    use std::os::unix::prelude::OsStrExt;
+    if !path.is_absolute() {
+        return Err(());
+    }
+    let path_origin = serialization.len();
+    let host_end = to_u32(path_origin).unwrap();
+    // skip the root component
+    for component in path.components().skip(1) {
+        serialization.push('/');
+        serialization.extend(percent_encode(
+            component.as_os_str().as_bytes(),
+            PATH_SEGMENT,
+        ));
+    }
+    if serialization.len() == path_origin {
+        // Nothing was written: an URL’s path must not be empty.
+        serialization.push('/');
+    }
+    Ok((host_end, HostInternal::None))
+}
+
+/// Windows build of `path_to_file_url_segments`: forwards directly to
+/// `path_to_file_url_segments_windows`.
+#[cfg(windows)]
+fn path_to_file_url_segments(
+ path: &Path,
+ serialization: &mut String,
+) -> Result<(u32, HostInternal), ()> {
+ path_to_file_url_segments_windows(path, serialization)
+}
+
+// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
+/// Append the URL form of an absolute Windows path to `serialization`.
+///
+/// Disk prefixes (`C:`) are written as `/C:` with no host; UNC prefixes write the
+/// server as the host and the share as the first path segment. Returns the offset
+/// where the host ends and the parsed host, or `Err(())` for relative paths,
+/// other prefix kinds, an unparsable server name, or non-UTF-8 components.
+#[cfg_attr(not(windows), allow(dead_code))]
+fn path_to_file_url_segments_windows(
+    path: &Path,
+    serialization: &mut String,
+) -> Result<(u32, HostInternal), ()> {
+    use std::path::{Component, Prefix};
+    if !path.is_absolute() {
+        return Err(());
+    }
+    let mut components = path.components();
+
+    // The first component must be a prefix.
+    let prefix = match components.next() {
+        Some(Component::Prefix(prefix)) => prefix.kind(),
+        _ => return Err(()),
+    };
+    let (host_end, host_internal) = match prefix {
+        Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
+            let host_end = to_u32(serialization.len()).unwrap();
+            serialization.push('/');
+            serialization.push(letter as char);
+            serialization.push(':');
+            (host_end, HostInternal::None)
+        }
+        Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
+            let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
+            write!(serialization, "{}", host).unwrap();
+            let host_end = to_u32(serialization.len()).unwrap();
+            let host_internal: HostInternal = host.into();
+            serialization.push('/');
+            let share = share.to_str().ok_or(())?;
+            serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
+            (host_end, host_internal)
+        }
+        _ => return Err(()),
+    };
+
+    // The root directory is implied by the '/' separators written below.
+    for component in components.filter(|component| *component != Component::RootDir) {
+        // FIXME: somehow work with non-unicode?
+        let component = component.as_os_str().to_str().ok_or(())?;
+        serialization.push('/');
+        serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT));
+    }
+    Ok((host_end, host_internal))
+}
+
+/// Build an absolute Unix path from percent-encoded URL path segments.
+///
+/// Any host is rejected with `Err(())`. On Redox the resulting path is prefixed
+/// with `file:`.
+#[cfg(any(unix, target_os = "redox"))]
+fn file_url_segments_to_pathbuf(
+    host: Option<&str>,
+    segments: str::Split<char>,
+) -> Result<PathBuf, ()> {
+    use std::ffi::OsStr;
+    use std::os::unix::prelude::OsStrExt;
+
+    if host.is_some() {
+        return Err(());
+    }
+
+    let mut bytes: Vec<u8> = Vec::new();
+    if cfg!(target_os = "redox") {
+        bytes.extend_from_slice(b"file:");
+    }
+    // Every segment is introduced by a '/', followed by its percent-decoded bytes.
+    for segment in segments {
+        bytes.push(b'/');
+        bytes.extend(percent_decode(segment.as_bytes()));
+    }
+    let path = PathBuf::from(OsStr::from_bytes(&bytes));
+    debug_assert!(
+        path.is_absolute(),
+        "to_file_path() failed to produce an absolute Path"
+    );
+    Ok(path)
+}
+
+/// Windows build of `file_url_segments_to_pathbuf`: forwards directly to
+/// `file_url_segments_to_pathbuf_windows`.
+#[cfg(windows)]
+fn file_url_segments_to_pathbuf(
+ host: Option<&str>,
+ segments: str::Split<char>,
+) -> Result<PathBuf, ()> {
+ file_url_segments_to_pathbuf_windows(host, segments)
+}
+
+// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
+/// Build a Windows path from an optional host and percent-encoded URL path segments.
+///
+/// With a host the result starts with `\\host`; otherwise the first segment must be
+/// a drive letter followed by `:` (or by its percent-encoded form `%3A` / `%3a`).
+/// Returns `Err(())` when that first segment is malformed or when a decoded segment
+/// is not valid UTF-8.
+#[cfg_attr(not(windows), allow(dead_code))]
+fn file_url_segments_to_pathbuf_windows(
+    host: Option<&str>,
+    mut segments: str::Split<char>,
+) -> Result<PathBuf, ()> {
+    let mut string = match host {
+        Some(host) => r"\\".to_owned() + host,
+        None => {
+            let first = segments.next().ok_or(())?;
+            let bytes = first.as_bytes();
+
+            match bytes.len() {
+                // "C:"
+                2 => {
+                    if !first.starts_with(parser::ascii_alpha) || bytes[1] != b':' {
+                        return Err(());
+                    }
+
+                    first.to_owned()
+                }
+
+                // "C%3A" or "C%3a"
+                4 => {
+                    if !first.starts_with(parser::ascii_alpha) {
+                        return Err(());
+                    }
+                    let encoded_colon = bytes[1] == b'%'
+                        && bytes[2] == b'3'
+                        && (bytes[3] == b'a' || bytes[3] == b'A');
+                    if !encoded_colon {
+                        return Err(());
+                    }
+
+                    first[0..1].to_owned() + ":"
+                }
+
+                _ => return Err(()),
+            }
+        }
+    };
+
+    for segment in segments {
+        string.push('\\');
+
+        // Currently non-unicode windows paths cannot be represented
+        let decoded =
+            String::from_utf8(percent_decode(segment.as_bytes()).collect()).map_err(|_| ())?;
+        string.push_str(&decoded);
+    }
+    let path = PathBuf::from(string);
+    debug_assert!(
+        path.is_absolute(),
+        "to_file_path() failed to produce an absolute Path"
+    );
+    Ok(path)
+}
+
+/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
+///
+/// When dropped, it hands the stored fragment back to the URL through
+/// `restore_already_parsed_fragment`.
+#[derive(Debug)]
+pub struct UrlQuery<'a> {
+ // The URL being edited; taken out (left as `None`) when the value is dropped.
+ url: Option<&'a mut Url>,
+ // Fragment held while the query is edited; passed back to the URL on drop.
+ fragment: Option<String>,
+}
+
+/// Hands the held fragment back to the URL once query editing is finished.
+impl<'a> Drop for UrlQuery<'a> {
+    fn drop(&mut self) {
+        // Nothing to restore if the URL reference has already been taken.
+        let url = match self.url.take() {
+            Some(url) => url,
+            None => return,
+        };
+        url.restore_already_parsed_fragment(self.fragment.take())
+    }
+}
diff --git a/third_party/rust/url/src/origin.rs b/third_party/rust/url/src/origin.rs
new file mode 100644
index 0000000000..3223709dde
--- /dev/null
+++ b/third_party/rust/url/src/origin.rs
@@ -0,0 +1,113 @@
+// Copyright 2016 The rust-url developers.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use host::Host;
+use idna::domain_to_unicode;
+use parser::default_port;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use Url;
+
+/// Computes the origin of `url` from its scheme.
+///
+/// - `blob`: the path is parsed as a URL and that URL's origin is returned;
+///   if parsing fails the result is a fresh opaque origin.
+/// - `ftp`, `gopher`, `http`, `https`, `ws`, `wss`: a `(scheme, host, port)` tuple.
+///   The host and the port-or-default are unwrapped, so this panics if either
+///   is absent.
+/// - anything else, including `file`: a fresh opaque origin.
+pub fn url_origin(url: &Url) -> Origin {
+    let scheme = url.scheme();
+    match scheme {
+        "blob" => match Url::parse(url.path()) {
+            Ok(ref inner) => url_origin(inner),
+            Err(_) => Origin::new_opaque(),
+        },
+        "ftp" | "gopher" | "http" | "https" | "ws" | "wss" => {
+            let owned_scheme = scheme.to_owned();
+            let host = url.host().unwrap().to_owned();
+            let port = url.port_or_known_default().unwrap();
+            Origin::Tuple(owned_scheme, host, port)
+        }
+        // TODO: Figure out what to do if the scheme is a file
+        "file" => Origin::new_opaque(),
+        _ => Origin::new_opaque(),
+    }
+}
+
+/// The origin of a URL
+///
+/// Two URLs with the same origin are considered
+/// to originate from the same entity and can therefore trust
+/// each other.
+///
+/// The origin is determined based on the scheme as follows:
+///
+/// - If the scheme is "blob" the origin is the origin of the
+/// URL contained in the path component. If parsing fails,
+/// it is an opaque origin.
+/// - If the scheme is "ftp", "gopher", "http", "https", "ws", or "wss",
+/// then the origin is a tuple of the scheme, host, and port.
+/// - If the scheme is anything else, the origin is opaque, meaning
+/// the URL does not have the same origin as any other URL.
+///
+/// For more information see <https://url.spec.whatwg.org/#origin>
+#[derive(PartialEq, Eq, Hash, Clone, Debug)]
+pub enum Origin {
+ /// A globally unique identifier
+ Opaque(OpaqueOrigin),
+
+ /// Consists of the URL's scheme, host and port
+ Tuple(String, Host<String>, u16),
+}
+
+impl Origin {
+    /// Creates a new opaque origin that is only equal to itself.
+    ///
+    /// Each call draws the next value from a process-wide atomic counter.
+    pub fn new_opaque() -> Origin {
+        static COUNTER: AtomicUsize = AtomicUsize::new(0);
+        let id = COUNTER.fetch_add(1, Ordering::SeqCst);
+        Origin::Opaque(OpaqueOrigin(id))
+    }
+
+    /// Return whether this origin is a (scheme, host, port) tuple
+    /// (as opposed to an opaque origin).
+    pub fn is_tuple(&self) -> bool {
+        match *self {
+            Origin::Tuple(..) => true,
+            Origin::Opaque(_) => false,
+        }
+    }
+
+    /// <https://html.spec.whatwg.org/multipage/#ascii-serialisation-of-an-origin>
+    ///
+    /// Opaque origins serialize as `null`; the port is omitted when it is the
+    /// scheme's default port.
+    pub fn ascii_serialization(&self) -> String {
+        let (scheme, host, port) = match *self {
+            Origin::Opaque(_) => return "null".to_owned(),
+            Origin::Tuple(ref scheme, ref host, port) => (scheme, host, port),
+        };
+        if default_port(scheme) == Some(port) {
+            format!("{}://{}", scheme, host)
+        } else {
+            format!("{}://{}:{}", scheme, host, port)
+        }
+    }
+
+    /// <https://html.spec.whatwg.org/multipage/#unicode-serialisation-of-an-origin>
+    ///
+    /// Same as the ASCII serialization, except that a domain host is first
+    /// passed through `domain_to_unicode` (conversion errors are ignored).
+    pub fn unicode_serialization(&self) -> String {
+        let (scheme, host, port) = match *self {
+            Origin::Opaque(_) => return "null".to_owned(),
+            Origin::Tuple(ref scheme, ref host, port) => (scheme, host, port),
+        };
+        let host = if let Host::Domain(ref domain) = *host {
+            let (domain, _errors) = domain_to_unicode(domain);
+            Host::Domain(domain)
+        } else {
+            host.clone()
+        };
+        if default_port(scheme) == Some(port) {
+            format!("{}://{}", scheme, host)
+        } else {
+            format!("{}://{}:{}", scheme, host, port)
+        }
+    }
+}
+
+/// Opaque identifier for URLs that have file or other schemes
+#[derive(Eq, PartialEq, Hash, Clone, Debug)]
+pub struct OpaqueOrigin(usize); // holds the counter value drawn in `Origin::new_opaque`
diff --git a/third_party/rust/url/src/parser.rs b/third_party/rust/url/src/parser.rs
new file mode 100644
index 0000000000..e2ea36bfab
--- /dev/null
+++ b/third_party/rust/url/src/parser.rs
@@ -0,0 +1,1426 @@
+// Copyright 2013-2016 The rust-url developers.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::error::Error;
+use std::fmt::{self, Formatter, Write};
+use std::str;
+
+use host::{Host, HostInternal};
+use percent_encoding::{percent_encode, utf8_percent_encode, AsciiSet, CONTROLS};
+use query_encoding::EncodingOverride;
+use Url;
+
+/// https://url.spec.whatwg.org/#fragment-percent-encode-set
+const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
+
+/// https://url.spec.whatwg.org/#path-percent-encode-set
+const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}');
+
+/// https://url.spec.whatwg.org/#userinfo-percent-encode-set
+pub(crate) const USERINFO: &AsciiSet = &PATH
+ .add(b'/')
+ .add(b':')
+ .add(b';')
+ .add(b'=')
+ .add(b'@')
+ .add(b'[')
+ .add(b'\\')
+ .add(b']')
+ .add(b'^')
+ .add(b'|');
+
+pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(b'%'); // the PATH set plus slash and percent
+
+// The backslash (\) character is treated as a path separator in special URLs
+// so it needs to be additionally escaped in that case.
+pub(crate) const SPECIAL_PATH_SEGMENT: &AsciiSet = &PATH_SEGMENT.add(b'\\');
+
+// Query percent-encode sets, see https://url.spec.whatwg.org/#query-state
+const QUERY: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>');
+const SPECIAL_QUERY: &AsciiSet = &QUERY.add(b'\''); // the QUERY set plus the ASCII apostrophe
+
+pub type ParseResult<T> = Result<T, ParseError>; // Result alias whose error type is ParseError
+
+macro_rules! simple_enum_error { // Generates the ParseError enum and its Error impl from `Variant => description` pairs.
+ ($($name: ident => $description: expr,)+) => {
+ /// Errors that can occur during parsing.
+ ///
+ /// This may be extended in the future so exhaustive matching is
+ /// discouraged with an unused variant.
+ #[derive(PartialEq, Eq, Clone, Copy, Debug)]
+ pub enum ParseError {
+ $(
+ $name,
+ )+
+ /// Unused variant that enables non-exhaustive matching
+ #[doc(hidden)]
+ __FutureProof,
+ }
+
+ impl Error for ParseError {
+ fn description(&self) -> &str {
+ match *self {
+ $(
+ ParseError::$name => $description,
+ )+
+ ParseError::__FutureProof => {
+ unreachable!("Don't abuse the FutureProof!");
+ }
+ }
+ }
+ }
+ }
+}
+
+simple_enum_error! { // each line: ParseError variant => text returned by description()
+ EmptyHost => "empty host",
+ IdnaError => "invalid international domain name",
+ InvalidPort => "invalid port number",
+ InvalidIpv4Address => "invalid IPv4 address",
+ InvalidIpv6Address => "invalid IPv6 address",
+ InvalidDomainCharacter => "invalid domain character",
+ RelativeUrlWithoutBase => "relative URL without a base",
+ RelativeUrlWithCannotBeABaseBase => "relative URL with a cannot-be-a-base base",
+ SetHostOnCannotBeABaseUrl => "a cannot-be-a-base URL doesn’t have a host to set",
+ Overflow => "URLs more than 4 GB are not supported",
+}
+
+impl fmt::Display for ParseError {
+    /// Formats the error as its `description()` text.
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        let message: &str = self.description();
+        <str as fmt::Display>::fmt(message, f)
+    }
+}
+
+impl From<::idna::Errors> for ParseError {
+    /// Collapses any IDNA error set into `ParseError::IdnaError`; the details are dropped.
+    fn from(_errors: ::idna::Errors) -> Self {
+        ParseError::IdnaError
+    }
+}
+
+macro_rules! syntax_violation_enum { // Generates the SyntaxViolation enum and its description() method from `Variant => description` pairs.
+ ($($name: ident => $description: expr,)+) => {
+ /// Non-fatal syntax violations that can occur during parsing.
+ ///
+ /// This may be extended in the future so exhaustive matching is
+ /// discouraged with an unused variant.
+ #[derive(PartialEq, Eq, Clone, Copy, Debug)]
+ pub enum SyntaxViolation {
+ $(
+ $name,
+ )+
+ /// Unused variant that enables non-exhaustive matching
+ #[doc(hidden)]
+ __FutureProof,
+ }
+
+ impl SyntaxViolation {
+ pub fn description(&self) -> &'static str {
+ match *self {
+ $(
+ SyntaxViolation::$name => $description,
+ )+
+ SyntaxViolation::__FutureProof => {
+ unreachable!("Don't abuse the FutureProof!");
+ }
+ }
+ }
+ }
+ }
+}
+
+syntax_violation_enum! { // each entry: SyntaxViolation variant => text returned by description()
+ Backslash => "backslash",
+ C0SpaceIgnored =>
+ "leading or trailing control or space character are ignored in URLs",
+ EmbeddedCredentials =>
+ "embedding authentication information (username or password) \
+ in an URL is not recommended",
+ ExpectedDoubleSlash => "expected //",
+ ExpectedFileDoubleSlash => "expected // after file:",
+ FileWithHostAndWindowsDrive => "file: with host and Windows drive letter",
+ NonUrlCodePoint => "non-URL code point",
+ NullInFragment => "NULL characters are ignored in URL fragment identifiers",
+ PercentDecode => "expected 2 hex digits after %",
+ TabOrNewlineIgnored => "tabs or newlines are ignored in URLs",
+ UnencodedAtSign => "unencoded @ sign in username or password",
+}
+
+impl fmt::Display for SyntaxViolation {
+    /// Formats the violation as its `description()` text.
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        let message: &str = self.description();
+        <str as fmt::Display>::fmt(message, f)
+    }
+}
+
+#[derive(Copy, Clone)]
+pub enum SchemeType { // Classification of a scheme, as computed by SchemeType::from.
+ File, // the file scheme
+ SpecialNotFile, // http, https, ws, wss, ftp or gopher
+ NotSpecial, // any other scheme
+}
+
+impl SchemeType {
+    /// Returns `true` for every variant except `NotSpecial`.
+    pub fn is_special(&self) -> bool {
+        match *self {
+            SchemeType::File | SchemeType::SpecialNotFile => true,
+            SchemeType::NotSpecial => false,
+        }
+    }
+
+    /// Returns `true` only for the `File` variant.
+    pub fn is_file(&self) -> bool {
+        match *self {
+            SchemeType::File => true,
+            SchemeType::SpecialNotFile | SchemeType::NotSpecial => false,
+        }
+    }
+
+    /// Classifies a scheme string. The comparison is exact, so callers are
+    /// expected to pass an already-lowercased scheme.
+    pub fn from(s: &str) -> Self {
+        if s == "file" {
+            return SchemeType::File;
+        }
+        match s {
+            "http" | "https" | "ws" | "wss" | "ftp" | "gopher" => SchemeType::SpecialNotFile,
+            _ => SchemeType::NotSpecial,
+        }
+    }
+}
+
+/// Returns the default port of a special scheme, or `None` for any other scheme.
+///
+/// The match is exact (case-sensitive): `"http"` maps to 80, `"HTTP"` to `None`.
+pub fn default_port(scheme: &str) -> Option<u16> {
+    let port = match scheme {
+        "ftp" => 21,
+        "gopher" => 70,
+        "http" | "ws" => 80,
+        "https" | "wss" => 443,
+        _ => return None,
+    };
+    Some(port)
+}
+
+#[derive(Clone)]
+pub struct Input<'i> { // Character cursor over the URL input; its Iterator impl skips tab, LF and CR.
+ chars: str::Chars<'i>,
+}
+
+impl<'i> Input<'i> {
+    /// Creates an input without a violation callback.
+    pub fn new(input: &'i str) -> Self {
+        Self::with_log(input, None)
+    }
+
+    /// Creates an input over `original_input` with leading and trailing C0
+    /// control and space characters trimmed.
+    ///
+    /// When `vfn` is provided it is told about the trimming
+    /// (`C0SpaceIgnored`) and about any tab or newline left inside the
+    /// trimmed input (`TabOrNewlineIgnored`).
+    pub fn with_log(original_input: &'i str, vfn: Option<&dyn Fn(SyntaxViolation)>) -> Self {
+        let trimmed = original_input.trim_matches(c0_control_or_space);
+        if let Some(report) = vfn {
+            if original_input.len() > trimmed.len() {
+                report(SyntaxViolation::C0SpaceIgnored)
+            }
+            let has_tab_or_newline = trimmed.chars().any(|c| matches!(c, '\t' | '\n' | '\r'));
+            if has_tab_or_newline {
+                report(SyntaxViolation::TabOrNewlineIgnored)
+            }
+        }
+        Input {
+            chars: trimmed.chars(),
+        }
+    }
+
+    /// Whether no (non-ignored) character is left.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        let mut probe = self.clone();
+        probe.next().is_none()
+    }
+
+    /// Whether the input starts with `p`; the input itself is not advanced.
+    #[inline]
+    fn starts_with<P: Pattern>(&self, p: P) -> bool {
+        let mut probe = self.clone();
+        p.split_prefix(&mut probe)
+    }
+
+    /// Returns the input that follows the prefix `p`, or `None` if `p` does not match.
+    #[inline]
+    pub fn split_prefix<P: Pattern>(&self, p: P) -> Option<Self> {
+        let mut rest = self.clone();
+        match p.split_prefix(&mut rest) {
+            true => Some(rest),
+            false => None,
+        }
+    }
+
+    /// Returns the first character (if any) together with the input after it.
+    #[inline]
+    fn split_first(&self) -> (Option<char>, Self) {
+        let mut rest = self.clone();
+        let first = rest.next();
+        (first, rest)
+    }
+
+    /// Counts the leading characters accepted by `f` and returns that count
+    /// with the input positioned at the first character that is not accepted.
+    #[inline]
+    fn count_matching<F: Fn(char) -> bool>(&self, f: F) -> (u32, Self) {
+        let mut matched = 0;
+        let mut rest = self.clone();
+        loop {
+            let mut lookahead = rest.clone();
+            match lookahead.next() {
+                Some(c) if f(c) => {
+                    rest = lookahead;
+                    matched += 1;
+                }
+                _ => return (matched, rest),
+            }
+        }
+    }
+
+    /// Like `next`, but also returns the UTF-8 slice of the character within
+    /// the underlying input. Tabs and newlines are skipped.
+    #[inline]
+    fn next_utf8(&mut self) -> Option<(char, &'i str)> {
+        loop {
+            let tail = self.chars.as_str();
+            let c = self.chars.next()?;
+            if matches!(c, '\t' | '\n' | '\r') {
+                continue;
+            }
+            return Some((c, &tail[..c.len_utf8()]));
+        }
+    }
+}
+
+pub trait Pattern { // Prefix matcher used by Input::starts_with and Input::split_prefix.
+ fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool; // advances `input`; returns true when the prefix matched
+}
+
+impl Pattern for char {
+    /// Consumes one character from `input` and reports whether it equals `self`.
+    fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool {
+        matches!(input.next(), Some(c) if c == self)
+    }
+}
+
+impl<'a> Pattern for &'a str {
+    /// Consumes characters from `input` while they match `self`, stopping at
+    /// the first mismatch. Returns whether the whole string matched.
+    fn split_prefix<'i>(self, input: &mut Input<'i>) -> bool {
+        self.chars().all(|expected| input.next() == Some(expected))
+    }
+}
+
+impl<F: FnMut(char) -> bool> Pattern for F {
+    /// Consumes one character from `input` and applies the predicate to it;
+    /// an exhausted input never matches.
+    fn split_prefix<'i>(mut self, input: &mut Input<'i>) -> bool {
+        match input.next() {
+            Some(c) => self(c),
+            None => false,
+        }
+    }
+}
+
+impl<'i> Iterator for Input<'i> {
+    type Item = char;
+
+    /// Yields the next character that is not a tab, line feed or carriage return.
+    fn next(&mut self) -> Option<char> {
+        loop {
+            match self.chars.next() {
+                Some('\t') | Some('\n') | Some('\r') => continue,
+                other => return other,
+            }
+        }
+    }
+}
+
+pub struct Parser<'a> {
+ pub serialization: String, // output buffer that the parse methods append to
+ pub base_url: Option<&'a Url>, // consulted by parse_url when the input has no scheme of its own
+ pub query_encoding_override: EncodingOverride<'a>,
+ pub violation_fn: Option<&'a dyn Fn(SyntaxViolation)>, // optional callback for non-fatal syntax violations
+ pub context: Context,
+}
+
+#[derive(PartialEq, Eq, Copy, Clone)]
+pub enum Context { // Tells the parser who is driving it; several parse methods branch on this value.
+ UrlParser,
+ Setter, // the context installed by Parser::for_setter
+ PathSegmentSetter,
+}
+
+impl<'a> Parser<'a> {
+    /// Reports `v` to the violation callback, if one is installed.
+    fn log_violation(&self, v: SyntaxViolation) {
+        match self.violation_fn {
+            Some(report) => report(v),
+            None => {}
+        }
+    }
+
+    /// Reports `v` when `test()` returns true. `test` is only evaluated when
+    /// a violation callback is installed.
+    fn log_violation_if(&self, v: SyntaxViolation, test: impl FnOnce() -> bool) {
+        let report = match self.violation_fn {
+            Some(report) => report,
+            None => return,
+        };
+        if test() {
+            report(v)
+        }
+    }
+
+    /// Creates a parser in `Context::Setter` that appends to `serialization`,
+    /// with no base URL, no query encoding override and no violation callback.
+    pub fn for_setter(serialization: String) -> Parser<'a> {
+        let context = Context::Setter;
+        Parser {
+            context,
+            violation_fn: None,
+            query_encoding_override: None,
+            base_url: None,
+            serialization,
+        }
+    }
+
+    /// https://url.spec.whatwg.org/#concept-basic-url-parser
+    ///
+    /// Parses `input` as an absolute URL when it starts with a scheme, and
+    /// otherwise resolves it against `self.base_url`.
+    ///
+    /// Fails with `RelativeUrlWithoutBase` when there is neither a scheme nor
+    /// a base, and with `RelativeUrlWithCannotBeABaseBase` when a
+    /// non-fragment relative input meets a cannot-be-a-base base URL.
+    pub fn parse_url(mut self, input: &str) -> ParseResult<Url> {
+        let input = Input::with_log(input, self.violation_fn);
+        if let Ok(after_scheme) = self.parse_scheme(input.clone()) {
+            return self.parse_with_scheme(after_scheme);
+        }
+
+        // No-scheme state
+        let base_url = match self.base_url {
+            Some(base_url) => base_url,
+            None => return Err(ParseError::RelativeUrlWithoutBase),
+        };
+        if input.starts_with('#') {
+            return self.fragment_only(base_url, input);
+        }
+        if base_url.cannot_be_a_base() {
+            return Err(ParseError::RelativeUrlWithCannotBeABaseBase);
+        }
+        let scheme_type = SchemeType::from(base_url.scheme());
+        if scheme_type.is_file() {
+            self.parse_file(input, scheme_type, Some(base_url))
+        } else {
+            self.parse_relative(input, scheme_type, base_url)
+        }
+    }
+
+    /// Parses a scheme from the start of `input`, appending it lowercased to
+    /// `self.serialization`, and returns the input that follows the `:`.
+    ///
+    /// Returns `Err(())` when the input does not start with an ASCII letter or
+    /// contains a character that is not allowed in a scheme; in the latter
+    /// case the serialization is cleared. When the input ends before a `:`,
+    /// the result is `Ok` only in `Context::Setter`.
+    pub fn parse_scheme<'i>(&mut self, mut input: Input<'i>) -> Result<Input<'i>, ()> {
+        if input.is_empty() {
+            return Err(());
+        }
+        if !input.starts_with(ascii_alpha) {
+            return Err(());
+        }
+        debug_assert!(self.serialization.is_empty());
+        while let Some(c) = input.next() {
+            if c == ':' {
+                return Ok(input);
+            }
+            if c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.') {
+                self.serialization.push(c.to_ascii_lowercase())
+            } else {
+                self.serialization.clear();
+                return Err(());
+            }
+        }
+        // EOF before ':'
+        if self.context != Context::Setter {
+            self.serialization.clear();
+            return Err(());
+        }
+        Ok(input)
+    }
+
+ fn parse_with_scheme(mut self, input: Input) -> ParseResult<Url> { // Continues after the scheme, dispatching on the scheme type.
+ use SyntaxViolation::{ExpectedDoubleSlash, ExpectedFileDoubleSlash};
+ let scheme_end = to_u32(self.serialization.len())?; // NOTE(review): serialization is expected to hold just the scheme here (as left by parse_scheme)
+ let scheme_type = SchemeType::from(&self.serialization);
+ self.serialization.push(':');
+ match scheme_type {
+ SchemeType::File => {
+ self.log_violation_if(ExpectedFileDoubleSlash, || !input.starts_with("//"));
+ let base_file_url = self.base_url.and_then(|base| { // only a file: base is passed on to parse_file
+ if base.scheme() == "file" {
+ Some(base)
+ } else {
+ None
+ }
+ });
+ self.serialization.clear(); // parse_file asserts an empty serialization and writes the scheme again itself
+ self.parse_file(input, scheme_type, base_file_url)
+ }
+ SchemeType::SpecialNotFile => {
+ // special relative or authority state
+ let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\'));
+ if let Some(base_url) = self.base_url {
+ if slashes_count < 2
+ && base_url.scheme() == &self.serialization[..scheme_end as usize]
+ {
+ // "Cannot-be-a-base" URLs only happen with "not special" schemes.
+ debug_assert!(!base_url.cannot_be_a_base());
+ self.serialization.clear(); // parse_relative asserts an empty serialization
+ return self.parse_relative(input, scheme_type, base_url);
+ }
+ }
+ // special authority slashes state
+ self.log_violation_if(ExpectedDoubleSlash, || {
+ input
+ .clone()
+ .take_while(|&c| matches!(c, '/' | '\\'))
+ .collect::<String>()
+ != "//"
+ });
+ self.after_double_slash(remaining, scheme_type, scheme_end)
+ }
+ SchemeType::NotSpecial => self.parse_non_special(input, scheme_type, scheme_end),
+ }
+ }
+
+ /// Scheme other than file, http, https, ws, wss, ftp, gopher.
+ fn parse_non_special(
+ mut self,
+ input: Input,
+ scheme_type: SchemeType,
+ scheme_end: u32,
+ ) -> ParseResult<Url> {
+ // path or authority state
+ if let Some(input) = input.split_prefix("//") {
+ return self.after_double_slash(input, scheme_type, scheme_end);
+ }
+ // Anarchist URL (no authority)
+ let path_start = to_u32(self.serialization.len())?; // with no authority, all the offsets below coincide with the path start
+ let username_end = path_start;
+ let host_start = path_start;
+ let host_end = path_start;
+ let host = HostInternal::None;
+ let port = None;
+ let remaining = if let Some(input) = input.split_prefix('/') {
+ let path_start = self.serialization.len();
+ self.serialization.push('/');
+ self.parse_path(scheme_type, &mut false, path_start, input)
+ } else {
+ self.parse_cannot_be_a_base_path(input) // input does not start with a slash
+ };
+ self.with_query_and_fragment(
+ scheme_type,
+ scheme_end,
+ username_end,
+ host_start,
+ host_end,
+ host,
+ port,
+ path_start,
+ remaining,
+ )
+ }
+
+ fn parse_file( // Parses a file: URL (or an input relative to a file: base); builds the whole serialization itself.
+ mut self,
+ input: Input,
+ scheme_type: SchemeType,
+ mut base_file_url: Option<&Url>,
+ ) -> ParseResult<Url> {
+ use SyntaxViolation::Backslash;
+ // file state
+ debug_assert!(self.serialization.is_empty());
+ let (first_char, input_after_first_char) = input.split_first();
+ match first_char {
+ None => { // empty input
+ if let Some(base_url) = base_file_url {
+ // Copy everything except the fragment
+ let before_fragment = match base_url.fragment_start {
+ Some(i) => &base_url.serialization[..i as usize],
+ None => &*base_url.serialization,
+ };
+ self.serialization.push_str(before_fragment);
+ Ok(Url {
+ serialization: self.serialization,
+ fragment_start: None,
+ ..*base_url
+ })
+ } else {
+ self.serialization.push_str("file:///");
+ let scheme_end = "file".len() as u32;
+ let path_start = "file://".len() as u32;
+ Ok(Url {
+ serialization: self.serialization,
+ scheme_end,
+ username_end: path_start,
+ host_start: path_start,
+ host_end: path_start,
+ host: HostInternal::None,
+ port: None,
+ path_start,
+ query_start: None,
+ fragment_start: None,
+ })
+ }
+ }
+ Some('?') => {
+ if let Some(base_url) = base_file_url {
+ // Copy everything up to the query string
+ let before_query = match (base_url.query_start, base_url.fragment_start) {
+ (None, None) => &*base_url.serialization,
+ (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
+ };
+ self.serialization.push_str(before_query);
+ let (query_start, fragment_start) =
+ self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?;
+ Ok(Url {
+ serialization: self.serialization,
+ query_start,
+ fragment_start,
+ ..*base_url
+ })
+ } else {
+ self.serialization.push_str("file:///");
+ let scheme_end = "file".len() as u32;
+ let path_start = "file://".len() as u32;
+ let (query_start, fragment_start) =
+ self.parse_query_and_fragment(scheme_type, scheme_end, input)?;
+ Ok(Url {
+ serialization: self.serialization,
+ scheme_end,
+ username_end: path_start,
+ host_start: path_start,
+ host_end: path_start,
+ host: HostInternal::None,
+ port: None,
+ path_start,
+ query_start,
+ fragment_start,
+ })
+ }
+ }
+ Some('#') => {
+ if let Some(base_url) = base_file_url {
+ self.fragment_only(base_url, input)
+ } else {
+ self.serialization.push_str("file:///");
+ let scheme_end = "file".len() as u32;
+ let path_start = "file://".len() as u32;
+ let fragment_start = "file:///".len() as u32;
+ self.serialization.push('#');
+ self.parse_fragment(input_after_first_char);
+ Ok(Url {
+ serialization: self.serialization,
+ scheme_end,
+ username_end: path_start,
+ host_start: path_start,
+ host_end: path_start,
+ host: HostInternal::None,
+ port: None,
+ path_start,
+ query_start: None,
+ fragment_start: Some(fragment_start),
+ })
+ }
+ }
+ Some('/') | Some('\\') => {
+ self.log_violation_if(Backslash, || first_char == Some('\\'));
+ // file slash state
+ let (next_char, input_after_next_char) = input_after_first_char.split_first();
+ self.log_violation_if(Backslash, || next_char == Some('\\'));
+ if matches!(next_char, Some('/') | Some('\\')) {
+ // file host state
+ self.serialization.push_str("file://");
+ let scheme_end = "file".len() as u32;
+ let host_start = "file://".len() as u32;
+ let (path_start, mut host, remaining) =
+ self.parse_file_host(input_after_next_char)?;
+ let mut host_end = to_u32(self.serialization.len())?;
+ let mut has_host = !matches!(host, HostInternal::None);
+ let remaining = if path_start { // here `path_start` is the bool returned by parse_file_host, not an offset
+ self.parse_path_start(SchemeType::File, &mut has_host, remaining)
+ } else {
+ let path_start = self.serialization.len();
+ self.serialization.push('/');
+ self.parse_path(SchemeType::File, &mut has_host, path_start, remaining)
+ };
+ // For file URLs that have a host and whose path starts
+ // with the windows drive letter we just remove the host.
+ if !has_host {
+ self.serialization
+ .drain(host_start as usize..host_end as usize);
+ host_end = host_start;
+ host = HostInternal::None;
+ }
+ let (query_start, fragment_start) =
+ self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
+ Ok(Url {
+ serialization: self.serialization,
+ scheme_end,
+ username_end: host_start,
+ host_start,
+ host_end,
+ host,
+ port: None,
+ path_start: host_end,
+ query_start,
+ fragment_start,
+ })
+ } else {
+ self.serialization.push_str("file:///");
+ let scheme_end = "file".len() as u32;
+ let path_start = "file://".len();
+ if let Some(base_url) = base_file_url {
+ let first_segment = base_url.path_segments().unwrap().next().unwrap();
+ // FIXME: *normalized* drive letter
+ if is_windows_drive_letter(first_segment) {
+ self.serialization.push_str(first_segment); // keep the base URL's drive letter as the first path segment
+ self.serialization.push('/');
+ }
+ }
+ let remaining = self.parse_path(
+ SchemeType::File,
+ &mut false,
+ path_start,
+ input_after_first_char,
+ );
+ let (query_start, fragment_start) =
+ self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
+ let path_start = path_start as u32;
+ Ok(Url {
+ serialization: self.serialization,
+ scheme_end,
+ username_end: path_start,
+ host_start: path_start,
+ host_end: path_start,
+ host: HostInternal::None,
+ port: None,
+ path_start,
+ query_start,
+ fragment_start,
+ })
+ }
+ }
+ _ => {
+ if starts_with_windows_drive_letter_segment(&input) {
+ base_file_url = None; // an input starting with a drive-letter segment is parsed without the base URL
+ }
+ if let Some(base_url) = base_file_url {
+ let before_query = match (base_url.query_start, base_url.fragment_start) {
+ (None, None) => &*base_url.serialization,
+ (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
+ };
+ self.serialization.push_str(before_query);
+ self.pop_path(SchemeType::File, base_url.path_start as usize);
+ let remaining = self.parse_path(
+ SchemeType::File,
+ &mut true,
+ base_url.path_start as usize,
+ input,
+ );
+ self.with_query_and_fragment(
+ SchemeType::File,
+ base_url.scheme_end,
+ base_url.username_end,
+ base_url.host_start,
+ base_url.host_end,
+ base_url.host,
+ base_url.port,
+ base_url.path_start,
+ remaining,
+ )
+ } else {
+ self.serialization.push_str("file:///");
+ let scheme_end = "file".len() as u32;
+ let path_start = "file://".len();
+ let remaining =
+ self.parse_path(SchemeType::File, &mut false, path_start, input);
+ let (query_start, fragment_start) =
+ self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?;
+ let path_start = path_start as u32;
+ Ok(Url {
+ serialization: self.serialization,
+ scheme_end,
+ username_end: path_start,
+ host_start: path_start,
+ host_end: path_start,
+ host: HostInternal::None,
+ port: None,
+ path_start,
+ query_start,
+ fragment_start,
+ })
+ }
+ }
+ }
+ }
+
+ fn parse_relative( // Resolves a scheme-less input against `base_url`, reusing the base's components as needed.
+ mut self,
+ input: Input,
+ scheme_type: SchemeType,
+ base_url: &Url,
+ ) -> ParseResult<Url> {
+ // relative state
+ debug_assert!(self.serialization.is_empty());
+ let (first_char, input_after_first_char) = input.split_first();
+ match first_char {
+ None => { // empty input: the result is the base URL without its fragment
+ // Copy everything except the fragment
+ let before_fragment = match base_url.fragment_start {
+ Some(i) => &base_url.serialization[..i as usize],
+ None => &*base_url.serialization,
+ };
+ self.serialization.push_str(before_fragment);
+ Ok(Url {
+ serialization: self.serialization,
+ fragment_start: None,
+ ..*base_url
+ })
+ }
+ Some('?') => {
+ // Copy everything up to the query string
+ let before_query = match (base_url.query_start, base_url.fragment_start) {
+ (None, None) => &*base_url.serialization,
+ (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
+ };
+ self.serialization.push_str(before_query);
+ let (query_start, fragment_start) =
+ self.parse_query_and_fragment(scheme_type, base_url.scheme_end, input)?;
+ Ok(Url {
+ serialization: self.serialization,
+ query_start,
+ fragment_start,
+ ..*base_url
+ })
+ }
+ Some('#') => self.fragment_only(base_url, input),
+ Some('/') | Some('\\') => {
+ let (slashes_count, remaining) = input.count_matching(|c| matches!(c, '/' | '\\'));
+ if slashes_count >= 2 { // two or more slashes: keep only the base scheme and parse a fresh authority
+ self.log_violation_if(SyntaxViolation::ExpectedDoubleSlash, || {
+ input
+ .clone()
+ .take_while(|&c| matches!(c, '/' | '\\'))
+ .collect::<String>()
+ != "//"
+ });
+ let scheme_end = base_url.scheme_end;
+ debug_assert!(base_url.byte_at(scheme_end) == b':');
+ self.serialization
+ .push_str(base_url.slice(..scheme_end + 1));
+ return self.after_double_slash(remaining, scheme_type, scheme_end);
+ }
+ let path_start = base_url.path_start; // single slash: keep the base up to and including the first slash of its path
+ debug_assert!(base_url.byte_at(path_start) == b'/');
+ self.serialization
+ .push_str(base_url.slice(..path_start + 1));
+ let remaining = self.parse_path(
+ scheme_type,
+ &mut true,
+ path_start as usize,
+ input_after_first_char,
+ );
+ self.with_query_and_fragment(
+ scheme_type,
+ base_url.scheme_end,
+ base_url.username_end,
+ base_url.host_start,
+ base_url.host_end,
+ base_url.host,
+ base_url.port,
+ base_url.path_start,
+ remaining,
+ )
+ }
+ _ => {
+ let before_query = match (base_url.query_start, base_url.fragment_start) {
+ (None, None) => &*base_url.serialization,
+ (Some(i), _) | (None, Some(i)) => base_url.slice(..i),
+ };
+ self.serialization.push_str(before_query);
+ // FIXME spec says just "remove last entry", not the "pop" algorithm
+ self.pop_path(scheme_type, base_url.path_start as usize);
+ let remaining =
+ self.parse_path(scheme_type, &mut true, base_url.path_start as usize, input);
+ self.with_query_and_fragment(
+ scheme_type,
+ base_url.scheme_end,
+ base_url.username_end,
+ base_url.host_start,
+ base_url.host_end,
+ base_url.host,
+ base_url.port,
+ base_url.path_start,
+ remaining,
+ )
+ }
+ }
+ }
+
+    /// Parses everything that follows the `//` of a URL: userinfo, host and
+    /// port, path, then query and fragment. The `//` itself is written to the
+    /// serialization here.
+    fn after_double_slash(
+        mut self,
+        input: Input,
+        scheme_type: SchemeType,
+        scheme_end: u32,
+    ) -> ParseResult<Url> {
+        self.serialization.push_str("//");
+
+        // authority state
+        let (username_end, after_userinfo) = self.parse_userinfo(input, scheme_type)?;
+
+        // host state
+        let host_start = to_u32(self.serialization.len())?;
+        let (host_end, host, port, after_host) =
+            self.parse_host_and_port(after_userinfo, scheme_end, scheme_type)?;
+
+        // path state
+        let path_start = to_u32(self.serialization.len())?;
+        let after_path = self.parse_path_start(scheme_type, &mut true, after_host);
+
+        self.with_query_and_fragment(
+            scheme_type,
+            scheme_end,
+            username_end,
+            host_start,
+            host_end,
+            host,
+            port,
+            path_start,
+            after_path,
+        )
+    }
+
+ /// Return (username_end, remaining). Finds the last `@` before the end of the authority and, if there is one, writes the percent-encoded userinfo to the serialization.
+ fn parse_userinfo<'i>(
+ &mut self,
+ mut input: Input<'i>,
+ scheme_type: SchemeType,
+ ) -> ParseResult<(u32, Input<'i>)> {
+ let mut last_at = None;
+ let mut remaining = input.clone();
+ let mut char_count = 0;
+ while let Some(c) = remaining.next() { // first pass: look ahead only, nothing is written yet
+ match c {
+ '@' => {
+ if last_at.is_some() {
+ self.log_violation(SyntaxViolation::UnencodedAtSign)
+ } else {
+ self.log_violation(SyntaxViolation::EmbeddedCredentials)
+ }
+ last_at = Some((char_count, remaining.clone()))
+ }
+ '/' | '?' | '#' => break,
+ '\\' if scheme_type.is_special() => break,
+ _ => (),
+ }
+ char_count += 1;
+ }
+ let (mut userinfo_char_count, remaining) = match last_at {
+ None => return Ok((to_u32(self.serialization.len())?, input)), // no `@` found: there is no userinfo, input is returned untouched
+ Some((0, remaining)) => return Ok((to_u32(self.serialization.len())?, remaining)), // empty userinfo: just skip past the `@`
+ Some(x) => x,
+ };
+
+ let mut username_end = None;
+ let mut has_password = false;
+ let mut has_username = false;
+ while userinfo_char_count > 0 { // second pass: encode exactly the characters that precede the last `@`
+ let (c, utf8_c) = input.next_utf8().unwrap();
+ userinfo_char_count -= 1;
+ if c == ':' && username_end.is_none() {
+ // Start parsing password
+ username_end = Some(to_u32(self.serialization.len())?);
+ // We don't add a colon if the password is empty
+ if userinfo_char_count > 0 {
+ self.serialization.push(':');
+ has_password = true;
+ }
+ } else {
+ if !has_password {
+ has_username = true;
+ }
+ self.check_url_code_point(c, &input);
+ self.serialization
+ .extend(utf8_percent_encode(utf8_c, USERINFO));
+ }
+ }
+ let username_end = match username_end {
+ Some(i) => i,
+ None => to_u32(self.serialization.len())?,
+ };
+ if has_username || has_password {
+ self.serialization.push('@');
+ }
+ Ok((username_end, remaining))
+ }
+
+    /// Parses the host and an optional `:port`, appending both to the
+    /// serialization.
+    ///
+    /// Returns `(host_end, host, port, remaining)`. `port` is `None` when no
+    /// port was given or when it equals the default port of the scheme stored
+    /// in `self.serialization[..scheme_end]`.
+    fn parse_host_and_port<'i>(
+        &mut self,
+        input: Input<'i>,
+        scheme_end: u32,
+        scheme_type: SchemeType,
+    ) -> ParseResult<(u32, HostInternal, Option<u16>, Input<'i>)> {
+        let (host, after_host) = Parser::parse_host(input, scheme_type)?;
+        write!(&mut self.serialization, "{}", host).unwrap();
+        let host_end = to_u32(self.serialization.len())?;
+
+        let (port, remaining) = match after_host.split_prefix(':') {
+            Some(after_colon) => {
+                let scheme_default = || default_port(&self.serialization[..scheme_end as usize]);
+                Parser::parse_port(after_colon, scheme_default, self.context)?
+            }
+            None => (None, after_host),
+        };
+        if let Some(port) = port {
+            write!(&mut self.serialization, ":{}", port).unwrap();
+        }
+        Ok((host_end, host.into(), port, remaining))
+    }
+
+ pub fn parse_host( // Splits the host off the front of `input` and parses it; returns the host and the input after it.
+ mut input: Input,
+ scheme_type: SchemeType,
+ ) -> ParseResult<(Host<String>, Input)> {
+ // Undo the Input abstraction here to avoid allocating in the common case
+ // where the host part of the input does not contain any tab or newline
+ let input_str = input.chars.as_str();
+ let mut inside_square_brackets = false;
+ let mut has_ignored_chars = false;
+ let mut non_ignored_chars = 0;
+ let mut bytes = 0;
+ for c in input_str.chars() {
+ match c {
+ ':' if !inside_square_brackets => break,
+ '\\' if scheme_type.is_special() => break,
+ '/' | '?' | '#' => break,
+ '\t' | '\n' | '\r' => {
+ has_ignored_chars = true;
+ }
+ '[' => {
+ inside_square_brackets = true;
+ non_ignored_chars += 1
+ }
+ ']' => {
+ inside_square_brackets = false;
+ non_ignored_chars += 1
+ }
+ _ => non_ignored_chars += 1,
+ }
+ bytes += c.len_utf8(); // byte length of the host part, ignored characters included
+ }
+ let replaced: String;
+ let host_str;
+ {
+ let host_input = input.by_ref().take(non_ignored_chars);
+ if has_ignored_chars {
+ replaced = host_input.collect(); // allocate only when tabs or newlines have to be dropped
+ host_str = &*replaced
+ } else {
+ for _ in host_input {} // advance `input` past the host without allocating
+ host_str = &input_str[..bytes]
+ }
+ }
+ if scheme_type.is_special() && host_str.is_empty() {
+ return Err(ParseError::EmptyHost);
+ }
+ if !scheme_type.is_special() {
+ let host = Host::parse_opaque(host_str)?;
+ return Ok((host, input));
+ }
+ let host = Host::parse(host_str)?;
+ Ok((host, input))
+ }
+
+ pub(crate) fn parse_file_host<'i>( // Parses the host of a file: URL; the returned bool is false when the host turned out to be a Windows drive letter.
+ &mut self,
+ input: Input<'i>,
+ ) -> ParseResult<(bool, HostInternal, Input<'i>)> {
+ // Undo the Input abstraction here to avoid allocating in the common case
+ // where the host part of the input does not contain any tab or newline
+ let input_str = input.chars.as_str();
+ let mut has_ignored_chars = false;
+ let mut non_ignored_chars = 0;
+ let mut bytes = 0;
+ for c in input_str.chars() {
+ match c {
+ '/' | '\\' | '?' | '#' => break,
+ '\t' | '\n' | '\r' => has_ignored_chars = true,
+ _ => non_ignored_chars += 1,
+ }
+ bytes += c.len_utf8();
+ }
+ let replaced: String;
+ let host_str;
+ let mut remaining = input.clone();
+ {
+ let host_input = remaining.by_ref().take(non_ignored_chars);
+ if has_ignored_chars {
+ replaced = host_input.collect();
+ host_str = &*replaced
+ } else {
+ for _ in host_input {} // advance `remaining` past the host without allocating
+ host_str = &input_str[..bytes]
+ }
+ }
+ if is_windows_drive_letter(host_str) {
+ return Ok((false, HostInternal::None, input)); // return the original input so the drive letter is parsed as part of the path
+ }
+ let host = if host_str.is_empty() {
+ HostInternal::None
+ } else {
+ match Host::parse(host_str)? {
+ Host::Domain(ref d) if d == "localhost" => HostInternal::None,
+ host => {
+ write!(&mut self.serialization, "{}", host).unwrap();
+ host.into()
+ }
+ }
+ };
+ Ok((true, host, remaining))
+ }
+
+    /// Parses a decimal port from the start of `input`.
+    ///
+    /// Returns the port and the input after the digits. The port is `None`
+    /// when there are no digits or when it equals `default_port()`, which is
+    /// only called once at least one digit has been read.
+    ///
+    /// Fails with `InvalidPort` when the value exceeds `u16::MAX`, or, in
+    /// `Context::UrlParser`, when the digits are followed by anything other
+    /// than `/`, `\`, `?`, `#` or the end of input.
+    pub fn parse_port<P>(
+        mut input: Input,
+        default_port: P,
+        context: Context,
+    ) -> ParseResult<(Option<u16>, Input)>
+    where
+        P: Fn() -> Option<u16>,
+    {
+        let mut port: u32 = 0;
+        let mut has_any_digit = false;
+        loop {
+            let (c, remaining) = match input.split_first() {
+                (Some(c), remaining) => (c, remaining),
+                (None, _) => break,
+            };
+            match c.to_digit(10) {
+                Some(digit) => {
+                    port = port * 10 + digit;
+                    if port > ::std::u16::MAX as u32 {
+                        return Err(ParseError::InvalidPort);
+                    }
+                    has_any_digit = true;
+                }
+                None if context == Context::UrlParser && !matches!(c, '/' | '\\' | '?' | '#') => {
+                    return Err(ParseError::InvalidPort);
+                }
+                None => break,
+            }
+            input = remaining;
+        }
+        let opt_port = if has_any_digit {
+            let parsed = Some(port as u16);
+            if parsed == default_port() {
+                None
+            } else {
+                parsed
+            }
+        } else {
+            None
+        };
+        Ok((opt_port, input))
+    }
+
+    /// Path start state: consumes one leading path separator (if any), writes the
+    /// path's initial `/` to the serialization and parses the rest as a path.
+    ///
+    /// A leading `\` is treated as a separator, and logged as a syntax violation,
+    /// only for special schemes. Returns the input left over by `parse_path`.
+    pub fn parse_path_start<'i>(
+        &mut self,
+        scheme_type: SchemeType,
+        has_host: &mut bool,
+        mut input: Input<'i>,
+    ) -> Input<'i> {
+        let (first, rest) = input.split_first();
+        if first == Some('/') {
+            input = rest;
+        } else if first == Some('\\') && scheme_type.is_special() {
+            self.log_violation(SyntaxViolation::Backslash);
+            input = rest;
+        }
+        let path_start = self.serialization.len();
+        self.serialization.push('/');
+        self.parse_path(scheme_type, has_host, path_start, input)
+    }
+
+ /// Parses path segments from `input`, appending their percent-encoded form to
+ /// `self.serialization`, and returns the input that was not consumed.
+ ///
+ /// The serialization must already end with `/` when this is called.
+ /// `path_start` is the byte offset passed on to `pop_path` and used for the
+ /// Windows drive letter check; callers in this file pass the offset of the
+ /// path's leading `/`.
+ /// `*has_host` is reset to `false` when a `file:` path turns out to be a
+ /// Windows drive letter.
+ pub fn parse_path<'i>(
+ &mut self,
+ scheme_type: SchemeType,
+ has_host: &mut bool,
+ path_start: usize,
+ mut input: Input<'i>,
+ ) -> Input<'i> {
+ // Relative path state
+ debug_assert!(self.serialization.ends_with('/'));
+ // Outer loop: one iteration per path segment.
+ loop {
+ let segment_start = self.serialization.len();
+ let mut ends_with_slash = false;
+ // Inner loop: consume characters until the current segment ends.
+ loop {
+ // Snapshot taken so that a `?` or `#` can be left unconsumed.
+ let input_before_c = input.clone();
+ let (c, utf8_c) = if let Some(x) = input.next_utf8() {
+ x
+ } else {
+ break;
+ };
+ match c {
+ // In the path-segment setter a slash is data, not a separator.
+ '/' if self.context != Context::PathSegmentSetter => {
+ ends_with_slash = true;
+ break;
+ }
+ // A backslash separates segments only for special schemes.
+ '\\' if self.context != Context::PathSegmentSetter
+ && scheme_type.is_special() =>
+ {
+ self.log_violation(SyntaxViolation::Backslash);
+ ends_with_slash = true;
+ break;
+ }
+ // End of the path: rewind so the caller sees the delimiter.
+ '?' | '#' if self.context == Context::UrlParser => {
+ input = input_before_c;
+ break;
+ }
+ _ => {
+ self.check_url_code_point(c, &input);
+ // The encode set depends on the context and, in the setter, on the scheme.
+ if self.context == Context::PathSegmentSetter {
+ if scheme_type.is_special() {
+ self.serialization
+ .extend(utf8_percent_encode(utf8_c, SPECIAL_PATH_SEGMENT));
+ } else {
+ self.serialization
+ .extend(utf8_percent_encode(utf8_c, PATH_SEGMENT));
+ }
+ } else {
+ self.serialization.extend(utf8_percent_encode(utf8_c, PATH));
+ }
+ }
+ }
+ }
+ // Post-process the segment that was just written.
+ match &self.serialization[segment_start..] {
+ // Double-dot segment, literal or percent-encoded: drop it together with
+ // the preceding segment.
+ ".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e"
+ | ".%2E" => {
+ debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/');
+ self.serialization.truncate(segment_start - 1); // Truncate "/.."
+ self.pop_path(scheme_type, path_start);
+ // Keep the path terminated by a slash after popping.
+ if !self.serialization[path_start..].ends_with('/') {
+ self.serialization.push('/')
+ }
+ }
+ // Single-dot segment: remove it, keeping the slash that precedes it.
+ "." | "%2e" | "%2E" => {
+ self.serialization.truncate(segment_start);
+ }
+ _ => {
+ // For `file:` URLs whose whole path so far is a drive letter,
+ // normalize `C|` to `C:` and drop the host.
+ if scheme_type.is_file()
+ && is_windows_drive_letter(&self.serialization[path_start + 1..])
+ {
+ if self.serialization.ends_with('|') {
+ self.serialization.pop();
+ self.serialization.push(':');
+ }
+ if *has_host {
+ self.log_violation(SyntaxViolation::FileWithHostAndWindowsDrive);
+ *has_host = false; // FIXME account for this in callers
+ }
+ }
+ if ends_with_slash {
+ self.serialization.push('/')
+ }
+ }
+ }
+ // A segment that did not end with a separator was the last one.
+ if !ends_with_slash {
+ break;
+ }
+ }
+ input
+ }
+
+    /// Removes the last path segment from the serialization, as in
+    /// https://url.spec.whatwg.org/#pop-a-urls-path
+    ///
+    /// Nothing happens when the serialization does not extend past `path_start`,
+    /// or when the scheme is `file` and the last segment is a Windows drive letter.
+    fn pop_path(&mut self, scheme_type: SchemeType, path_start: usize) {
+        if self.serialization.len() <= path_start {
+            return;
+        }
+        let last_slash = self.serialization[path_start..].rfind('/').unwrap();
+        // `rfind` gives the offset of the slash itself; the segment starts one past it.
+        let segment_start = path_start + last_slash + 1;
+        // Don't pop a Windows drive letter
+        // FIXME: *normalized* Windows drive letter
+        let is_drive_letter = scheme_type.is_file()
+            && is_windows_drive_letter(&self.serialization[segment_start..]);
+        if !is_drive_letter {
+            self.serialization.truncate(segment_start);
+        }
+    }
+
+    /// Parses the opaque path of a cannot-be-a-base URL.
+    ///
+    /// Each character is percent-encoded with the `CONTROLS` set and appended to
+    /// the serialization. In `Context::UrlParser`, parsing stops in front of the
+    /// first `?` or `#`, which is left in the returned input.
+    pub fn parse_cannot_be_a_base_path<'i>(&mut self, mut input: Input<'i>) -> Input<'i> {
+        loop {
+            // Kept so that a terminating `?` or `#` can be handed back unconsumed.
+            let checkpoint = input.clone();
+            let (c, utf8_c) = match input.next_utf8() {
+                Some(pair) => pair,
+                None => return input,
+            };
+            if (c == '?' || c == '#') && self.context == Context::UrlParser {
+                return checkpoint;
+            }
+            self.check_url_code_point(c, &input);
+            self.serialization
+                .extend(utf8_percent_encode(utf8_c, CONTROLS));
+        }
+    }
+
+    /// Finishes parsing: parses the optional query and fragment from `remaining`
+    /// and assembles the final `Url` from the offsets computed so far.
+    fn with_query_and_fragment(
+        mut self,
+        scheme_type: SchemeType,
+        scheme_end: u32,
+        username_end: u32,
+        host_start: u32,
+        host_end: u32,
+        host: HostInternal,
+        port: Option<u16>,
+        path_start: u32,
+        remaining: Input,
+    ) -> ParseResult<Url> {
+        let starts = self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?;
+        let (query_start, fragment_start) = starts;
+        let serialization = self.serialization;
+        Ok(Url {
+            serialization,
+            scheme_end,
+            username_end,
+            host_start,
+            host_end,
+            host,
+            port,
+            path_start,
+            query_start,
+            fragment_start,
+        })
+    }
+
+    /// Parses an optional `?query` followed by an optional `#fragment`.
+    ///
+    /// Returns `(query_start, fragment_start)`: the byte offsets in the
+    /// serialization of the `?` and `#` delimiters, for the parts that are present.
+    ///
+    /// # Panics
+    ///
+    /// If `input` is non-empty and starts with anything other than `?` or `#`.
+    fn parse_query_and_fragment(
+        &mut self,
+        scheme_type: SchemeType,
+        scheme_end: u32,
+        mut input: Input,
+    ) -> ParseResult<(Option<u32>, Option<u32>)> {
+        let (query_start, fragment_input) = match input.next() {
+            None => return Ok((None, None)),
+            Some('#') => (None, input),
+            Some('?') => {
+                let start = to_u32(self.serialization.len())?;
+                self.serialization.push('?');
+                match self.parse_query(scheme_type, scheme_end, input) {
+                    // The query was terminated by `#`: a fragment follows.
+                    Some(rest) => (Some(start), rest),
+                    None => return Ok((Some(start), None)),
+                }
+            }
+            Some(_) => {
+                panic!("Programming error. parse_query_and_fragment() called without ? or #")
+            }
+        };
+
+        let fragment_start = to_u32(self.serialization.len())?;
+        self.serialization.push('#');
+        self.parse_fragment(fragment_input);
+        Ok((query_start, Some(fragment_start)))
+    }
+
+    /// Parses a query string (the leading `?` already consumed) and appends its
+    /// percent-encoded form to the serialization.
+    ///
+    /// In `Context::UrlParser` the query ends at the first `#`; the input that
+    /// follows that `#` is returned. `None` means the whole input was the query.
+    /// The encoding override is applied only for the `http`, `https`, `file`,
+    /// `ftp` and `gopher` schemes.
+    pub fn parse_query<'i>(
+        &mut self,
+        scheme_type: SchemeType,
+        scheme_end: u32,
+        mut input: Input<'i>,
+    ) -> Option<Input<'i>> {
+        let mut query = String::new(); // FIXME: use a streaming decoder instead
+        let remaining = loop {
+            match input.next() {
+                None => break None,
+                Some('#') if self.context == Context::UrlParser => break Some(input),
+                Some(c) => {
+                    self.check_url_code_point(c, &input);
+                    query.push(c);
+                }
+            }
+        };
+
+        let uses_override = matches!(
+            &self.serialization[..scheme_end as usize],
+            "http" | "https" | "file" | "ftp" | "gopher"
+        );
+        let encoding = if uses_override {
+            self.query_encoding_override
+        } else {
+            None
+        };
+        let query_bytes = ::query_encoding::encode(encoding, &query);
+        let set = if scheme_type.is_special() {
+            SPECIAL_QUERY
+        } else {
+            QUERY
+        };
+        self.serialization.extend(percent_encode(&query_bytes, set));
+        remaining
+    }
+
+    /// Resolves a fragment-only reference (`#...`) against `base_url`.
+    ///
+    /// The result is `base_url` without its fragment, followed by `#` and the
+    /// parsed fragment from `input`; every other component is copied from the base.
+    fn fragment_only(mut self, base_url: &Url, mut input: Input) -> ParseResult<Url> {
+        let before_fragment = base_url
+            .fragment_start
+            .map_or(&*base_url.serialization, |i| base_url.slice(..i));
+        debug_assert!(self.serialization.is_empty());
+        let expected_len = before_fragment.len() + input.chars.as_str().len();
+        self.serialization.reserve(expected_len);
+        self.serialization.push_str(before_fragment);
+        self.serialization.push('#');
+        // Skip the `#` that introduced the fragment in the input.
+        let next = input.next();
+        debug_assert!(next == Some('#'));
+        self.parse_fragment(input);
+        let fragment_start = to_u32(before_fragment.len())?;
+        Ok(Url {
+            serialization: self.serialization,
+            fragment_start: Some(fragment_start),
+            ..*base_url
+        })
+    }
+
+    /// Parses a fragment (the leading `#` already consumed) to the end of `input`,
+    /// appending its percent-encoded form to the serialization.
+    ///
+    /// NUL characters are not written; each one is logged as a
+    /// `SyntaxViolation::NullInFragment` instead.
+    pub fn parse_fragment(&mut self, mut input: Input) {
+        while let Some((c, utf8_c)) = input.next_utf8() {
+            match c {
+                '\0' => self.log_violation(SyntaxViolation::NullInFragment),
+                _ => {
+                    self.check_url_code_point(c, &input);
+                    // FIXME: tests fail when we use the FRAGMENT set here
+                    // as defined in the spec as of 2019-07-17,
+                    // likely because tests are out of date.
+                    // See https://github.com/servo/rust-url/issues/290
+                    let encoded = utf8_percent_encode(utf8_c, CONTROLS);
+                    self.serialization.extend(encoded);
+                }
+            }
+        }
+    }
+
+    /// Reports a syntax violation for `c` if a violation callback is installed.
+    ///
+    /// A `%` that is not followed by two ASCII hex digits in `input` is reported
+    /// as `PercentDecode`; any other character that is not a URL code point is
+    /// reported as `NonUrlCodePoint`. `input` is only peeked at, never advanced.
+    fn check_url_code_point(&self, c: char, input: &Input) {
+        let vfn = match self.violation_fn {
+            Some(vfn) => vfn,
+            None => return,
+        };
+        if c != '%' {
+            if !is_url_code_point(c) {
+                vfn(SyntaxViolation::NonUrlCodePoint)
+            }
+            return;
+        }
+        let mut lookahead = input.clone();
+        let is_escape = match (lookahead.next(), lookahead.next()) {
+            (Some(a), Some(b)) => is_ascii_hex_digit(a) && is_ascii_hex_digit(b),
+            _ => false,
+        };
+        if !is_escape {
+            vfn(SyntaxViolation::PercentDecode)
+        }
+    }
+}
+
+/// Whether `c` is one of `0-9`, `a-f` or `A-F`.
+#[inline]
+fn is_ascii_hex_digit(c: char) -> bool {
+    c.is_ascii_hexdigit()
+}
+
+/// Whether `c` is a URL code point: ASCII alphanumerics, a fixed set of ASCII
+/// punctuation, and code points from U+00A0 to U+10FFFD that are neither
+/// surrogates nor noncharacters.
+///
+/// Non URL code points:
+/// U+0000 to U+0020 (space)
+/// " # % < > [ \ ] ^ ` { | }
+/// U+007F to U+009F
+/// surrogates
+/// U+FDD0 to U+FDEF
+/// Last two of each plane: U+__FFFE to U+__FFFF for __ in 00 to 10 hex
+#[inline]
+fn is_url_code_point(c: char) -> bool {
+    // Plane 14 starts at U+E0000 like every other plane: only its last two
+    // code points are noncharacters, so U+E0000..=U+E0FFF must be accepted too.
+    matches!(c,
+        'a'..='z' |
+        'A'..='Z' |
+        '0'..='9' |
+        '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' |
+        '.' | '/' | ':' | ';' | '=' | '?' | '@' | '_' | '~' |
+        '\u{A0}'..='\u{D7FF}' | '\u{E000}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' |
+        '\u{10000}'..='\u{1FFFD}' | '\u{20000}'..='\u{2FFFD}' |
+        '\u{30000}'..='\u{3FFFD}' | '\u{40000}'..='\u{4FFFD}' |
+        '\u{50000}'..='\u{5FFFD}' | '\u{60000}'..='\u{6FFFD}' |
+        '\u{70000}'..='\u{7FFFD}' | '\u{80000}'..='\u{8FFFD}' |
+        '\u{90000}'..='\u{9FFFD}' | '\u{A0000}'..='\u{AFFFD}' |
+        '\u{B0000}'..='\u{BFFFD}' | '\u{C0000}'..='\u{CFFFD}' |
+        '\u{D0000}'..='\u{DFFFD}' | '\u{E0000}'..='\u{EFFFD}' |
+        '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}')
+}
+
+/// Whether `ch` is a C0 control or space, i.e. in the range U+0000 to U+0020.
+///
+/// https://url.spec.whatwg.org/#c0-controls-and-space
+#[inline]
+fn c0_control_or_space(ch: char) -> bool {
+    matches!(ch, '\u{0}'..='\u{20}')
+}
+
+/// Whether `ch` is an ASCII letter (`a-z` or `A-Z`).
+///
+/// https://url.spec.whatwg.org/#ascii-alpha
+#[inline]
+pub fn ascii_alpha(ch: char) -> bool {
+    ch.is_ascii_alphabetic()
+}
+
+/// Converts a byte offset to `u32`, failing with `ParseError::Overflow` when it
+/// does not fit.
+#[inline]
+pub fn to_u32(i: usize) -> ParseResult<u32> {
+    if i > ::std::u32::MAX as usize {
+        return Err(ParseError::Overflow);
+    }
+    Ok(i as u32)
+}
+
+/// Whether `segment` is exactly a Windows drive letter: an ASCII letter followed
+/// by `:` or `|`, and nothing else.
+///
+/// Callers use this for `file:` URLs whose path has a single segment.
+fn is_windows_drive_letter(segment: &str) -> bool {
+    // The length test must come first: the helper below reads two bytes.
+    if segment.len() != 2 {
+        return false;
+    }
+    starts_with_windows_drive_letter(segment)
+}
+
+/// Whether `s` begins with a Windows drive letter: an ASCII letter followed by
+/// `:` or `|`.
+///
+/// Strings shorter than two bytes never match (and no longer cause an
+/// out-of-bounds panic).
+fn starts_with_windows_drive_letter(s: &str) -> bool {
+    let bytes = s.as_bytes();
+    bytes.len() >= 2 && bytes[0].is_ascii_alphabetic() && matches!(bytes[1], b':' | b'|')
+}
+
+/// Whether `input` begins with a Windows drive letter (ASCII letter, then `:` or
+/// `|`) that is immediately followed by `/`, `\`, `?` or `#`.
+///
+/// Only a clone of `input` is advanced; the caller's input is left untouched.
+fn starts_with_windows_drive_letter_segment(input: &Input) -> bool {
+    let mut probe = input.clone();
+    match (probe.next(), probe.next(), probe.next()) {
+        (Some(letter), Some(separator), Some(terminator)) => {
+            ascii_alpha(letter)
+                && matches!(separator, ':' | '|')
+                && matches!(terminator, '/' | '\\' | '?' | '#')
+        }
+        _ => false,
+    }
+}
diff --git a/third_party/rust/url/src/path_segments.rs b/third_party/rust/url/src/path_segments.rs
new file mode 100644
index 0000000000..97055e777c
--- /dev/null
+++ b/third_party/rust/url/src/path_segments.rs
@@ -0,0 +1,229 @@
+// Copyright 2016 The rust-url developers.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use parser::{self, to_u32, SchemeType};
+use std::str;
+use Url;
+
+/// Exposes methods to manipulate the path of a URL that is not cannot-be-a-base.
+///
+/// The path always starts with a `/` slash, and is made of slash-separated segments.
+/// There is always at least one segment (which may be the empty string).
+///
+/// When this value is dropped, the part of the URL that follows the path is
+/// restored after the (possibly modified) path.
+///
+/// Examples:
+///
+/// ```rust
+/// use url::Url;
+/// # use std::error::Error;
+///
+/// # fn run() -> Result<(), Box<Error>> {
+/// let mut url = Url::parse("mailto:me@example.com")?;
+/// assert!(url.path_segments_mut().is_err());
+///
+/// let mut url = Url::parse("http://example.net/foo/index.html")?;
+/// url.path_segments_mut().map_err(|_| "cannot be base")?
+/// .pop().push("img").push("2/100%.png");
+/// assert_eq!(url.as_str(), "http://example.net/foo/img/2%2F100%25.png");
+/// # Ok(())
+/// # }
+/// # run().unwrap();
+/// ```
+#[derive(Debug)]
+pub struct PathSegmentsMut<'a> {
+ // The URL being edited; its serialization is modified in place.
+ url: &'a mut Url,
+ // Byte offset in the serialization just past the path's leading `/`.
+ after_first_slash: usize,
+ // Text taken out of the URL by `take_after_path()` on creation and handed
+ // back to `restore_after_path()` on drop (presumably the query and fragment).
+ after_path: String,
+ // Length of the serialization right after `after_path` was taken out.
+ old_after_path_position: u32,
+}
+
+// Not re-exported outside the crate
+/// Starts a path-editing session on `url`.
+///
+/// The text after the path is taken out of the URL and kept in the returned
+/// value, which puts it back when dropped.
+///
+/// Panics if the serialization length does not fit in a `u32`.
+pub fn new(url: &mut Url) -> PathSegmentsMut {
+    // Must happen first: the position recorded below is the length without it.
+    let after_path = url.take_after_path();
+    let old_after_path_position = to_u32(url.serialization.len()).unwrap();
+    debug_assert!(url.byte_at(url.path_start) == b'/');
+    let after_first_slash = url.path_start as usize + "/".len();
+    PathSegmentsMut {
+        after_first_slash,
+        url,
+        old_after_path_position,
+        after_path,
+    }
+}
+
+impl<'a> Drop for PathSegmentsMut<'a> {
+    /// Puts the text that followed the path back into the URL, after the edited path.
+    fn drop(&mut self) {
+        let old_position = self.old_after_path_position;
+        self.url.restore_after_path(old_position, &self.after_path)
+    }
+}
+
+impl<'a> PathSegmentsMut<'a> {
+    /// Removes every segment of the path, leaving the minimal `url.path() == "/"`.
+    ///
+    /// Returns `&mut Self` so that method calls can be chained.
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    /// use url::Url;
+    /// # use std::error::Error;
+    ///
+    /// # fn run() -> Result<(), Box<Error>> {
+    /// let mut url = Url::parse("https://github.com/servo/rust-url/")?;
+    /// url.path_segments_mut().map_err(|_| "cannot be base")?
+    ///     .clear().push("logout");
+    /// assert_eq!(url.as_str(), "https://github.com/logout");
+    /// # Ok(())
+    /// # }
+    /// # run().unwrap();
+    /// ```
+    pub fn clear(&mut self) -> &mut Self {
+        let keep = self.after_first_slash;
+        self.url.serialization.truncate(keep);
+        self
+    }
+
+    /// Removes the last segment of this URL’s path if it is empty,
+    /// except if there was only one segment to begin with.
+    ///
+    /// In other words, removes one trailing slash from the path, if any,
+    /// unless it is also the initial slash (so this does nothing if `url.path() == "/"`).
+    ///
+    /// Returns `&mut Self` so that method calls can be chained.
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    /// use url::Url;
+    /// # use std::error::Error;
+    ///
+    /// # fn run() -> Result<(), Box<Error>> {
+    /// let mut url = Url::parse("https://github.com/servo/rust-url/")?;
+    /// url.path_segments_mut().map_err(|_| "cannot be base")?
+    ///     .push("pulls");
+    /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url//pulls");
+    ///
+    /// let mut url = Url::parse("https://github.com/servo/rust-url/")?;
+    /// url.path_segments_mut().map_err(|_| "cannot be base")?
+    ///     .pop_if_empty().push("pulls");
+    /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/pulls");
+    /// # Ok(())
+    /// # }
+    /// # run().unwrap();
+    /// ```
+    pub fn pop_if_empty(&mut self) -> &mut Self {
+        // Only text after the initial slash is inspected, so that slash is never removed.
+        let has_trailing_slash = self.url.serialization[self.after_first_slash..].ends_with('/');
+        if has_trailing_slash {
+            self.url.serialization.pop();
+        }
+        self
+    }
+
+    /// Removes the last segment of this URL’s path.
+    ///
+    /// If the path only has one segment, it is made empty such that `url.path() == "/"`.
+    ///
+    /// Returns `&mut Self` so that method calls can be chained.
+    pub fn pop(&mut self) -> &mut Self {
+        // Cut at the last slash after the initial one, or right after the
+        // initial slash when there is no other.
+        let new_len = match self.url.serialization[self.after_first_slash..].rfind('/') {
+            Some(last_slash) => self.after_first_slash + last_slash,
+            None => self.after_first_slash,
+        };
+        self.url.serialization.truncate(new_len);
+        self
+    }
+
+    /// Appends the given segment at the end of this URL’s path.
+    ///
+    /// See the documentation for `.extend()`.
+    ///
+    /// Returns `&mut Self` so that method calls can be chained.
+    pub fn push(&mut self, segment: &str) -> &mut Self {
+        self.extend(::std::iter::once(segment))
+    }
+
+    /// Appends each segment from the given iterator at the end of this URL’s path.
+    ///
+    /// Each segment is percent-encoded like in `Url::parse` or `Url::join`,
+    /// except that `%` and `/` characters are also encoded (to `%25` and `%2F`).
+    /// (This is unlike `Url::parse`, where `%` is left as-is in case some of the input
+    /// is already percent-encoded, and `/` denotes a path segment separator.)
+    ///
+    /// Note that, in addition to slashes between new segments,
+    /// this always adds a slash between the existing path and the new segments
+    /// *except* if the existing path is `"/"`.
+    /// If the previous last segment was empty (if the path had a trailing slash)
+    /// the path after `.extend()` will contain two consecutive slashes.
+    /// If that is undesired, call `.pop_if_empty()` first.
+    ///
+    /// To obtain a behavior similar to `Url::join`, call `.pop()` unconditionally first.
+    ///
+    /// Returns `&mut Self` so that method calls can be chained.
+    ///
+    /// Example:
+    ///
+    /// ```rust
+    /// use url::Url;
+    /// # use std::error::Error;
+    ///
+    /// # fn run() -> Result<(), Box<Error>> {
+    /// let mut url = Url::parse("https://github.com/")?;
+    /// let org = "servo";
+    /// let repo = "rust-url";
+    /// let issue_number = "188";
+    /// url.path_segments_mut().map_err(|_| "cannot be base")?
+    ///     .extend(&[org, repo, "issues", issue_number]);
+    /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/issues/188");
+    /// # Ok(())
+    /// # }
+    /// # run().unwrap();
+    /// ```
+    ///
+    /// In order to make sure that parsing the serialization of a URL gives the same URL,
+    /// a segment is ignored if it is `"."` or `".."`:
+    ///
+    /// ```rust
+    /// use url::Url;
+    /// # use std::error::Error;
+    ///
+    /// # fn run() -> Result<(), Box<Error>> {
+    /// let mut url = Url::parse("https://github.com/servo")?;
+    /// url.path_segments_mut().map_err(|_| "cannot be base")?
+    ///     .extend(&["..", "rust-url", ".", "pulls"]);
+    /// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/pulls");
+    /// # Ok(())
+    /// # }
+    /// # run().unwrap();
+    /// ```
+    pub fn extend<I>(&mut self, segments: I) -> &mut Self
+    where
+        I: IntoIterator,
+        I::Item: AsRef<str>,
+    {
+        let path_start = self.url.path_start as usize;
+        let scheme_type = SchemeType::from(self.url.scheme());
+        self.url.mutate(|parser| {
+            parser.context = parser::Context::PathSegmentSetter;
+            for item in segments {
+                let text = item.as_ref();
+                if text == "." || text == ".." {
+                    continue;
+                }
+                // Separate from the existing path unless that path is just "/".
+                if parser.serialization.len() > path_start + 1 {
+                    parser.serialization.push('/');
+                }
+                let mut has_host = true; // FIXME account for this?
+                let input = parser::Input::new(text);
+                parser.parse_path(scheme_type, &mut has_host, path_start, input);
+            }
+        });
+        self
+    }
+}
diff --git a/third_party/rust/url/src/query_encoding.rs b/third_party/rust/url/src/query_encoding.rs
new file mode 100644
index 0000000000..76aed15a7b
--- /dev/null
+++ b/third_party/rust/url/src/query_encoding.rs
@@ -0,0 +1,35 @@
+// Copyright 2019 The rust-url developers.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::borrow::Cow;
+
+/// An optional caller-supplied function that turns query text into bytes.
+pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<[u8]>>;
+
+/// Encodes `input` with the override when one is given; otherwise returns the
+/// string's own UTF-8 bytes, borrowed.
+pub(crate) fn encode<'a>(encoding_override: EncodingOverride, input: &'a str) -> Cow<'a, [u8]> {
+    match encoding_override {
+        Some(custom) => custom(input),
+        None => Cow::Borrowed(input.as_bytes()),
+    }
+}
+
+/// Decodes bytes as UTF-8, replacing invalid sequences with U+FFFD.
+///
+/// Borrowed input that is valid stays borrowed. Owned input that is valid is
+/// converted to a `String` in place, reusing its allocation; no `unsafe` is
+/// needed because `String::from_utf8` hands the buffer back on failure.
+pub(crate) fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
+    match input {
+        Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
+        Cow::Owned(bytes) => match String::from_utf8(bytes) {
+            // Valid UTF-8: the existing `Vec` allocation becomes the `String`.
+            Ok(text) => text.into(),
+            // Invalid UTF-8: build a repaired copy from the rejected bytes.
+            Err(error) => String::from_utf8_lossy(error.as_bytes())
+                .into_owned()
+                .into(),
+        },
+    }
+}
diff --git a/third_party/rust/url/src/quirks.rs b/third_party/rust/url/src/quirks.rs
new file mode 100644
index 0000000000..285ee21b65
--- /dev/null
+++ b/third_party/rust/url/src/quirks.rs
@@ -0,0 +1,226 @@
+// Copyright 2016 The rust-url developers.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Getters and setters for URL components implemented per https://url.spec.whatwg.org/#api
+//!
+//! Unless you need to be interoperable with web browsers,
+//! you probably want to use `Url` methods instead.
+
+use parser::{default_port, Context, Input, Parser, SchemeType};
+use {idna, Host, ParseError, Position, Url};
+
+/// Returns the parsed form of `domain` when it parses as a domain name, and an
+/// empty string otherwise (parse error, or an IP address).
+///
+/// https://url.spec.whatwg.org/#dom-url-domaintoascii
+pub fn domain_to_ascii(domain: &str) -> String {
+    if let Ok(Host::Domain(ascii)) = Host::parse(domain) {
+        ascii
+    } else {
+        String::new()
+    }
+}
+
+/// Returns the Unicode form of `domain` when it parses as a domain name, and an
+/// empty string otherwise. Errors reported by the IDNA conversion are ignored.
+///
+/// https://url.spec.whatwg.org/#dom-url-domaintounicode
+pub fn domain_to_unicode(domain: &str) -> String {
+    if let Ok(Host::Domain(ref ascii)) = Host::parse(domain) {
+        let (unicode, _errors) = idna::domain_to_unicode(ascii);
+        return unicode;
+    }
+    String::new()
+}
+
+/// Getter for https://url.spec.whatwg.org/#dom-url-href
+///
+/// Returns the URL as a string slice.
+pub fn href(url: &Url) -> &str {
+    let serialized = url.as_str();
+    serialized
+}
+
+/// Setter for https://url.spec.whatwg.org/#dom-url-href
+///
+/// Replaces `url` with the result of parsing `value`; on a parse error `url`
+/// is left unchanged and the error is returned.
+pub fn set_href(url: &mut Url, value: &str) -> Result<(), ParseError> {
+    Url::parse(value).map(|parsed| *url = parsed)
+}
+
+/// Getter for https://url.spec.whatwg.org/#dom-url-origin
+///
+/// Returns the ASCII serialization of the URL's origin.
+pub fn origin(url: &Url) -> String {
+    let origin = url.origin();
+    origin.ascii_serialization()
+}
+
+/// Getter for https://url.spec.whatwg.org/#dom-url-protocol
+///
+/// Returns the scheme including the `:` that follows it.
+#[inline]
+pub fn protocol(url: &Url) -> &str {
+    let end = url.scheme().len() + ":".len();
+    &url.as_str()[..end]
+}
+
+/// Setter for https://url.spec.whatwg.org/#dom-url-protocol
+///
+/// Only the part of `new_protocol` before the first `:` is used as the new scheme.
+pub fn set_protocol(url: &mut Url, new_protocol: &str) -> Result<(), ()> {
+    // The scheme state in the spec ignores everything after the first `:`,
+    // but `set_scheme` errors if there is more.
+    let scheme = match new_protocol.find(':') {
+        Some(colon) => &new_protocol[..colon],
+        None => new_protocol,
+    };
+    url.set_scheme(scheme)
+}
+
+/// Getter for https://url.spec.whatwg.org/#dom-url-username
+///
+/// Forwards to `Url::username`.
+#[inline]
+pub fn username(url: &Url) -> &str {
+    let name = url.username();
+    name
+}
+
+/// Setter for https://url.spec.whatwg.org/#dom-url-username
+///
+/// Forwards to `Url::set_username` and returns its result.
+pub fn set_username(url: &mut Url, new_username: &str) -> Result<(), ()> {
+    let outcome = url.set_username(new_username);
+    outcome
+}
+
+/// Getter for https://url.spec.whatwg.org/#dom-url-password
+///
+/// Returns the empty string when the URL has no password.
+#[inline]
+pub fn password(url: &Url) -> &str {
+    match url.password() {
+        Some(password) => password,
+        None => "",
+    }
+}
+
+/// Setter for https://url.spec.whatwg.org/#dom-url-password
+///
+/// An empty `new_password` removes the password.
+pub fn set_password(url: &mut Url, new_password: &str) -> Result<(), ()> {
+    let password = if new_password.is_empty() {
+        None
+    } else {
+        Some(new_password)
+    };
+    url.set_password(password)
+}
+
+/// Getter for https://url.spec.whatwg.org/#dom-url-host
+///
+/// Returns the slice from the start of the host to the end of the port.
+#[inline]
+pub fn host(url: &Url) -> &str {
+    let host_and_port = Position::BeforeHost..Position::AfterPort;
+    &url[host_and_port]
+}
+
+/// Setter for https://url.spec.whatwg.org/#dom-url-host
+///
+/// Parses a host from `new_host`, optionally followed by `:` and a port, and
+/// stores both in `url`. Fails for cannot-be-a-base URLs and when the host does
+/// not parse. A `:` followed by a port that does not parse still sets the host;
+/// no port is then passed on to `set_host_internal`.
+pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
+    if url.cannot_be_a_base() {
+        return Err(());
+    }
+    let (host, opt_port) = {
+        let scheme = url.scheme();
+        let parsed = Parser::parse_host(Input::new(new_host), SchemeType::from(scheme));
+        let (host, after_host) = match parsed {
+            Ok(pair) => pair,
+            Err(_) => return Err(()),
+        };
+        // Outer `None`: no parsable port part; inner `None`: default port or no digits.
+        let opt_port = after_host
+            .split_prefix(':')
+            .and_then(|port_input| {
+                Parser::parse_port(port_input, || default_port(scheme), Context::Setter).ok()
+            })
+            .map(|(port, _remaining)| port);
+        (host, opt_port)
+    };
+    url.set_host_internal(host, opt_port);
+    Ok(())
+}
+
+/// Getter for https://url.spec.whatwg.org/#dom-url-hostname
+///
+/// Returns the empty string when the URL has no host.
+#[inline]
+pub fn hostname(url: &Url) -> &str {
+    match url.host_str() {
+        Some(host) => host,
+        None => "",
+    }
+}
+
+/// Setter for https://url.spec.whatwg.org/#dom-url-hostname
+///
+/// Parses a host from `new_hostname` and stores it in `url`, passing no port on
+/// to `set_host_internal`. Fails for cannot-be-a-base URLs and when the host
+/// does not parse.
+pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> {
+    if url.cannot_be_a_base() {
+        return Err(());
+    }
+    let parsed = Parser::parse_host(Input::new(new_hostname), SchemeType::from(url.scheme()));
+    match parsed {
+        Ok((host, _remaining)) => {
+            url.set_host_internal(host, None);
+            Ok(())
+        }
+        Err(_) => Err(()),
+    }
+}
+
+/// Getter for https://url.spec.whatwg.org/#dom-url-port
+///
+/// Returns the port as written in the URL, or the empty string when there is none.
+#[inline]
+pub fn port(url: &Url) -> &str {
+    let port_range = Position::BeforePort..Position::AfterPort;
+    &url[port_range]
+}
+
+/// Setter for https://url.spec.whatwg.org/#dom-url-port
+///
+/// Fails when the URL has no host, has an empty domain as host, uses the `file`
+/// scheme, or when `new_port` does not parse as a port.
+pub fn set_port(url: &mut Url, new_port: &str) -> Result<(), ()> {
+    let parsed = {
+        // has_host implies !cannot_be_a_base
+        let scheme = url.scheme();
+        let port_allowed =
+            url.has_host() && url.host() != Some(Host::Domain("")) && scheme != "file";
+        if !port_allowed {
+            return Err(());
+        }
+        Parser::parse_port(
+            Input::new(new_port),
+            || default_port(scheme),
+            Context::Setter,
+        )
+    };
+    match parsed {
+        Ok((port, _remaining)) => {
+            url.set_port_internal(port);
+            Ok(())
+        }
+        Err(_) => Err(()),
+    }
+}
+
+/// Getter for https://url.spec.whatwg.org/#dom-url-pathname
+///
+/// Forwards to `Url::path`.
+#[inline]
+pub fn pathname(url: &Url) -> &str {
+    let path = url.path();
+    path
+}
+
+/// Setter for https://url.spec.whatwg.org/#dom-url-pathname
+///
+/// Does nothing for cannot-be-a-base URLs.
+pub fn set_pathname(url: &mut Url, new_pathname: &str) {
+    if url.cannot_be_a_base() {
+        return;
+    }
+    url.set_path(new_pathname)
+}
+
+/// Getter for https://url.spec.whatwg.org/#dom-url-search
+///
+/// Returns the query including its leading `?`, or the empty string when the
+/// query is absent or empty.
+pub fn search(url: &Url) -> &str {
+    let with_delimiter = &url[Position::AfterPath..Position::AfterQuery];
+    trim(with_delimiter)
+}
+
+/// Setter for https://url.spec.whatwg.org/#dom-url-search
+///
+/// An empty string removes the query; one leading `?` is stripped if present.
+pub fn set_search(url: &mut Url, new_search: &str) {
+    let query = if new_search.is_empty() {
+        None
+    } else if new_search.starts_with('?') {
+        Some(&new_search[1..])
+    } else {
+        Some(new_search)
+    };
+    url.set_query(query)
+}
+
+/// Getter for https://url.spec.whatwg.org/#dom-url-hash
+///
+/// Returns the fragment including its leading `#`, or the empty string when the
+/// fragment is absent or empty.
+pub fn hash(url: &Url) -> &str {
+    let with_delimiter = &url[Position::AfterQuery..];
+    trim(with_delimiter)
+}
+
+/// Setter for https://url.spec.whatwg.org/#dom-url-hash
+///
+/// Does nothing for `javascript:` URLs. An empty string removes the fragment;
+/// one leading `#` is stripped if present.
+pub fn set_hash(url: &mut Url, new_hash: &str) {
+    if url.scheme() == "javascript" {
+        return;
+    }
+    let fragment = if new_hash.is_empty() {
+        None
+    } else if new_hash.starts_with('#') {
+        Some(&new_hash[1..])
+    } else {
+        Some(new_hash)
+    };
+    url.set_fragment(fragment)
+}
+
+/// Maps a one-byte string (a lone `?` or `#` delimiter in the callers above) to
+/// the empty string and returns anything else unchanged.
+fn trim(s: &str) -> &str {
+    match s.len() {
+        1 => "",
+        _ => s,
+    }
+}
diff --git a/third_party/rust/url/src/slicing.rs b/third_party/rust/url/src/slicing.rs
new file mode 100644
index 0000000000..2d7f78e6f8
--- /dev/null
+++ b/third_party/rust/url/src/slicing.rs
@@ -0,0 +1,187 @@
+// Copyright 2016 The rust-url developers.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::ops::{Index, Range, RangeFrom, RangeFull, RangeTo};
+use Url;
+
+/// `&url[..]` is the whole serialization.
+impl Index<RangeFull> for Url {
+    type Output = str;
+    fn index(&self, _: RangeFull) -> &str {
+        self.serialization.as_str()
+    }
+}
+
+/// `&url[position..]` is the serialization from `position` to the end.
+impl Index<RangeFrom<Position>> for Url {
+    type Output = str;
+    fn index(&self, range: RangeFrom<Position>) -> &str {
+        let start = self.index(range.start);
+        &self.serialization[start..]
+    }
+}
+
+/// `&url[..position]` is the serialization from the start up to `position`.
+impl Index<RangeTo<Position>> for Url {
+    type Output = str;
+    fn index(&self, range: RangeTo<Position>) -> &str {
+        let end = self.index(range.end);
+        &self.serialization[..end]
+    }
+}
+
+/// `&url[a..b]` is the part of the serialization between two positions.
+impl Index<Range<Position>> for Url {
+    type Output = str;
+    fn index(&self, range: Range<Position>) -> &str {
+        let start = self.index(range.start);
+        let end = self.index(range.end);
+        &self.serialization[start..end]
+    }
+}
+
+/// Indicates a position within a URL based on its components.
+///
+/// A range of positions can be used for slicing `Url`:
+///
+/// ```rust
+/// # use url::{Url, Position};
+/// # fn something(some_url: Url) {
+/// let serialization: &str = &some_url[..];
+/// let serialization_without_fragment: &str = &some_url[..Position::AfterQuery];
+/// let authority: &str = &some_url[Position::BeforeUsername..Position::AfterPort];
+/// let data_url_payload: &str = &some_url[Position::BeforePath..Position::AfterQuery];
+/// let scheme_relative: &str = &some_url[Position::BeforeUsername..];
+/// # }
+/// ```
+///
+/// In a pseudo-grammar (where `[`…`]?` makes a sub-sequence optional),
+/// URL components and delimiters that separate them are:
+///
+/// ```notrust
+/// url =
+/// scheme ":"
+/// [ "//" [ username [ ":" password ]? "@" ]? host [ ":" port ]? ]?
+/// path [ "?" query ]? [ "#" fragment ]?
+/// ```
+///
+/// When a given component is not present,
+/// its "before" and "after" position are the same
+/// (so that `&some_url[BeforeFoo..AfterFoo]` is the empty string)
+/// and component ordering is preserved
+/// (so that a missing query "is between" a path and a fragment).
+///
+/// The end of a component and the start of the next are either the same or separated
+/// by a delimiter.
+/// (Note that the initial `/` of a path is considered part of the path here, not a delimiter.)
+/// For example, `&url[..BeforeFragment]` would include a `#` delimiter (if present in `url`),
+/// so `&url[..AfterQuery]` might be desired instead.
+///
+/// `BeforeScheme` and `AfterFragment` are always the start and end of the entire URL,
+/// so `&url[BeforeScheme..X]` is the same as `&url[..X]`
+/// and `&url[X..AfterFragment]` is the same as `&url[X..]`.
+#[derive(Copy, Clone, Debug)]
+pub enum Position {
+ /// Start of the URL.
+ BeforeScheme,
+ /// End of the scheme, before the `:` delimiter.
+ AfterScheme,
+ /// After `://` when there is an authority, otherwise after `:`.
+ BeforeUsername,
+ AfterUsername,
+ /// After the `:` that introduces the password, when there is one.
+ BeforePassword,
+ /// Before the `@` that ends the password, when there is one.
+ AfterPassword,
+ BeforeHost,
+ AfterHost,
+ /// After the `:` that introduces the port, when there is one.
+ BeforePort,
+ /// Same offset as `BeforePath`.
+ AfterPort,
+ BeforePath,
+ AfterPath,
+ /// After the `?` delimiter, when there is a query.
+ BeforeQuery,
+ AfterQuery,
+ /// After the `#` delimiter, when there is a fragment.
+ BeforeFragment,
+ /// End of the URL.
+ AfterFragment,
+}
+
+impl Url {
+    /// Translates a component-relative `Position` into a byte offset into
+    /// `self.serialization`.
+    #[inline]
+    fn index(&self, position: Position) -> usize {
+        let end = self.serialization.len();
+        // True when the byte after the username is the `:` that introduces a password.
+        let has_password = || self.has_authority() && self.byte_at(self.username_end) == b':';
+
+        match position {
+            Position::BeforeScheme => 0,
+
+            Position::AfterScheme => self.scheme_end as usize,
+
+            Position::BeforeUsername => {
+                let delimiter = if self.has_authority() {
+                    "://"
+                } else {
+                    debug_assert!(self.byte_at(self.scheme_end) == b':');
+                    debug_assert!(self.scheme_end + ":".len() as u32 == self.username_end);
+                    ":"
+                };
+                self.scheme_end as usize + delimiter.len()
+            }
+
+            Position::AfterUsername => self.username_end as usize,
+
+            Position::BeforePassword if has_password() => self.username_end as usize + ":".len(),
+            Position::BeforePassword => {
+                debug_assert!(self.username_end == self.host_start);
+                self.username_end as usize
+            }
+
+            Position::AfterPassword if has_password() => {
+                debug_assert!(self.byte_at(self.host_start - "@".len() as u32) == b'@');
+                self.host_start as usize - "@".len()
+            }
+            Position::AfterPassword => {
+                debug_assert!(self.username_end == self.host_start);
+                self.host_start as usize
+            }
+
+            Position::BeforeHost => self.host_start as usize,
+
+            Position::BeforePort if self.port.is_some() => {
+                debug_assert!(self.byte_at(self.host_end) == b':');
+                self.host_end as usize + ":".len()
+            }
+            // Without a port, "before port" coincides with the end of the host.
+            Position::AfterHost | Position::BeforePort => self.host_end as usize,
+
+            Position::AfterPort | Position::BeforePath => self.path_start as usize,
+
+            // The path ends where the query starts, else where the fragment
+            // starts, else at the end of the URL.
+            Position::AfterPath => self
+                .query_start
+                .or(self.fragment_start)
+                .map_or(end, |start| start as usize),
+
+            Position::BeforeQuery => match self.query_start {
+                Some(q) => {
+                    debug_assert!(self.byte_at(q) == b'?');
+                    q as usize + "?".len()
+                }
+                None => self.fragment_start.map_or(end, |f| f as usize),
+            },
+
+            Position::AfterQuery => self.fragment_start.map_or(end, |f| f as usize),
+
+            Position::BeforeFragment => match self.fragment_start {
+                Some(f) => {
+                    debug_assert!(self.byte_at(f) == b'#');
+                    f as usize + "#".len()
+                }
+                None => end,
+            },
+
+            Position::AfterFragment => end,
+        }
+    }
+}