diff options
Diffstat (limited to 'vendor/gix-url/src')
-rw-r--r-- | vendor/gix-url/src/expand_path.rs | 15 | ||||
-rw-r--r-- | vendor/gix-url/src/lib.rs | 99 | ||||
-rw-r--r-- | vendor/gix-url/src/parse.rs | 349 | ||||
-rw-r--r-- | vendor/gix-url/src/scheme.rs | 3 |
4 files changed, 306 insertions, 160 deletions
diff --git a/vendor/gix-url/src/expand_path.rs b/vendor/gix-url/src/expand_path.rs index 85fb0da50..e62a8b51e 100644 --- a/vendor/gix-url/src/expand_path.rs +++ b/vendor/gix-url/src/expand_path.rs @@ -22,7 +22,7 @@ impl From<ForUser> for Option<BString> { } } -/// The error used by [`parse()`], [`with()`] and [`expand_path()`]. +/// The error used by [`parse()`], [`with()`] and [`expand_path()`](crate::expand_path()). #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { @@ -112,16 +112,3 @@ pub fn with( None => path.into(), }) } - -/// Expand `path` for the given `user`, which can be obtained by [`parse()`], resolving the home directories -/// of `user` automatically. -/// -/// If more precise control of the resolution mechanism is needed, then use the [`with()`] function. -pub fn expand_path(user: Option<&ForUser>, path: &BStr) -> Result<PathBuf, Error> { - with(user, path, |user| match user { - ForUser::Current => home::home_dir(), - ForUser::Name(user) => { - home::home_dir().and_then(|home| home.parent().map(|home_dirs| home_dirs.join(user.to_string()))) - } - }) -} diff --git a/vendor/gix-url/src/lib.rs b/vendor/gix-url/src/lib.rs index 1d90689ae..a86713e85 100644 --- a/vendor/gix-url/src/lib.rs +++ b/vendor/gix-url/src/lib.rs @@ -10,28 +10,58 @@ use bstr::{BStr, BString}; use std::borrow::Cow; - -/// -pub mod parse; -#[doc(inline)] -pub use parse::parse; +use std::path::PathBuf; /// pub mod expand_path; -#[doc(inline)] -pub use expand_path::expand_path; mod scheme; pub use scheme::Scheme; +mod impls; + +/// +pub mod parse; + +/// Parse the given `bytes` as a [git url](Url). +/// +/// # Note +/// +/// We cannot and should never have to deal with UTF-16 encoded windows strings, so bytes input is acceptable. +/// For file-paths, we don't expect UTF8 encoding either. +pub fn parse(input: &BStr) -> Result<Url, parse::Error> { + use parse::InputScheme; + match parse::find_scheme(input) { + InputScheme::Local => parse::local(input), + InputScheme::Url { protocol_end } if input[..protocol_end].eq_ignore_ascii_case(b"file") => { + parse::file_url(input, protocol_end) + } + InputScheme::Url { protocol_end } => parse::url(input, protocol_end), + InputScheme::Scp { colon } => parse::scp(input, colon), + } +} + +/// Expand `path` for the given `user`, which can be obtained by [`parse()`], resolving the home directories +/// of `user` automatically. +/// +/// If more precise control of the resolution mechanism is needed, then use the [expand_path::with()] function. +pub fn expand_path(user: Option<&expand_path::ForUser>, path: &BStr) -> Result<PathBuf, expand_path::Error> { + expand_path::with(user, path, |user| match user { + expand_path::ForUser::Current => home::home_dir(), + expand_path::ForUser::Name(user) => { + home::home_dir().and_then(|home| home.parent().map(|home_dirs| home_dirs.join(user.to_string()))) + } + }) +} /// A URL with support for specialized git related capabilities. /// -/// Additionally there is support for [deserialization][Url::from_bytes()] and serialization -/// (_see the `Display::fmt()` implementation_). +/// Additionally there is support for [deserialization](Url::from_bytes()) and serialization +/// (_see the [`std::fmt::Display::fmt()`] implementation_). /// -/// # Deviation +/// # Security Warning /// -/// Note that we do not support passing the password using the URL as it's likely leading to accidents. +/// URLs may contain passwords and we serialize them when [formatting](std::fmt::Display) or +/// [serializing losslessly](Url::to_bstring()). #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Url { @@ -55,7 +85,7 @@ pub struct Url { /// the invocation of programs from an attacker controlled URL. See <https://secure.phabricator.com/T12961> for details. /// /// If this value is going to be used in a command-line application, call [Self::path_argument_safe()] instead. - pub path: bstr::BString, + pub path: BString, } /// Instantiation @@ -88,7 +118,7 @@ impl Url { /// Modification impl Url { - /// Set the given `user`, with `None` unsetting it. Returns the previous value. + /// Set the given `user`, or unset it with `None`. Return the previous value. pub fn set_user(&mut self, user: Option<String>) -> Option<String> { let prev = self.user.take(); self.user = user; @@ -228,7 +258,7 @@ impl Url { } /// Transform ourselves into a binary string, losslessly, or fail if the URL is malformed due to host or user parts being incorrect. - pub fn to_bstring(&self) -> bstr::BString { + pub fn to_bstring(&self) -> BString { let mut buf = Vec::with_capacity( (5 + 3) + self.user.as_ref().map(String::len).unwrap_or_default() @@ -250,4 +280,43 @@ impl Url { } } -mod impls; +/// This module contains extensions to the [Url] struct which are only intended to be used +/// for testing code. Do not use this module in production! For all intends and purposes the APIs of +/// all functions and types exposed by this module are considered unstable and are allowed to break +/// even in patch releases! +#[doc(hidden)] +#[cfg(debug_assertions)] +pub mod testing { + use bstr::BString; + + use crate::{Scheme, Url}; + + /// Additional functions for [Url] which are only intended to be used for tests. + pub trait TestUrlExtension { + /// Create a new instance from the given parts without validating them. + /// + /// This function is primarily intended for testing purposes. For production code please + /// consider using [Url::from_parts] instead! + fn from_parts_unchecked( + scheme: Scheme, + user: Option<String>, + password: Option<String>, + host: Option<String>, + port: Option<u16>, + path: BString, + serialize_alternative_form: bool, + ) -> Url { + Url { + scheme, + user, + password, + host, + port, + path, + serialize_alternative_form, + } + } + } + + impl TestUrlExtension for Url {} +} diff --git a/vendor/gix-url/src/parse.rs b/vendor/gix-url/src/parse.rs index e6e7d4872..382666368 100644 --- a/vendor/gix-url/src/parse.rs +++ b/vendor/gix-url/src/parse.rs @@ -1,25 +1,30 @@ -use std::{borrow::Cow, convert::Infallible}; - -pub use bstr; -use bstr::{BStr, BString, ByteSlice}; +use std::convert::Infallible; use crate::Scheme; +use bstr::{BStr, BString, ByteSlice}; -/// The Error returned by [`parse()`] +/// The error returned by [parse()](crate::parse()). #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { - #[error("Could not decode URL as UTF8")] - Utf8(#[from] std::str::Utf8Error), - #[error(transparent)] - Url(#[from] url::ParseError), - #[error("URLs need to specify the path to the repository")] - MissingResourceLocation, - #[error("file URLs require an absolute or relative path to the repository")] - MissingRepositoryPath, - #[error("\"{url}\" is not a valid local path")] - NotALocalFile { url: BString }, - #[error("Relative URLs are not permitted: {url:?}")] + #[error("{} \"{url}\" is not valid UTF-8", kind.as_str())] + Utf8 { + url: BString, + kind: UrlKind, + source: std::str::Utf8Error, + }, + #[error("{} {url:?} can not be parsed as valid URL", kind.as_str())] + Url { + url: String, + kind: UrlKind, + source: url::ParseError, + }, + + #[error("The host portion of the following URL is too long ({} bytes, {len} bytes total): {truncated_url:?}", truncated_url.len())] + TooLong { truncated_url: BString, len: usize }, + #[error("{} \"{url}\" does not specify a path to a repository", kind.as_str())] + MissingRepositoryPath { url: BString, kind: UrlKind }, + #[error("URL {url:?} is relative which is not allowed in this context")] RelativeUrl { url: String }, } @@ -29,145 +34,229 @@ impl From<Infallible> for Error { } } -fn str_to_protocol(s: &str) -> Scheme { - Scheme::from(s) +/// +#[derive(Debug, Clone, Copy)] +pub enum UrlKind { + /// + Url, + /// + Scp, + /// + Local, } -fn guess_protocol(url: &[u8]) -> Option<&str> { - match url.find_byte(b':') { - Some(colon_pos) => { - if url[..colon_pos].find_byteset(b"@.").is_some() { - "ssh" - } else { - url.get(colon_pos + 1..).and_then(|from_colon| { - (from_colon.contains(&b'/') || from_colon.contains(&b'\\')).then_some("file") - })? - } +impl UrlKind { + fn as_str(&self) -> &'static str { + match self { + UrlKind::Url => "URL", + UrlKind::Scp => "SCP-like target", + UrlKind::Local => "local path", } - None => "file", } - .into() } -/// Extract the path part from an SCP-like URL `[user@]host.xz:path/to/repo.git/` -fn extract_scp_path(url: &str) -> Option<&str> { - url.splitn(2, ':').last() +pub(crate) enum InputScheme { + Url { protocol_end: usize }, + Scp { colon: usize }, + Local, } -fn sanitize_for_protocol<'a>(protocol: &str, url: &'a str) -> Cow<'a, str> { - match protocol { - "ssh" => url.replacen(':', "/", 1).into(), - _ => url.into(), +pub(crate) fn find_scheme(input: &BStr) -> InputScheme { + // TODO: url's may only contain `:/`, we should additionally check if the characters used for + // protocol are all valid + if let Some(protocol_end) = input.find("://") { + return InputScheme::Url { protocol_end }; } -} -fn has_no_explicit_protocol(url: &[u8]) -> bool { - url.find(b"://").is_none() + if let Some(colon) = input.find_byte(b':') { + // allow user to select files containing a `:` by passing them as absolute or relative path + // this is behavior explicitly mentioned by the scp and git manuals + let explicitly_local = &input[..colon].contains(&b'/'); + let dos_driver_letter = cfg!(windows) && input[..colon].len() == 1; + + if !explicitly_local && !dos_driver_letter { + return InputScheme::Scp { colon }; + } + } + + InputScheme::Local } -fn to_owned_url(url: url::Url) -> Result<crate::Url, Error> { - let password = url.password(); +pub(crate) fn url(input: &BStr, protocol_end: usize) -> Result<crate::Url, Error> { + const MAX_LEN: usize = 1024; + let bytes_to_path = input[protocol_end + "://".len()..] + .iter() + .filter(|b| !b.is_ascii_whitespace()) + .skip_while(|b| **b == b'/' || **b == b'\\') + .position(|b| *b == b'/') + .unwrap_or(input.len() - protocol_end); + if bytes_to_path > MAX_LEN || protocol_end > MAX_LEN { + return Err(Error::TooLong { + truncated_url: input[..(protocol_end + "://".len() + MAX_LEN).min(input.len())].into(), + len: input.len(), + }); + } + let (input, url) = input_to_utf8_and_url(input, UrlKind::Url)?; + let scheme = url.scheme().into(); + + if matches!(scheme, Scheme::Git | Scheme::Ssh) && url.path().is_empty() { + return Err(Error::MissingRepositoryPath { + url: input.into(), + kind: UrlKind::Url, + }); + } + + if url.cannot_be_a_base() { + return Err(Error::RelativeUrl { url: input.to_owned() }); + } + Ok(crate::Url { serialize_alternative_form: false, - scheme: str_to_protocol(url.scheme()), - password: password.map(ToOwned::to_owned), - user: if url.username().is_empty() && password.is_none() { - None - } else { - Some(url.username().into()) - }, + scheme, + user: url_user(&url), + password: url.password().map(Into::into), host: url.host_str().map(Into::into), port: url.port(), path: url.path().into(), }) } -/// Parse the given `bytes` as git url. -/// -/// # Note -/// -/// We cannot and should never have to deal with UTF-16 encoded windows strings, so bytes input is acceptable. -/// For file-paths, we don't expect UTF8 encoding either. -pub fn parse(input: &BStr) -> Result<crate::Url, Error> { - let guessed_protocol = guess_protocol(input).ok_or_else(|| Error::NotALocalFile { url: input.into() })?; - let path_without_file_protocol = input.strip_prefix(b"file://"); - if path_without_file_protocol.is_some() || (has_no_explicit_protocol(input) && guessed_protocol == "file") { - let path: BString = path_without_file_protocol.map_or_else( - || input.into(), - |stripped_path| { - #[cfg(windows)] - { - if stripped_path.starts_with(b"/") { - input - .to_str() - .ok() - .and_then(|url| { - let path = url::Url::parse(url).ok()?.to_file_path().ok()?; - path.is_absolute().then(|| gix_path::into_bstr(path).into_owned()) - }) - .unwrap_or_else(|| stripped_path.into()) - } else { - stripped_path.into() - } - } - #[cfg(not(windows))] - { - stripped_path.into() - } - }, - ); - if path.is_empty() { - return Err(Error::MissingRepositoryPath); - } - let input_starts_with_file_protocol = input.starts_with(b"file://"); - if input_starts_with_file_protocol { - let wanted = cfg!(windows).then(|| &[b'\\', b'/'] as &[_]).unwrap_or(&[b'/']); - if !wanted.iter().any(|w| path.contains(w)) { - return Err(Error::MissingRepositoryPath); - } - } - return Ok(crate::Url { - scheme: Scheme::File, - path, - serialize_alternative_form: !input_starts_with_file_protocol, - ..Default::default() +pub(crate) fn scp(input: &BStr, colon: usize) -> Result<crate::Url, Error> { + let input = input_to_utf8(input, UrlKind::Scp)?; + + // TODO: this incorrectly splits at IPv6 addresses, check for `[]` before splitting + let (host, path) = input.split_at(colon); + debug_assert_eq!(path.get(..1), Some(":"), "{path} should start with :"); + let path = &path[1..]; + + if path.is_empty() { + return Err(Error::MissingRepositoryPath { + url: input.to_owned().into(), + kind: UrlKind::Scp, }); } - let url_str = std::str::from_utf8(input)?; - let (mut url, mut scp_path) = match url::Url::parse(url_str) { - Ok(url) => (url, None), - Err(url::ParseError::RelativeUrlWithoutBase) => { - // happens with bare paths as well as scp like paths. The latter contain a ':' past the host portion, - // which we are trying to detect. - ( - url::Url::parse(&format!( - "{}://{}", - guessed_protocol, - sanitize_for_protocol(guessed_protocol, url_str) - ))?, - extract_scp_path(url_str), - ) + // The path returned by the parsed url often has the wrong number of leading `/` characters but + // should never differ in any other way (ssh URLs should not contain a query or fragment part). + // To avoid the various off-by-one errors caused by the `/` characters, we keep using the path + // determined above and can therefore skip parsing it here as well. + let url = url::Url::parse(&format!("ssh://{host}")).map_err(|source| Error::Url { + url: input.to_owned(), + kind: UrlKind::Scp, + source, + })?; + + Ok(crate::Url { + serialize_alternative_form: true, + scheme: url.scheme().into(), + user: url_user(&url), + password: url.password().map(Into::into), + host: url.host_str().map(Into::into), + port: url.port(), + path: path.into(), + }) +} + +fn url_user(url: &url::Url) -> Option<String> { + if url.username().is_empty() && url.password().is_none() { + None + } else { + Some(url.username().into()) + } +} + +pub(crate) fn file_url(input: &BStr, protocol_colon: usize) -> Result<crate::Url, Error> { + let input = input_to_utf8(input, UrlKind::Url)?; + let input_after_protocol = &input[protocol_colon + "://".len()..]; + + let Some(first_slash) = input_after_protocol + .find('/') + .or_else(|| cfg!(windows).then(|| input_after_protocol.find('\\')).flatten()) + else { + return Err(Error::MissingRepositoryPath { + url: input.to_owned().into(), + kind: UrlKind::Url, + }); + }; + + // We cannot use the url crate to parse host and path because it special cases Windows + // driver letters. With the url crate an input of `file://x:/path/to/git` is parsed as empty + // host and with `x:/path/to/git` as path. This behavior is wrong for Git which only follows + // that rule on Windows and parses `x:` as host on Unix platforms. Additionally the url crate + // does not account for Windows special UNC path support. + + // TODO: implement UNC path special case + let windows_special_path = if cfg!(windows) { + // Inputs created via url::Url::from_file_path contain an additional `/` between the + // protocol and the absolute path. Make sure we ignore that first slash character to avoid + // producing invalid paths. + let input_after_protocol = if first_slash == 0 { + &input_after_protocol[1..] + } else { + input_after_protocol + }; + // parse `file://x:/path/to/git` as explained above + if input_after_protocol.chars().nth(1) == Some(':') { + Some(input_after_protocol) + } else { + None } - Err(err) => return Err(err.into()), + } else { + None }; - // SCP like URLs without user parse as 'something' with the scheme being the 'host'. Hosts always have dots. - if url.scheme().find('.').is_some() { - // try again with prefixed protocol - url = url::Url::parse(&format!("ssh://{}", sanitize_for_protocol("ssh", url_str)))?; - scp_path = extract_scp_path(url_str); - } - if url.path().is_empty() && ["ssh", "git"].contains(&url.scheme()) { - return Err(Error::MissingResourceLocation); - } - if url.cannot_be_a_base() { - return Err(Error::RelativeUrl { url: url.into() }); - } - let mut url = to_owned_url(url)?; - if let Some(path) = scp_path { - url.path = path.into(); - url.serialize_alternative_form = true; + let host = if windows_special_path.is_some() || first_slash == 0 { + // `file:///path/to/git` or a windows special case was triggered + None + } else { + // `file://host/path/to/git` + Some(&input_after_protocol[..first_slash]) + }; + + // default behavior on Unix platforms and if no Windows special case was triggered + let path = windows_special_path.unwrap_or(&input_after_protocol[first_slash..]); + + Ok(crate::Url { + serialize_alternative_form: false, + host: host.map(Into::into), + ..local(path.into())? + }) +} + +pub(crate) fn local(input: &BStr) -> Result<crate::Url, Error> { + if input.is_empty() { + return Err(Error::MissingRepositoryPath { + url: input.to_owned(), + kind: UrlKind::Local, + }); } - Ok(url) + + Ok(crate::Url { + serialize_alternative_form: true, + scheme: Scheme::File, + password: None, + user: None, + host: None, + port: None, + path: input.to_owned(), + }) +} + +fn input_to_utf8(input: &BStr, kind: UrlKind) -> Result<&str, Error> { + std::str::from_utf8(input).map_err(|source| Error::Utf8 { + url: input.to_owned(), + kind, + source, + }) +} + +fn input_to_utf8_and_url(input: &BStr, kind: UrlKind) -> Result<(&str, url::Url), Error> { + let input = input_to_utf8(input, kind)?; + url::Url::parse(input) + .map(|url| (input, url)) + .map_err(|source| Error::Url { + url: input.to_owned(), + kind, + source, + }) } diff --git a/vendor/gix-url/src/scheme.rs b/vendor/gix-url/src/scheme.rs index 1c5f04526..a50b735c7 100644 --- a/vendor/gix-url/src/scheme.rs +++ b/vendor/gix-url/src/scheme.rs @@ -24,7 +24,8 @@ pub enum Scheme { impl<'a> From<&'a str> for Scheme { fn from(value: &'a str) -> Self { match value { - "ssh" => Scheme::Ssh, + // "ssh+git" and "git+ssh" are legacy, but Git still allows them and so should we + "ssh" | "ssh+git" | "git+ssh" => Scheme::Ssh, "file" => Scheme::File, "git" => Scheme::Git, "http" => Scheme::Http, |