5 files changed, 492 insertions, 0 deletions
diff --git a/vendor/gix-path/src/convert.rs b/vendor/gix-path/src/convert.rs
new file mode 100644
index 000000000..6a949529f
--- /dev/null
+++ b/vendor/gix-path/src/convert.rs
@@ -0,0 +1,273 @@
+use std::{
+    borrow::Cow,
+    ffi::{OsStr, OsString},
+    path::{Path, PathBuf},
+};
+
+use bstr::{BStr, BString};
+
+#[derive(Debug)]
+/// The error returned by [`try_into_bstr()`] and other fallible conversions from or to bytes if the input is ill-formed.
+pub struct Utf8Error;
+
+impl std::fmt::Display for Utf8Error {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str("Could not convert to UTF8 or from UTF8 due to ill-formed input")
+    }
+}
+
+impl std::error::Error for Utf8Error {}
+
+/// Like [`into_bstr()`], but takes `OsStr` as input for a lossless, but fallible, conversion.
+pub fn os_str_into_bstr(path: &OsStr) -> Result<&BStr, Utf8Error> {
+    let path = try_into_bstr(Cow::Borrowed(path.as_ref()))?;
+    match path {
+        Cow::Borrowed(path) => Ok(path),
+        Cow::Owned(_) => unreachable!("borrowed cows stay borrowed"),
+    }
+}
+
+/// Like [`into_bstr()`], but takes `OsString` as input for a lossless, but fallible, conversion which keeps the data owned.
+pub fn os_string_into_bstring(path: OsString) -> Result<BString, Utf8Error> {
+    let path = try_into_bstr(Cow::Owned(path.into()))?;
+    match path {
+        Cow::Borrowed(_path) => unreachable!("owned cows stay owned"),
+        Cow::Owned(path) => Ok(path),
+    }
+}
+
+/// Convert the given path either into its raw bytes on unix or its UTF8 encoded counterpart on windows.
+///
+/// On windows, if the source Path contains ill-formed, lone surrogates, the UTF-8 conversion will fail
+/// causing `Utf8Error` to be returned.
+pub fn try_into_bstr<'a>(path: impl Into<Cow<'a, Path>>) -> Result<Cow<'a, BStr>, Utf8Error> {
+    let path = path.into();
+    let path_str = match path {
+        Cow::Owned(path) => Cow::Owned({
+            #[cfg(unix)]
+            let p: BString = {
+                use std::os::unix::ffi::OsStringExt;
+                path.into_os_string().into_vec().into()
+            };
+            #[cfg(target_os = "wasi")]
+            let p: BString = {
+                use std::os::wasi::ffi::OsStringExt;
+                path.into_os_string().into_vec().into()
+            };
+            #[cfg(not(any(unix, target_os = "wasi")))]
+            let p: BString = path.into_os_string().into_string().map_err(|_| Utf8Error)?.into();
+            p
+        }),
+        Cow::Borrowed(path) => Cow::Borrowed({
+            #[cfg(unix)]
+            let p: &BStr = {
+                use std::os::unix::ffi::OsStrExt;
+                path.as_os_str().as_bytes().into()
+            };
+            #[cfg(target_os = "wasi")]
+            let p: &BStr = {
+                use std::os::wasi::ffi::OsStrExt;
+                path.as_os_str().as_bytes().into()
+            };
+            #[cfg(not(any(unix, target_os = "wasi")))]
+            let p: &BStr = path.to_str().ok_or(Utf8Error)?.as_bytes().into();
+            p
+        }),
+    };
+    Ok(path_str)
+}
+
+/// Similar to [`try_into_bstr()`] but **panics** if malformed surrogates are encountered on windows.
+pub fn into_bstr<'a>(path: impl Into<Cow<'a, Path>>) -> Cow<'a, BStr> {
+    try_into_bstr(path).expect("prefix path doesn't contain ill-formed UTF-8")
+}
+
+/// Given `input` bytes, produce a `Path` from them ignoring encoding entirely if on unix.
+///
+/// On windows, the input is required to be valid UTF-8, which is guaranteed if we wrote it before. There are some potential
+/// git versions and windows installations which produce malformed UTF-16 if certain emojis are in the path. It's as rare as
+/// it sounds, but possible.
+pub fn try_from_byte_slice(input: &[u8]) -> Result<&Path, Utf8Error> {
+    #[cfg(unix)]
+    let p = {
+        use std::os::unix::ffi::OsStrExt;
+        OsStr::from_bytes(input).as_ref()
+    };
+    #[cfg(target_os = "wasi")]
+    let p: &Path = {
+        use std::os::wasi::ffi::OsStrExt;
+        OsStr::from_bytes(input).as_ref()
+    };
+    #[cfg(not(any(unix, target_os = "wasi")))]
+    let p = Path::new(std::str::from_utf8(input).map_err(|_| Utf8Error)?);
+    Ok(p)
+}
+
+/// Similar to [`try_from_byte_slice()`], but takes either borrowed or owned `input`.
+pub fn try_from_bstr<'a>(input: impl Into<Cow<'a, BStr>>) -> Result<Cow<'a, Path>, Utf8Error> {
+    let input = input.into();
+    match input {
+        Cow::Borrowed(input) => try_from_byte_slice(input).map(Cow::Borrowed),
+        Cow::Owned(input) => try_from_bstring(input).map(Cow::Owned),
+    }
+}
+
+/// Similar to [`try_from_bstr()`], but **panics** if `input` is not well-formed UTF-8 on windows.
+pub fn from_bstr<'a>(input: impl Into<Cow<'a, BStr>>) -> Cow<'a, Path> {
+    try_from_bstr(input).expect("prefix path doesn't contain ill-formed UTF-8")
+}
+
+/// Similar to [`try_from_bstr()`], but takes and produces owned data.
+pub fn try_from_bstring(input: impl Into<BString>) -> Result<PathBuf, Utf8Error> {
+    let input = input.into();
+    #[cfg(unix)]
+    let p = {
+        use std::os::unix::ffi::OsStringExt;
+        std::ffi::OsString::from_vec(input.into()).into()
+    };
+    #[cfg(target_os = "wasi")]
+    let p: PathBuf = {
+        use std::os::wasi::ffi::OsStringExt;
+        std::ffi::OsString::from_vec(input.into()).into()
+    };
+    #[cfg(not(any(unix, target_os = "wasi")))]
+    let p = {
+        use bstr::ByteVec;
+        PathBuf::from(
+            {
+                let v: Vec<_> = input.into();
+                v
+            }
+            .into_string()
+            .map_err(|_| Utf8Error)?,
+        )
+    };
+    Ok(p)
+}
+
+/// Similar to [`try_from_bstring()`], but will **panic** if there is ill-formed UTF-8 in the `input`.
+pub fn from_bstring(input: impl Into<BString>) -> PathBuf {
+    try_from_bstring(input).expect("well-formed UTF-8 on windows")
+}
+
+/// Similar to [`try_from_byte_slice()`], but will **panic** if there is ill-formed UTF-8 in the `input`.
+pub fn from_byte_slice(input: &[u8]) -> &Path {
+    try_from_byte_slice(input).expect("well-formed UTF-8 on windows")
+}
+
+fn replace<'a>(path: impl Into<Cow<'a, BStr>>, find: u8, replace: u8) -> Cow<'a, BStr> {
+    let path = path.into();
+    match path {
+        Cow::Owned(mut path) => {
+            for b in path.iter_mut().filter(|b| **b == find) {
+                *b = replace;
+            }
+            path.into()
+        }
+        Cow::Borrowed(path) => {
+            if !path.contains(&find) {
+                return path.into();
+            }
+            let mut path = path.to_owned();
+            for b in path.iter_mut().filter(|b| **b == find) {
+                *b = replace;
+            }
+            path.into()
+        }
+    }
+}
+
+/// Assures the given bytes use the native path separator.
+pub fn to_native_separators<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
+    #[cfg(not(windows))]
+    let p = to_unix_separators(path);
+    #[cfg(windows)]
+    let p = to_windows_separators(path);
+    p
+}
+
+/// Convert paths with slashes to backslashes on windows and do nothing on unix, but **panics** if `path` is not well-formed UTF-8 on windows.
+pub fn to_native_path_on_windows<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, std::path::Path> {
+    #[cfg(not(windows))]
+    {
+        crate::from_bstr(path)
+    }
+    #[cfg(windows)]
+    {
+        crate::from_bstr(to_windows_separators(path))
+    }
+}
+
+/// Replaces windows path separators with slashes, but only does so on windows. Elsewhere, `path` is returned unchanged.
+pub fn to_unix_separators_on_windows<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
+    #[cfg(windows)]
+    {
+        replace(path, b'\\', b'/')
+    }
+    #[cfg(not(windows))]
+    {
+        path.into()
+    }
+}
+
+/// Replaces windows path separators with slashes, unconditionally.
+///
+/// **Note** Do not use these and prefer the conditional versions of this method.
+// TODO: use https://lib.rs/crates/path-slash to handle escapes
+pub fn to_unix_separators<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
+    replace(path, b'\\', b'/')
+}
+
+/// Find slashes and replace them with backslashes, which typically resembles a windows path, unconditionally.
+///
+/// **Note** Do not use these and prefer the conditional versions of this method.
+// TODO: use https://lib.rs/crates/path-slash to handle escapes
+pub fn to_windows_separators<'a>(path: impl Into<Cow<'a, BStr>>) -> Cow<'a, BStr> {
+    replace(path, b'/', b'\\')
+}
+
+/// Resolve relative components virtually without accessing the file system, e.g. turn `a/./b/c/.././..` into `a`,
+/// without keeping intermediate `..` and `/a/../b/..` becomes `/`.
+/// If the input path was relative and ends up being the `current_dir`, `.` is returned instead of the full path to `current_dir`.
+///
+/// This is particularly useful when manipulating paths that are based on user input, and not resolving intermediate
+/// symlinks keeps the path similar to what the user provided. If that's not desirable, use [`realpath()`][crate::realpath()]
+/// instead.
+///
+/// Note that we might access the `current_dir` if we run out of path components to pop off, which is expected to be absolute
+/// as typical return value of `std::env::current_dir()`.
+/// As a `current_dir` like `/c` can be exhausted by paths like `../../r`, `None` will be returned to indicate the inability
+/// to produce a logically consistent path.
+pub fn normalize<'a>(path: impl Into<Cow<'a, Path>>, current_dir: impl AsRef<Path>) -> Option<Cow<'a, Path>> {
+    use std::path::Component::ParentDir;
+
+    let path = path.into();
+    if !path.components().any(|c| matches!(c, ParentDir)) {
+        return Some(path);
+    }
+    let current_dir = current_dir.as_ref();
+    let mut current_dir_opt = Some(current_dir);
+    let was_relative = path.is_relative();
+    let components = path.components();
+    let mut path = PathBuf::new();
+    for component in components {
+        if let ParentDir = component {
+            let path_was_dot = path == Path::new(".");
+            if path.as_os_str().is_empty() || path_was_dot {
+                path.push(current_dir_opt.take()?);
+            }
+            if !path.pop() {
+                return None;
+            }
+        } else {
+            path.push(component)
+        }
+    }
+
+    if (path.as_os_str().is_empty() || path == current_dir) && was_relative {
+        Cow::Borrowed(Path::new("."))
+    } else {
+        path.into()
+    }
+    .into()
+}
diff --git a/vendor/gix-path/src/lib.rs b/vendor/gix-path/src/lib.rs
new file mode 100644
index 000000000..70a9bc53f
--- /dev/null
+++ b/vendor/gix-path/src/lib.rs
@@ -0,0 +1,68 @@
+//! This crate contains an assortment of utilities to deal with paths and their conversions.
+//!
+//! Generally `git` treats paths as bytes, but inherently assumes well-formed UTF-8 as encoding on windows. Internally, it expects
+//! slashes to be used as path separators and paths in files must have slashes, with conversions being performed on windows accordingly.
+//!
+//! <details>
+//!
+//! ### Research
+//!
+//! * **windows**
+//! - [`dirent.c`](https://github.com/git/git/blob/main/compat/win32/dirent.c#L31:L31) contains all implementation (seemingly) of opening directories and reading their entries, along with all path conversions (UTF-16 for windows). This is done on the fly so git can work with paths [in UTF-8](https://github.com/git/git/blob/main/compat/win32/dirent.c#L12:L12).
+//! - mingw [is used for the conversion](https://github.com/git/git/blob/main/compat/mingw.h#L579:L579) and it appears they handle surrogates during the conversion, maybe some sort of non-strict UTF-8 converter? Actually it uses [WideCharToMultiByte](https://docs.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-widechartomultibyte)
+//!   under the hood which by now does fail if the UTF-8 would be invalid unicode, i.e. unicode pairs.
+//! - `OsString` on windows already stores strings as WTF-8, which supports [surrogate pairs](https://unicodebook.readthedocs.io/unicode_encodings.html),
+//!    something that UTF-8 isn't allowed to do for security reasons, after all it's UTF-16 specific and exists only to extend
+//!    the encodable code-points.
+//! - informative reading on [WTF-8](https://simonsapin.github.io/wtf-8/#motivation) which is the encoding used by Rust
+//!   internally that deals with surrogates and non-wellformed surrogates (those that aren't in pairs).
+//! * **unix**
+//! - It uses [opendir](https://man7.org/linux/man-pages/man3/opendir.3.html) and [readdir](https://man7.org/linux/man-pages/man3/readdir.3.html)
+//!   respectively. There is no encoding specified, except that these paths are null-terminated.
+//!
+//! ### Learnings
+//!
+//! Surrogate pairs are a way to extend the encodable value range in UTF-16 encodings, used primarily on windows and in Javascript.
+//! For a long time these codepoints used for surrogates, always to be used in pairs, were not assigned, until…they were for rare
+//! emojis and the like. The unicode standard does not require surrogates to happen in pairs, even though by now unpaired surrogates
+//! in UTF-16 are considered ill-formed, which aren't supposed to be converted to UTF-8 for example.
+//!
+//! This is the reason we have to deal with `to_string_lossy()`, it's _just_ for that quirk.
+//!
+//! This also means the only platform ever eligible to see conversion errors is windows, and there it's only older pre-vista
+//! windows versions which incorrectly allow ill-formed UTF-16 strings. Newer versions don't perform such conversions anymore, for
+//! example when going from UTF-16 to UTF-8, they will trigger an error.
+//!
+//! ### Conclusions
+//!
+//! Since [WideCharToMultiByte](https://docs.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-widechartomultibyte) by now is
+//! fixed (Vista onward) to produce valid UTF-8, lone surrogate codepoints will cause failure, which `git`
+//! [doesn't care about](https://github.com/git/git/blob/main/compat/win32/dirent.c#L12:L12).
+//!
+//! We will, though, which means from now on we can just convert to UTF-8 on windows and bubble up errors where necessary,
+//! preventing potential mismatched surrogate pairs to ever be saved on disk by gitoxide.
+//!
+//! Even though the error only exists on older windows versions, we will represent it in the type system through fallible function calls.
+//! Callers may `.expect()` on the result to indicate they don't wish to handle this special and rare case. Note that servers should not
+//! ever get into a code-path which does panic though.
+//! </details>
+#![deny(missing_docs, rust_2018_idioms)]
+#![forbid(unsafe_code)]
+
+/// A dummy type to represent path specs and help finding all spots that take path specs once it is implemented.
+///
+/// A preliminary version of a path-spec based on glances of the code.
+#[derive(Clone, Debug)]
+pub struct Spec(bstr::BString);
+
+mod convert;
+pub use convert::*;
+
+mod util;
+pub use util::is_absolute;
+
+mod spec;
+
+///
+pub mod realpath;
+pub use realpath::function::{realpath, realpath_opts};
diff --git a/vendor/gix-path/src/realpath.rs b/vendor/gix-path/src/realpath.rs
new file mode 100644
index 000000000..807cb74cc
--- /dev/null
+++ b/vendor/gix-path/src/realpath.rs
@@ -0,0 +1,90 @@
+/// The error returned by [`realpath()`][super::realpath()].
+#[derive(Debug, thiserror::Error)]
+#[allow(missing_docs)]
+pub enum Error {
+    #[error("The maximum allowed number {} of symlinks in path is exceeded", .max_symlinks)]
+    MaxSymlinksExceeded { max_symlinks: u8 },
+    #[error(transparent)]
+    ReadLink(std::io::Error),
+    #[error(transparent)]
+    CurrentWorkingDir(std::io::Error),
+    #[error("Empty is not a valid path")]
+    EmptyPath,
+    #[error("Ran out of path components while following parent component '..'")]
+    MissingParent,
+}
+
+/// The default amount of symlinks we may follow when resolving a path in [`realpath()`][crate::realpath()].
+pub const MAX_SYMLINKS: u8 = 32;
+
+pub(crate) mod function {
+    use std::path::{
+        Component::{CurDir, Normal, ParentDir, Prefix, RootDir},
+        Path, PathBuf,
+    };
+
+    use super::Error;
+    use crate::realpath::MAX_SYMLINKS;
+
+    /// Check each component of `path` and see if it is a symlink. If so, resolve it.
+    /// Do not fail for non-existing components, but assume these are as is.
+    ///
+    /// If `path` is relative, the current working directory will be used to make it absolute.
+    pub fn realpath(path: impl AsRef<Path>) -> Result<PathBuf, Error> {
+        let cwd = path
+            .as_ref()
+            .is_relative()
+            .then(std::env::current_dir)
+            .unwrap_or_else(|| Ok(PathBuf::default()))
+            .map_err(Error::CurrentWorkingDir)?;
+        realpath_opts(path, cwd, MAX_SYMLINKS)
+    }
+
+    /// The same as [`realpath()`], but takes the `cwd` that a relative `path` is joined to, and `max_symlinks` to limit how many links we follow.
+    /// The limit avoids running into cycles or doing unreasonable amounts of work; exceeding it yields [`Error::MaxSymlinksExceeded`].
+    pub fn realpath_opts(path: impl AsRef<Path>, cwd: impl AsRef<Path>, max_symlinks: u8) -> Result<PathBuf, Error> {
+        let path = path.as_ref();
+        if path.as_os_str().is_empty() {
+            return Err(Error::EmptyPath);
+        }
+
+        let mut real_path = PathBuf::new();
+        if path.is_relative() {
+            real_path.push(cwd);
+        }
+
+        let mut num_symlinks = 0;
+        let mut path_backing: PathBuf;
+        let mut components = path.components();
+        while let Some(component) = components.next() {
+            match component {
+                part @ RootDir | part @ Prefix(_) => real_path.push(part),
+                CurDir => {}
+                ParentDir => {
+                    if !real_path.pop() {
+                        return Err(Error::MissingParent);
+                    }
+                }
+                Normal(part) => {
+                    real_path.push(part);
+                    if real_path.is_symlink() {
+                        // Check the limit before counting so that `max_symlinks == u8::MAX` can't overflow the counter.
+                        if num_symlinks >= max_symlinks {
+                            return Err(Error::MaxSymlinksExceeded { max_symlinks });
+                        }
+                        num_symlinks += 1;
+                        let mut link_destination = std::fs::read_link(real_path.as_path()).map_err(Error::ReadLink)?;
+                        // An absolute destination resets `real_path` once its root is pushed, a relative one replaces the link.
+                        if !link_destination.is_absolute() {
+                            assert!(real_path.pop(), "we just pushed a component");
+                        }
+                        link_destination.extend(components);
+                        path_backing = link_destination;
+                        components = path_backing.components();
+                    }
+                }
+            }
+        }
+        Ok(real_path)
+    }
+}
diff --git a/vendor/gix-path/src/spec.rs b/vendor/gix-path/src/spec.rs
new file mode 100644
index 000000000..0ff9e661c
--- /dev/null
+++ b/vendor/gix-path/src/spec.rs
@@ -0,0 +1,53 @@
+use std::ffi::OsStr;
+
+use bstr::{BStr, ByteSlice, ByteVec};
+
+use crate::Spec;
+
+impl std::convert::TryFrom<&OsStr> for Spec {
+    type Error = crate::Utf8Error;
+
+    fn try_from(value: &OsStr) -> Result<Self, Self::Error> {
+        crate::os_str_into_bstr(value).map(|value| {
+            assert_valid_hack(value);
+            Spec(value.into())
+        })
+    }
+}
+
+fn assert_valid_hack(input: &BStr) {
+    assert!(!input.contains_str(b"/../"));
+    assert!(!input.contains_str(b"/./"));
+    assert!(!input.starts_with_str(b"../"));
+    assert!(!input.starts_with_str(b"./"));
+    assert!(!input.starts_with_str(b"/"));
+}
+
+impl Spec {
+    /// Turn `input` into a `Spec`. For now this always returns `Some` and **panics** if `input` fails the preliminary validity assertions.
+    // TODO: tests, actual implementation probably via `gix-pathspec` to make use of the crate after all.
+    pub fn from_bytes(input: &BStr) -> Option<Self> {
+        assert_valid_hack(input);
+        Spec(input.into()).into()
+    }
+    /// Return all paths described by this path spec, using slashes on all platforms.
+    pub fn items(&self) -> impl Iterator<Item = &BStr> {
+        std::iter::once(self.0.as_bstr())
+    }
+    /// Adjust this path specification according to the given `prefix`, which may be empty to indicate we are at the work-tree root.
+    // TODO: this is a hack, needs test and time to do according to spec. This is just a minimum version to have -something-.
+    pub fn apply_prefix(&mut self, prefix: &std::path::Path) -> &Self {
+        // many more things we can't handle. `Path` never ends with trailing path separator.
+        let prefix = crate::into_bstr(prefix);
+        if !prefix.is_empty() {
+            let mut prefix = crate::to_unix_separators_on_windows(prefix);
+            {
+                let path = prefix.to_mut();
+                path.push_byte(b'/');
+                path.extend_from_slice(&self.0);
+            }
+            self.0 = prefix.into_owned();
+        }
+        self
+    }
+}
diff --git a/vendor/gix-path/src/util.rs b/vendor/gix-path/src/util.rs
new file mode 100644
index 000000000..7920910d7
--- /dev/null
+++ b/vendor/gix-path/src/util.rs
@@ -0,0 +1,8 @@
+use std::path::Path;
+
+/// Return true if `path` is absolute, which depends on the platform but is always true if it is valid unicode and starts with
+/// a `slash`, hence looks like a linux path.
+pub fn is_absolute(path: impl AsRef<Path>) -> bool {
+    let path = path.as_ref();
+    path.is_absolute() || path.to_str().and_then(|s| s.chars().next()) == Some('/')
+}