summaryrefslogtreecommitdiffstats
path: root/library/std/src/sys/windows/args.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-17 12:02:58 +0000
commit698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree173a775858bd501c378080a10dca74132f05bc50 /library/std/src/sys/windows/args.rs
parentInitial commit. (diff)
downloadrustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'library/std/src/sys/windows/args.rs')
-rw-r--r--library/std/src/sys/windows/args.rs406
1 files changed, 406 insertions, 0 deletions
diff --git a/library/std/src/sys/windows/args.rs b/library/std/src/sys/windows/args.rs
new file mode 100644
index 000000000..01f262982
--- /dev/null
+++ b/library/std/src/sys/windows/args.rs
@@ -0,0 +1,406 @@
+//! The Windows command line is just a string
+//! <https://docs.microsoft.com/en-us/archive/blogs/larryosterman/the-windows-command-line-is-just-a-string>
+//!
+//! This module implements the parsing necessary to turn that string into a list of arguments.
+
+#[cfg(test)]
+mod tests;
+
+use crate::ffi::OsString;
+use crate::fmt;
+use crate::io;
+use crate::marker::PhantomData;
+use crate::num::NonZeroU16;
+use crate::os::windows::prelude::*;
+use crate::path::PathBuf;
+use crate::ptr::NonNull;
+use crate::sys::c;
+use crate::sys::process::ensure_no_nuls;
+use crate::sys::windows::os::current_exe;
+use crate::vec;
+
+use core::iter;
+
+/// This is the const equivalent to `NonZeroU16::new(n).unwrap()`
+///
+/// FIXME: This can be removed once `Option::unwrap` is stably const.
+/// See the `const_option` feature (#67441).
+const fn non_zero_u16(n: u16) -> NonZeroU16 {
+ match NonZeroU16::new(n) {
+ Some(n) => n,
+ None => panic!("called `unwrap` on a `None` value"),
+ }
+}
+
+pub fn args() -> Args {
+ // SAFETY: `GetCommandLineW` returns a pointer to a null terminated UTF-16
+ // string so it's safe for `WStrUnits` to use.
+ unsafe {
+ let lp_cmd_line = c::GetCommandLineW();
+ let parsed_args_list = parse_lp_cmd_line(WStrUnits::new(lp_cmd_line), || {
+ current_exe().map(PathBuf::into_os_string).unwrap_or_else(|_| OsString::new())
+ });
+
+ Args { parsed_args_list: parsed_args_list.into_iter() }
+ }
+}
+
+/// Implements the Windows command-line argument parsing algorithm.
+///
+/// Microsoft's documentation for the Windows CLI argument format can be found at
+/// <https://docs.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-160#parsing-c-command-line-arguments>
+///
+/// A more in-depth explanation is here:
+/// <https://daviddeley.com/autohotkey/parameters/parameters.htm#WIN>
+///
+/// Windows includes a function to do command line parsing in shell32.dll.
+/// However, this is not used for two reasons:
+///
+/// 1. Linking with that DLL causes the process to be registered as a GUI application.
+/// GUI applications add a bunch of overhead, even if no windows are drawn. See
+/// <https://randomascii.wordpress.com/2018/12/03/a-not-called-function-can-cause-a-5x-slowdown/>.
+///
+/// 2. It does not follow the modern C/C++ argv rules outlined in the first two links above.
+///
+/// This function was tested for equivalence to the C/C++ parsing rules using an
+/// extensive test suite available at
+/// <https://github.com/ChrisDenton/winarg/tree/std>.
+fn parse_lp_cmd_line<'a, F: Fn() -> OsString>(
+ lp_cmd_line: Option<WStrUnits<'a>>,
+ exe_name: F,
+) -> Vec<OsString> {
+ const BACKSLASH: NonZeroU16 = non_zero_u16(b'\\' as u16);
+ const QUOTE: NonZeroU16 = non_zero_u16(b'"' as u16);
+ const TAB: NonZeroU16 = non_zero_u16(b'\t' as u16);
+ const SPACE: NonZeroU16 = non_zero_u16(b' ' as u16);
+
+ let mut ret_val = Vec::new();
+ // If the cmd line pointer is null or it points to an empty string then
+ // return the name of the executable as argv[0].
+ if lp_cmd_line.as_ref().and_then(|cmd| cmd.peek()).is_none() {
+ ret_val.push(exe_name());
+ return ret_val;
+ }
+ let mut code_units = lp_cmd_line.unwrap();
+
+ // The executable name at the beginning is special.
+ let mut in_quotes = false;
+ let mut cur = Vec::new();
+ for w in &mut code_units {
+ match w {
+ // A quote mark always toggles `in_quotes` no matter what because
+ // there are no escape characters when parsing the executable name.
+ QUOTE => in_quotes = !in_quotes,
+ // If not `in_quotes` then whitespace ends argv[0].
+ SPACE | TAB if !in_quotes => break,
+ // In all other cases the code unit is taken literally.
+ _ => cur.push(w.get()),
+ }
+ }
+ // Skip whitespace.
+ code_units.advance_while(|w| w == SPACE || w == TAB);
+ ret_val.push(OsString::from_wide(&cur));
+
+ // Parse the arguments according to these rules:
+ // * All code units are taken literally except space, tab, quote and backslash.
+ // * When not `in_quotes`, space and tab separate arguments. Consecutive spaces and tabs are
+ // treated as a single separator.
+ // * A space or tab `in_quotes` is taken literally.
+ // * A quote toggles `in_quotes` mode unless it's escaped. An escaped quote is taken literally.
+ // * A quote can be escaped if preceded by an odd number of backslashes.
+ // * If any number of backslashes is immediately followed by a quote then the number of
+ // backslashes is halved (rounding down).
+ // * Backslashes not followed by a quote are all taken literally.
+ // * If `in_quotes` then a quote can also be escaped using another quote
+ // (i.e. two consecutive quotes become one literal quote).
+ let mut cur = Vec::new();
+ let mut in_quotes = false;
+ while let Some(w) = code_units.next() {
+ match w {
+ // If not `in_quotes`, a space or tab ends the argument.
+ SPACE | TAB if !in_quotes => {
+ ret_val.push(OsString::from_wide(&cur[..]));
+ cur.truncate(0);
+
+ // Skip whitespace.
+ code_units.advance_while(|w| w == SPACE || w == TAB);
+ }
+ // Backslashes can escape quotes or backslashes but only if consecutive backslashes are followed by a quote.
+ BACKSLASH => {
+ let backslash_count = code_units.advance_while(|w| w == BACKSLASH) + 1;
+ if code_units.peek() == Some(QUOTE) {
+ cur.extend(iter::repeat(BACKSLASH.get()).take(backslash_count / 2));
+ // The quote is escaped if there are an odd number of backslashes.
+ if backslash_count % 2 == 1 {
+ code_units.next();
+ cur.push(QUOTE.get());
+ }
+ } else {
+ // If there is no quote on the end then there is no escaping.
+ cur.extend(iter::repeat(BACKSLASH.get()).take(backslash_count));
+ }
+ }
+ // If `in_quotes` and not backslash escaped (see above) then a quote either
+ // unsets `in_quote` or is escaped by another quote.
+ QUOTE if in_quotes => match code_units.peek() {
+ // Two consecutive quotes when `in_quotes` produces one literal quote.
+ Some(QUOTE) => {
+ cur.push(QUOTE.get());
+ code_units.next();
+ }
+ // Otherwise set `in_quotes`.
+ Some(_) => in_quotes = false,
+ // The end of the command line.
+ // Push `cur` even if empty, which we do by breaking while `in_quotes` is still set.
+ None => break,
+ },
+ // If not `in_quotes` and not BACKSLASH escaped (see above) then a quote sets `in_quote`.
+ QUOTE => in_quotes = true,
+ // Everything else is always taken literally.
+ _ => cur.push(w.get()),
+ }
+ }
+ // Push the final argument, if any.
+ if !cur.is_empty() || in_quotes {
+ ret_val.push(OsString::from_wide(&cur[..]));
+ }
+ ret_val
+}
+
+pub struct Args {
+ parsed_args_list: vec::IntoIter<OsString>,
+}
+
+impl fmt::Debug for Args {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ self.parsed_args_list.as_slice().fmt(f)
+ }
+}
+
+impl Iterator for Args {
+ type Item = OsString;
+ fn next(&mut self) -> Option<OsString> {
+ self.parsed_args_list.next()
+ }
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.parsed_args_list.size_hint()
+ }
+}
+
+impl DoubleEndedIterator for Args {
+ fn next_back(&mut self) -> Option<OsString> {
+ self.parsed_args_list.next_back()
+ }
+}
+
+impl ExactSizeIterator for Args {
+ fn len(&self) -> usize {
+ self.parsed_args_list.len()
+ }
+}
+
+/// A safe iterator over a LPWSTR
+/// (aka a pointer to a series of UTF-16 code units terminated by a NULL).
+struct WStrUnits<'a> {
+ // The pointer must never be null...
+ lpwstr: NonNull<u16>,
+ // ...and the memory it points to must be valid for this lifetime.
+ lifetime: PhantomData<&'a [u16]>,
+}
+impl WStrUnits<'_> {
+ /// Create the iterator. Returns `None` if `lpwstr` is null.
+ ///
+ /// SAFETY: `lpwstr` must point to a null-terminated wide string that lives
+ /// at least as long as the lifetime of this struct.
+ unsafe fn new(lpwstr: *const u16) -> Option<Self> {
+ Some(Self { lpwstr: NonNull::new(lpwstr as _)?, lifetime: PhantomData })
+ }
+ fn peek(&self) -> Option<NonZeroU16> {
+ // SAFETY: It's always safe to read the current item because we don't
+ // ever move out of the array's bounds.
+ unsafe { NonZeroU16::new(*self.lpwstr.as_ptr()) }
+ }
+ /// Advance the iterator while `predicate` returns true.
+ /// Returns the number of items it advanced by.
+ fn advance_while<P: FnMut(NonZeroU16) -> bool>(&mut self, mut predicate: P) -> usize {
+ let mut counter = 0;
+ while let Some(w) = self.peek() {
+ if !predicate(w) {
+ break;
+ }
+ counter += 1;
+ self.next();
+ }
+ counter
+ }
+}
+impl Iterator for WStrUnits<'_> {
+ // This can never return zero as that marks the end of the string.
+ type Item = NonZeroU16;
+ fn next(&mut self) -> Option<NonZeroU16> {
+ // SAFETY: If NULL is reached we immediately return.
+ // Therefore it's safe to advance the pointer after that.
+ unsafe {
+ let next = self.peek()?;
+ self.lpwstr = NonNull::new_unchecked(self.lpwstr.as_ptr().add(1));
+ Some(next)
+ }
+ }
+}
+
+#[derive(Debug)]
+pub(crate) enum Arg {
+ /// Add quotes (if needed)
+ Regular(OsString),
+ /// Append raw string without quoting
+ Raw(OsString),
+}
+
+enum Quote {
+ // Every arg is quoted
+ Always,
+ // Whitespace and empty args are quoted
+ Auto,
+ // Arg appended without any changes (#29494)
+ Never,
+}
+
+pub(crate) fn append_arg(cmd: &mut Vec<u16>, arg: &Arg, force_quotes: bool) -> io::Result<()> {
+ let (arg, quote) = match arg {
+ Arg::Regular(arg) => (arg, if force_quotes { Quote::Always } else { Quote::Auto }),
+ Arg::Raw(arg) => (arg, Quote::Never),
+ };
+
+ // If an argument has 0 characters then we need to quote it to ensure
+ // that it actually gets passed through on the command line or otherwise
+ // it will be dropped entirely when parsed on the other end.
+ ensure_no_nuls(arg)?;
+ let arg_bytes = arg.bytes();
+ let (quote, escape) = match quote {
+ Quote::Always => (true, true),
+ Quote::Auto => {
+ (arg_bytes.iter().any(|c| *c == b' ' || *c == b'\t') || arg_bytes.is_empty(), true)
+ }
+ Quote::Never => (false, false),
+ };
+ if quote {
+ cmd.push('"' as u16);
+ }
+
+ let mut backslashes: usize = 0;
+ for x in arg.encode_wide() {
+ if escape {
+ if x == '\\' as u16 {
+ backslashes += 1;
+ } else {
+ if x == '"' as u16 {
+ // Add n+1 backslashes to total 2n+1 before internal '"'.
+ cmd.extend((0..=backslashes).map(|_| '\\' as u16));
+ }
+ backslashes = 0;
+ }
+ }
+ cmd.push(x);
+ }
+
+ if quote {
+ // Add n backslashes to total 2n before ending '"'.
+ cmd.extend((0..backslashes).map(|_| '\\' as u16));
+ cmd.push('"' as u16);
+ }
+ Ok(())
+}
+
+pub(crate) fn make_bat_command_line(
+ script: &[u16],
+ args: &[Arg],
+ force_quotes: bool,
+) -> io::Result<Vec<u16>> {
+ // Set the start of the command line to `cmd.exe /c "`
+ // It is necessary to surround the command in an extra pair of quotes,
+ // hence the trailing quote here. It will be closed after all arguments
+ // have been added.
+ let mut cmd: Vec<u16> = "cmd.exe /c \"".encode_utf16().collect();
+
+ // Push the script name surrounded by its quote pair.
+ cmd.push(b'"' as u16);
+ // Windows file names cannot contain a `"` character or end with `\\`.
+ // If the script name does then return an error.
+ if script.contains(&(b'"' as u16)) || script.last() == Some(&(b'\\' as u16)) {
+ return Err(io::const_io_error!(
+ io::ErrorKind::InvalidInput,
+ "Windows file names may not contain `\"` or end with `\\`"
+ ));
+ }
+ cmd.extend_from_slice(script.strip_suffix(&[0]).unwrap_or(script));
+ cmd.push(b'"' as u16);
+
+ // Append the arguments.
+ // FIXME: This needs tests to ensure that the arguments are properly
+ // reconstructed by the batch script by default.
+ for arg in args {
+ cmd.push(' ' as u16);
+ append_arg(&mut cmd, arg, force_quotes)?;
+ }
+
+ // Close the quote we left opened earlier.
+ cmd.push(b'"' as u16);
+
+ Ok(cmd)
+}
+
+/// Takes a path and tries to return a non-verbatim path.
+///
+/// This is necessary because cmd.exe does not support verbatim paths.
+pub(crate) fn to_user_path(mut path: Vec<u16>) -> io::Result<Vec<u16>> {
+ use crate::ptr;
+ use crate::sys::windows::fill_utf16_buf;
+
+ // UTF-16 encoded code points, used in parsing and building UTF-16 paths.
+ // All of these are in the ASCII range so they can be cast directly to `u16`.
+ const SEP: u16 = b'\\' as _;
+ const QUERY: u16 = b'?' as _;
+ const COLON: u16 = b':' as _;
+ const U: u16 = b'U' as _;
+ const N: u16 = b'N' as _;
+ const C: u16 = b'C' as _;
+
+ // Early return if the path is too long to remove the verbatim prefix.
+ const LEGACY_MAX_PATH: usize = 260;
+ if path.len() > LEGACY_MAX_PATH {
+ return Ok(path);
+ }
+
+ match &path[..] {
+ // `\\?\C:\...` => `C:\...`
+ [SEP, SEP, QUERY, SEP, _, COLON, SEP, ..] => unsafe {
+ let lpfilename = path[4..].as_ptr();
+ fill_utf16_buf(
+ |buffer, size| c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()),
+ |full_path: &[u16]| {
+ if full_path == &path[4..path.len() - 1] { full_path.into() } else { path }
+ },
+ )
+ },
+ // `\\?\UNC\...` => `\\...`
+ [SEP, SEP, QUERY, SEP, U, N, C, SEP, ..] => unsafe {
+ // Change the `C` in `UNC\` to `\` so we can get a slice that starts with `\\`.
+ path[6] = b'\\' as u16;
+ let lpfilename = path[6..].as_ptr();
+ fill_utf16_buf(
+ |buffer, size| c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()),
+ |full_path: &[u16]| {
+ if full_path == &path[6..path.len() - 1] {
+ full_path.into()
+ } else {
+ // Restore the 'C' in "UNC".
+ path[6] = b'C' as u16;
+ path
+ }
+ },
+ )
+ },
+ // For everything else, leave the path unchanged.
+ _ => Ok(path),
+ }
+}