diff options
Diffstat (limited to 'toolkit/xre/CmdLineAndEnvUtils.h')
-rw-r--r-- | toolkit/xre/CmdLineAndEnvUtils.h | 732 |
1 files changed, 732 insertions, 0 deletions
diff --git a/toolkit/xre/CmdLineAndEnvUtils.h b/toolkit/xre/CmdLineAndEnvUtils.h new file mode 100644 index 0000000000..cad34d2503 --- /dev/null +++ b/toolkit/xre/CmdLineAndEnvUtils.h @@ -0,0 +1,732 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_CmdLineAndEnvUtils_h +#define mozilla_CmdLineAndEnvUtils_h + +// NB: This code may be used outside of xul and thus must not depend on XPCOM + +#if defined(MOZILLA_INTERNAL_API) +# include "prenv.h" +# include "prprf.h" +# include <string.h> +#endif + +#if defined(XP_WIN) +# include "mozilla/UniquePtr.h" +# include "mozilla/Vector.h" +# include "mozilla/WinHeaderOnlyUtils.h" + +# include <wchar.h> +# include <windows.h> +#endif // defined(XP_WIN) + +#include "mozilla/Maybe.h" +#include "mozilla/MemoryChecking.h" +#include "mozilla/TypedEnumBits.h" + +#include <ctype.h> +#include <stdint.h> +#include <stdlib.h> + +#ifndef NS_NO_XPCOM +# include "nsIFile.h" +# include "mozilla/AlreadyAddRefed.h" +#endif + +// Undo X11/X.h's definition of None +#undef None + +namespace mozilla { + +enum ArgResult { + ARG_NONE = 0, + ARG_FOUND = 1, + ARG_BAD = 2 // you wanted a param, but there isn't one +}; + +template <typename CharT> +inline void RemoveArg(int& argc, CharT** argv) { + do { + *argv = *(argv + 1); + ++argv; + } while (*argv); + + --argc; +} + +namespace internal { + +#if 'a' == '\x61' +// Valid option characters must have the same representation in every locale +// (which is true for most of ASCII, barring \x5C and \x7E). +static inline constexpr bool isValidOptionCharacter(char c) { + // We specifically avoid the use of `islower` here; it's locale-dependent, and + // may return true for non-ASCII values in some locales. + return ('0' <= c && c <= '9') || ('a' <= c && c <= 'z') || c == '-'; +}; + +// Convert uppercase to lowercase, locale-insensitively. +static inline constexpr char toLowercase(char c) { + // We specifically avoid the use of `tolower` here; it's locale-dependent, and + // may output ASCII values for non-ASCII input (or vice versa) in some + // locales. + return ('A' <= c && c <= 'Z') ? char(c | ' ') : c; +}; + +// Convert a CharT to a char, ensuring that no CharT is mapped to any valid +// option character except the unique CharT naturally corresponding thereto. +template <typename CharT> +static inline constexpr char toNarrow(CharT c) { + // confirmed to compile down to nothing when `CharT` is `char` + return (c & static_cast<CharT>(0xff)) == c ? c : 0xff; +}; +#else +// The target system's character set isn't even ASCII-compatible. If you're +// porting Gecko to such a platform, you'll have to implement these yourself. +# error Character conversion functions not implemented for this platform. +#endif + +// Case-insensitively compare a string taken from the command-line (`mixedstr`) +// to the text of some known command-line option (`lowerstr`). +template <typename CharT> +static inline bool strimatch(const char* lowerstr, const CharT* mixedstr) { + while (*lowerstr) { + if (!*mixedstr) return false; // mixedstr is shorter + + // Non-ASCII strings may compare incorrectly depending on the user's locale. + // Some ASCII-safe characters are also dispermitted for semantic reasons + // and simplicity. + if (!isValidOptionCharacter(*lowerstr)) return false; + + if (toLowercase(toNarrow(*mixedstr)) != *lowerstr) { + return false; // no match + } + + ++lowerstr; + ++mixedstr; + } + + if (*mixedstr) return false; // lowerstr is shorter + + return true; +} + +// Given a command-line argument, return Nothing if it isn't structurally a +// command-line option, and Some(<the option text>) if it is. +template <typename CharT> +mozilla::Maybe<const CharT*> ReadAsOption(const CharT* str) { + if (!str) { + return Nothing(); + } + if (*str == '-') { + str++; + if (*str == '-') { + str++; + } + return Some(str); + } +#ifdef XP_WIN + if (*str == '/') { + return Some(str + 1); + } +#endif + return Nothing(); +} + +} // namespace internal + +using internal::strimatch; + +const wchar_t kCommandLineDelimiter[] = L" \t"; + +enum class CheckArgFlag : uint32_t { + None = 0, + // (1 << 0) Used to be CheckOSInt + RemoveArg = (1 << 1) // Remove the argument from the argv array. +}; + +MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(CheckArgFlag) + +/** + * Check for a commandline flag. If the flag takes a parameter, the + * parameter is returned in aParam. Flags may be in the form -arg or + * --arg (or /arg on win32). + * + * @param aArgc The argc value. + * @param aArgv The original argv. + * @param aArg the parameter to check. Must be lowercase. + * @param aParam if non-null, the -arg <data> will be stored in this pointer. + * This is *not* allocated, but rather a pointer to the argv data. + * @param aFlags Flags @see CheckArgFlag + */ +template <typename CharT> +inline ArgResult CheckArg(int& aArgc, CharT** aArgv, const char* aArg, + const CharT** aParam = nullptr, + CheckArgFlag aFlags = CheckArgFlag::RemoveArg) { + using internal::ReadAsOption; + MOZ_ASSERT(aArgv && aArg); + + CharT** curarg = aArgv + 1; // skip argv[0] + ArgResult ar = ARG_NONE; + + while (*curarg) { + if (const auto arg = ReadAsOption(*curarg)) { + if (strimatch(aArg, arg.value())) { + if (aFlags & CheckArgFlag::RemoveArg) { + RemoveArg(aArgc, curarg); + } else { + ++curarg; + } + + if (!aParam) { + ar = ARG_FOUND; + break; + } + + if (*curarg) { + if (ReadAsOption(*curarg)) { + return ARG_BAD; + } + + *aParam = *curarg; + + if (aFlags & CheckArgFlag::RemoveArg) { + RemoveArg(aArgc, curarg); + } + + ar = ARG_FOUND; + break; + } + + return ARG_BAD; + } + } + + ++curarg; + } + + return ar; +} + +template <typename CharT> +inline ArgResult CheckArg(int& aArgc, CharT** aArgv, const char* aArg, + std::nullptr_t, + CheckArgFlag aFlags = CheckArgFlag::RemoveArg) { + return CheckArg<CharT>(aArgc, aArgv, aArg, + static_cast<const CharT**>(nullptr), aFlags); +} + +namespace internal { +// template <typename T> +// constexpr bool IsStringRange = +// std::convertible_to<std::ranges::range_value_t<T>, const char *>; + +template <typename CharT, typename ListT> +// requires IsStringRange<ListT> +static bool MatchesAnyOf(CharT const* unknown, ListT const& known) { + for (const char* k : known) { + if (strimatch(k, unknown)) { + return true; + } + } + return false; +} + +template <typename CharT, typename ReqContainerT, typename OptContainerT> +// requires IsStringRange<ReqContainerT> && IsStringRange<OptContainerT> +inline bool EnsureCommandlineSafeImpl(int aArgc, CharT** aArgv, + ReqContainerT const& requiredParams, + OptContainerT const& optionalParams) { + // We expect either no -osint, or the full commandline to be: + // + // app -osint [<optional-param>...] <required-param> <required-argument> + // + // Otherwise, we abort to avoid abuse of other command-line handlers from apps + // that do a poor job escaping links they give to the OS. + // + // Note that the above implies that optional parameters do not themselves take + // arguments. This is a security feature, to prevent the possible injection of + // additional parameters via such arguments. (See, e.g., bug 384384.) + + static constexpr const char* osintLit = "osint"; + + // If "-osint" (or the equivalent) is not present, then this is trivially + // satisfied. + if (CheckArg(aArgc, aArgv, osintLit, nullptr, CheckArgFlag::None) != + ARG_FOUND) { + return true; + } + + // There should be at least 4 items present: + // <app name> -osint <required param> <arg>. + if (aArgc < 4) { + return false; + } + + // The first parameter must be osint. + const auto arg1 = ReadAsOption(aArgv[1]); + if (!arg1) return false; + if (!strimatch(osintLit, arg1.value())) { + return false; + } + // Following this is any number of optional parameters, terminated by a + // required parameter. + int pos = 2; + while (true) { + if (pos >= aArgc) return false; + + auto const arg = ReadAsOption(aArgv[pos]); + if (!arg) return false; + + if (MatchesAnyOf(arg.value(), optionalParams)) { + ++pos; + continue; + } + + if (MatchesAnyOf(arg.value(), requiredParams)) { + ++pos; + break; + } + + return false; + } + + // There must be one argument remaining... + if (pos + 1 != aArgc) return false; + // ... which must not be another option. + if (ReadAsOption(aArgv[pos])) { + return false; + } + + // Nothing ill-formed was passed. + return true; +} + +// C (and so C++) disallows empty arrays. Rather than require callers to jump +// through hoops to specify an empty optional-argument list, allow either its +// omission or its specification as `nullptr`, and do the hoop-jumping here. +// +// No such facility is provided for requiredParams, which must have at least one +// entry. +template <typename CharT, typename ReqContainerT> +inline bool EnsureCommandlineSafeImpl(int aArgc, CharT** aArgv, + ReqContainerT const& requiredParams, + std::nullptr_t _ = nullptr) { + struct { + inline const char** begin() const { return nullptr; } + inline const char** end() const { return nullptr; } + } emptyContainer; + return EnsureCommandlineSafeImpl(aArgc, aArgv, requiredParams, + emptyContainer); +} +} // namespace internal + +template <typename CharT, typename ReqContainerT, + typename OptContainerT = std::nullptr_t> +inline void EnsureCommandlineSafe( + int aArgc, CharT** aArgv, ReqContainerT const& requiredParams, + OptContainerT const& optionalParams = nullptr) { + if (!internal::EnsureCommandlineSafeImpl(aArgc, aArgv, requiredParams, + optionalParams)) { + exit(127); + } +} + +#if defined(XP_WIN) +namespace internal { +/** + * Get the length that the string will take and takes into account the + * additional length if the string needs to be quoted and if characters need to + * be escaped. + */ +inline int ArgStrLen(const wchar_t* s) { + int backslashes = 0; + int i = wcslen(s); + bool hasDoubleQuote = wcschr(s, L'"') != nullptr; + // Only add doublequotes if the string contains a space or a tab + bool addDoubleQuotes = wcspbrk(s, kCommandLineDelimiter) != nullptr; + + if (addDoubleQuotes) { + i += 2; // initial and final duoblequote + } + + if (hasDoubleQuote) { + while (*s) { + if (*s == '\\') { + ++backslashes; + } else { + if (*s == '"') { + // Escape the doublequote and all backslashes preceding the + // doublequote + i += backslashes + 1; + } + + backslashes = 0; + } + + ++s; + } + } + + return i; +} + +/** + * Copy string "s" to string "d", quoting the argument as appropriate and + * escaping doublequotes along with any backslashes that immediately precede + * doublequotes. + * The CRT parses this to retrieve the original argc/argv that we meant, + * see STDARGV.C in the MSVC CRT sources. + * + * @return the end of the string + */ +inline wchar_t* ArgToString(wchar_t* d, const wchar_t* s) { + int backslashes = 0; + bool hasDoubleQuote = wcschr(s, L'"') != nullptr; + // Only add doublequotes if the string contains a space or a tab + bool addDoubleQuotes = wcspbrk(s, kCommandLineDelimiter) != nullptr; + + if (addDoubleQuotes) { + *d = '"'; // initial doublequote + ++d; + } + + if (hasDoubleQuote) { + int i; + while (*s) { + if (*s == '\\') { + ++backslashes; + } else { + if (*s == '"') { + // Escape the doublequote and all backslashes preceding the + // doublequote + for (i = 0; i <= backslashes; ++i) { + *d = '\\'; + ++d; + } + } + + backslashes = 0; + } + + *d = *s; + ++d; + ++s; + } + } else { + wcscpy(d, s); + d += wcslen(s); + } + + if (addDoubleQuotes) { + *d = '"'; // final doublequote + ++d; + } + + return d; +} + +} // namespace internal + +/** + * Creates a command line from a list of arguments. + * + * @param argc Number of elements in |argv| + * @param argv Array of arguments + * @param aArgcExtra Number of elements in |aArgvExtra| + * @param aArgvExtra Optional array of arguments to be appended to the resulting + * command line after those provided by |argv|. + */ +inline UniquePtr<wchar_t[]> MakeCommandLine( + int argc, const wchar_t* const* argv, int aArgcExtra = 0, + const wchar_t* const* aArgvExtra = nullptr) { + int i; + int len = 0; + + // The + 1 for each argument reserves space for either a ' ' or the null + // terminator, depending on the position of the argument. + for (i = 0; i < argc; ++i) { + len += internal::ArgStrLen(argv[i]) + 1; + } + + for (i = 0; i < aArgcExtra; ++i) { + len += internal::ArgStrLen(aArgvExtra[i]) + 1; + } + + // Protect against callers that pass 0 arguments + if (len == 0) { + len = 1; + } + + auto s = MakeUnique<wchar_t[]>(len); + + int totalArgc = argc + aArgcExtra; + + wchar_t* c = s.get(); + for (i = 0; i < argc; ++i) { + c = internal::ArgToString(c, argv[i]); + if (i + 1 != totalArgc) { + *c = ' '; + ++c; + } + } + + for (i = 0; i < aArgcExtra; ++i) { + c = internal::ArgToString(c, aArgvExtra[i]); + if (i + 1 != aArgcExtra) { + *c = ' '; + ++c; + } + } + + *c = '\0'; + + return s; +} + +inline bool SetArgv0ToFullBinaryPath(wchar_t* aArgv[]) { + if (!aArgv) { + return false; + } + + UniquePtr<wchar_t[]> newArgv_0(GetFullBinaryPath()); + if (!newArgv_0) { + return false; + } + + // We intentionally leak newArgv_0 into argv[0] + aArgv[0] = newArgv_0.release(); + MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(aArgv[0]); + return true; +} + +# if defined(MOZILLA_INTERNAL_API) +// This class converts a command line string into an array of the arguments. +// It's basically the opposite of MakeCommandLine. However, the behavior is +// different from ::CommandLineToArgvW in several ways, such as escaping a +// backslash or quoting an argument containing whitespaces. This satisfies +// the examples at: +// https://docs.microsoft.com/en-us/cpp/cpp/main-function-command-line-args#results-of-parsing-command-lines +// https://docs.microsoft.com/en-us/previous-versions/17w5ykft(v=vs.85) +template <typename T> +class CommandLineParserWin final { + int mArgc; + T** mArgv; + + void Release() { + if (mArgv) { + while (mArgc) { + delete[] mArgv[--mArgc]; + } + delete[] mArgv; + mArgv = nullptr; + } + } + + public: + CommandLineParserWin() : mArgc(0), mArgv(nullptr) {} + ~CommandLineParserWin() { Release(); } + + CommandLineParserWin(const CommandLineParserWin&) = delete; + CommandLineParserWin(CommandLineParserWin&&) = delete; + CommandLineParserWin& operator=(const CommandLineParserWin&) = delete; + CommandLineParserWin& operator=(CommandLineParserWin&&) = delete; + + int Argc() const { return mArgc; } + const T* const* Argv() const { return mArgv; } + + // Returns the number of characters handled + int HandleCommandLine(const nsTSubstring<T>& aCmdLineString) { + Release(); + + if (aCmdLineString.IsEmpty()) { + return 0; + } + + int justCounting = 1; + // Flags, etc. + int init = 1; + int between, quoted, bSlashCount; + const T* p; + const T* const pEnd = aCmdLineString.EndReading(); + nsTAutoString<T> arg; + + // We loop if we've not finished the second pass through. + while (1) { + // Initialize if required. + if (init) { + p = aCmdLineString.BeginReading(); + between = 1; + mArgc = quoted = bSlashCount = 0; + + init = 0; + } + + const T charCurr = (p < pEnd) ? *p : 0; + const T charNext = (p + 1 < pEnd) ? *(p + 1) : 0; + + if (between) { + // We are traversing whitespace between args. + // Check for start of next arg. + if (charCurr != 0 && !wcschr(kCommandLineDelimiter, charCurr)) { + // Start of another arg. + between = 0; + arg.Truncate(); + switch (charCurr) { + case '\\': + // Count the backslash. + bSlashCount = 1; + break; + case '"': + // Remember we're inside quotes. + quoted = 1; + break; + default: + // Add character to arg. + arg += charCurr; + break; + } + } else { + // Another space between args, ignore it. + } + } else { + // We are processing the contents of an argument. + // Check for whitespace or end. + if (charCurr == 0 || + (!quoted && wcschr(kCommandLineDelimiter, charCurr))) { + // Process pending backslashes (interpret them + // literally since they're not followed by a "). + while (bSlashCount) { + arg += '\\'; + bSlashCount--; + } + // End current arg. + if (!justCounting) { + mArgv[mArgc] = new T[arg.Length() + 1]; + memcpy(mArgv[mArgc], arg.get(), (arg.Length() + 1) * sizeof(T)); + } + mArgc++; + // We're now between args. + between = 1; + } else { + // Still inside argument, process the character. + switch (charCurr) { + case '"': + // First, digest preceding backslashes (if any). + while (bSlashCount > 1) { + // Put one backsplash in arg for each pair. + arg += '\\'; + bSlashCount -= 2; + } + if (bSlashCount) { + // Quote is literal. + arg += '"'; + bSlashCount = 0; + } else { + // Quote starts or ends a quoted section. + if (quoted) { + // Check for special case of consecutive double + // quotes inside a quoted section. + if (charNext == '"') { + // This implies a literal double-quote. Fake that + // out by causing next double-quote to look as + // if it was preceded by a backslash. + bSlashCount = 1; + } else { + quoted = 0; + } + } else { + quoted = 1; + } + } + break; + case '\\': + // Add to count. + bSlashCount++; + break; + default: + // Accept any preceding backslashes literally. + while (bSlashCount) { + arg += '\\'; + bSlashCount--; + } + // Just add next char to the current arg. + arg += charCurr; + break; + } + } + } + + // Check for end of input. + if (charCurr) { + // Go to next character. + p++; + } else { + // If on first pass, go on to second. + if (justCounting) { + // Allocate argv array. + mArgv = new T*[mArgc]; + + // Start second pass + justCounting = 0; + init = 1; + } else { + // Quit. + break; + } + } + } + + return p - aCmdLineString.BeginReading(); + } +}; +# endif // defined(MOZILLA_INTERNAL_API) + +#endif // defined(XP_WIN) + +// SaveToEnv and EnvHasValue are only available on Windows or when +// MOZILLA_INTERNAL_API is defined +#if defined(MOZILLA_INTERNAL_API) || defined(XP_WIN) + +// Save literal putenv string to environment variable. +MOZ_NEVER_INLINE inline void SaveToEnv(const char* aEnvString) { +# if defined(MOZILLA_INTERNAL_API) + char* expr = strdup(aEnvString); + if (expr) { + PR_SetEnv(expr); + } + + // We intentionally leak |expr| here since it is required by PR_SetEnv. + MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(expr); +# elif defined(XP_WIN) + // This is the same as the NSPR implementation + // (Note that we don't need to do a strdup for this case; the CRT makes a + // copy) + _putenv(aEnvString); +# endif +} + +inline bool EnvHasValue(const char* aVarName) { +# if defined(MOZILLA_INTERNAL_API) + const char* val = PR_GetEnv(aVarName); + return val && *val; +# elif defined(XP_WIN) + // This is the same as the NSPR implementation + const char* val = getenv(aVarName); + return val && *val; +# endif +} + +#endif // end windows/internal_api-only definitions + +#ifndef NS_NO_XPCOM +already_AddRefed<nsIFile> GetFileFromEnv(const char* name); +#endif + +} // namespace mozilla + +#endif // mozilla_CmdLineAndEnvUtils_h |