summaryrefslogtreecommitdiffstats
path: root/src/VBox/Runtime/common/misc/getoptargv.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/VBox/Runtime/common/misc/getoptargv.cpp')
-rw-r--r--src/VBox/Runtime/common/misc/getoptargv.cpp654
1 files changed, 654 insertions, 0 deletions
diff --git a/src/VBox/Runtime/common/misc/getoptargv.cpp b/src/VBox/Runtime/common/misc/getoptargv.cpp
new file mode 100644
index 00000000..9a84507a
--- /dev/null
+++ b/src/VBox/Runtime/common/misc/getoptargv.cpp
@@ -0,0 +1,654 @@
+/* $Id: getoptargv.cpp $ */
+/** @file
+ * IPRT - Command Line Parsing, Argument Vector.
+ */
+
+/*
+ * Copyright (C) 2010-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/getopt.h>
+#include "internal/iprt.h"
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/err.h>
+#include <iprt/mem.h>
+#include <iprt/string.h>
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+/**
+ * Array indexed by the quoting type and 7-bit ASCII character.
+ *
+ * We include some extra stuff here that the corresponding shell would normally
+ * require quoting of.
+ */
+static uint8_t
+#ifndef IPRT_REGENERATE_QUOTE_CHARS
+const
+#endif
+g_abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_MASK + 1][16] =
+{
+ { 0xfe, 0xff, 0xff, 0xff, 0x65, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10 },
+ { 0xfe, 0xff, 0xff, 0xff, 0xd7, 0x07, 0x00, 0xd8, 0x00, 0x00, 0x00, 0x18, 0x01, 0x00, 0x00, 0x50 },
+};
+
+
+#ifdef IPRT_REGENERATE_QUOTE_CHARS /* To re-generate the bitmaps. */
+# include <stdio.h>
+int main()
+{
+ RT_ZERO(g_abmQuoteChars);
+
+# define SET_ALL(ch) \
+ do { \
+ for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++) \
+ ASMBitSet(&g_abmQuoteChars[iType], (ch)); \
+ } while (0)
+# define SET(ConstSuffix, ch) \
+ do { \
+ ASMBitSet(&g_abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_##ConstSuffix], (ch)); \
+ printf(#ConstSuffix ": %#x %d %c\n", (ch), (ch), (ch)); \
+ } while (0)
+
+ /* just flag all the control chars as in need of quoting. */
+ for (char ch = 1; ch < 0x20; ch++)
+ SET_ALL(ch);
+
+ /* ... and space of course */
+ SET_ALL(' ');
+
+ /* MS CRT / CMD.EXE: */
+ SET(MS_CRT, '"');
+ SET(MS_CRT, '&');
+ SET(MS_CRT, '>');
+ SET(MS_CRT, '<');
+ SET(MS_CRT, '|');
+ SET(MS_CRT, '%');
+
+ /* Bourne shell: */
+ SET(BOURNE_SH, '!');
+ SET(BOURNE_SH, '"');
+ SET(BOURNE_SH, '$');
+ SET(BOURNE_SH, '&');
+ SET(BOURNE_SH, '(');
+ SET(BOURNE_SH, ')');
+ SET(BOURNE_SH, '*');
+ SET(BOURNE_SH, ';');
+ SET(BOURNE_SH, '<');
+ SET(BOURNE_SH, '>');
+ SET(BOURNE_SH, '?');
+ SET(BOURNE_SH, '[');
+ SET(BOURNE_SH, '\'');
+ SET(BOURNE_SH, '\\');
+ SET(BOURNE_SH, '`');
+ SET(BOURNE_SH, '|');
+ SET(BOURNE_SH, '~');
+
+ for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++)
+ {
+ printf(" {");
+ for (size_t iByte = 0; iByte < 16; iByte++)
+ printf(iByte == 0 ? " 0x%02x" : ", 0x%02x", g_abmQuoteChars[iType][iByte]);
+ printf(" },\n");
+ }
+ return 0;
+}
+
+#else /* !IPRT_REGENERATE_QUOTE_CHARS */
+
+/**
+ * Look for an unicode code point in the separator string.
+ *
+ * @returns true if it's a separator, false if it isn't.
+ * @param Cp The code point.
+ * @param pszSeparators The separators.
+ */
+static bool rtGetOptIsUniCpInString(RTUNICP Cp, const char *pszSeparators)
+{
+ /* This could be done in a more optimal fashion. Probably worth a
+ separate RTStr function at some point. */
+ for (;;)
+ {
+ RTUNICP CpSep;
+ int rc = RTStrGetCpEx(&pszSeparators, &CpSep);
+ AssertRCReturn(rc, false);
+ if (CpSep == Cp)
+ return true;
+ if (!CpSep)
+ return false;
+ }
+}
+
+
+/**
+ * Look for an 7-bit ASCII character in the separator string.
+ *
+ * @returns true if it's a separator, false if it isn't.
+ * @param ch The character.
+ * @param pszSeparators The separators.
+ * @param cchSeparators The number of separators chars.
+ */
+DECLINLINE(bool) rtGetOptIsAsciiInSet(char ch, const char *pszSeparators, size_t cchSeparators)
+{
+ switch (cchSeparators)
+ {
+ case 8: if (ch == pszSeparators[7]) return true; RT_FALL_THRU();
+ case 7: if (ch == pszSeparators[6]) return true; RT_FALL_THRU();
+ case 6: if (ch == pszSeparators[5]) return true; RT_FALL_THRU();
+ case 5: if (ch == pszSeparators[4]) return true; RT_FALL_THRU();
+ case 4: if (ch == pszSeparators[3]) return true; RT_FALL_THRU();
+ case 3: if (ch == pszSeparators[2]) return true; RT_FALL_THRU();
+ case 2: if (ch == pszSeparators[1]) return true; RT_FALL_THRU();
+ case 1: if (ch == pszSeparators[0]) return true;
+ return false;
+ default:
+ return memchr(pszSeparators, ch, cchSeparators) != NULL;
+ }
+}
+
+
+/**
+ * Checks if the character is in the set of separators
+ *
+ * @returns true if it is, false if it isn't.
+ *
+ * @param Cp The code point.
+ * @param pszSeparators The separators.
+ * @param cchSeparators The length of @a pszSeparators.
+ */
+DECL_FORCE_INLINE(bool) rtGetOptIsCpInSet(RTUNICP Cp, const char *pszSeparators, size_t cchSeparators)
+{
+ if (RT_LIKELY(Cp <= 127))
+ return rtGetOptIsAsciiInSet((char)Cp, pszSeparators, cchSeparators);
+ return rtGetOptIsUniCpInString(Cp, pszSeparators);
+}
+
+
+/**
+ * Skips any delimiters at the start of the string that is pointed to.
+ *
+ * @returns VINF_SUCCESS or RTStrGetCpEx status code.
+ * @param ppszSrc Where to get and return the string pointer.
+ * @param pszSeparators The separators.
+ * @param cchSeparators The length of @a pszSeparators.
+ */
+static int rtGetOptSkipDelimiters(const char **ppszSrc, const char *pszSeparators, size_t cchSeparators)
+{
+ const char *pszSrc = *ppszSrc;
+ const char *pszRet;
+ for (;;)
+ {
+ pszRet = pszSrc;
+ RTUNICP Cp;
+ int rc = RTStrGetCpEx(&pszSrc, &Cp);
+ if (RT_FAILURE(rc))
+ {
+ *ppszSrc = pszRet;
+ return rc;
+ }
+ if ( !Cp
+ || !rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
+ break;
+ }
+
+ *ppszSrc = pszRet;
+ return VINF_SUCCESS;
+}
+
+
+RTDECL(int) RTGetOptArgvFromString(char ***ppapszArgv, int *pcArgs, const char *pszCmdLine,
+ uint32_t fFlags, const char *pszSeparators)
+{
+ /*
+ * Some input validation.
+ */
+ AssertPtr(pszCmdLine);
+ AssertPtr(pcArgs);
+ AssertPtr(ppapszArgv);
+ AssertReturn( (fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_BOURNE_SH
+ || (fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT, VERR_INVALID_FLAGS);
+ AssertReturn(~(fFlags & ~RTGETOPTARGV_CNV_VALID_MASK), VERR_INVALID_FLAGS);
+
+ if (!pszSeparators)
+ pszSeparators = " \t\n\r";
+ else
+ AssertPtr(pszSeparators);
+ size_t const cchSeparators = strlen(pszSeparators);
+ AssertReturn(cchSeparators > 0, VERR_INVALID_PARAMETER);
+
+ /*
+ * Parse the command line and chop off it into argv individual argv strings.
+ */
+ const char *pszSrc = pszCmdLine;
+ char *pszDup = NULL;
+ char *pszDst;
+ if (fFlags & RTGETOPTARGV_CNV_MODIFY_INPUT)
+ pszDst = (char *)pszCmdLine;
+ else
+ {
+ pszDst = pszDup = (char *)RTMemAlloc(strlen(pszSrc) + 1);
+ if (!pszDup)
+ return VERR_NO_STR_MEMORY;
+ }
+ int rc = VINF_SUCCESS;
+ char **papszArgs = NULL;
+ unsigned iArg = 0;
+ while (*pszSrc)
+ {
+ /* Skip stuff */
+ rc = rtGetOptSkipDelimiters(&pszSrc, pszSeparators, cchSeparators);
+ if (RT_FAILURE(rc))
+ break;
+ if (!*pszSrc)
+ break;
+
+ /* Start a new entry. */
+ if ((iArg % 32) == 0)
+ {
+ void *pvNew = RTMemRealloc(papszArgs, (iArg + 33) * sizeof(char *));
+ if (!pvNew)
+ {
+ rc = VERR_NO_MEMORY;
+ break;
+ }
+ papszArgs = (char **)pvNew;
+ }
+ papszArgs[iArg++] = pszDst;
+
+ /*
+ * Parse and copy the string over.
+ */
+ RTUNICP uc;
+ if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_BOURNE_SH)
+ {
+ /*
+ * Bourne shell style.
+ */
+ RTUNICP ucQuote = 0;
+ for (;;)
+ {
+ rc = RTStrGetCpEx(&pszSrc, &uc);
+ if (RT_FAILURE(rc) || !uc)
+ break;
+ if (!ucQuote)
+ {
+ if (uc == '"' || uc == '\'')
+ ucQuote = uc;
+ else if (rtGetOptIsCpInSet(uc, pszSeparators, cchSeparators))
+ break;
+ else if (uc != '\\')
+ pszDst = RTStrPutCp(pszDst, uc);
+ else
+ {
+ /* escaped char */
+ rc = RTStrGetCpEx(&pszSrc, &uc);
+ if (RT_FAILURE(rc) || !uc)
+ break;
+ pszDst = RTStrPutCp(pszDst, uc);
+ }
+ }
+ else if (ucQuote != uc)
+ {
+ if (uc != '\\' || ucQuote == '\'')
+ pszDst = RTStrPutCp(pszDst, uc);
+ else
+ {
+ /* escaped char */
+ rc = RTStrGetCpEx(&pszSrc, &uc);
+ if (RT_FAILURE(rc) || !uc)
+ break;
+ if ( uc != '"'
+ && uc != '\\'
+ && uc != '`'
+ && uc != '$'
+ && uc != '\n')
+ pszDst = RTStrPutCp(pszDst, ucQuote);
+ pszDst = RTStrPutCp(pszDst, uc);
+ }
+ }
+ else
+ ucQuote = 0;
+ }
+ }
+ else
+ {
+ /*
+ * Microsoft CRT style.
+ */
+ Assert((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT);
+ bool fInQuote = false;
+ for (;;)
+ {
+ rc = RTStrGetCpEx(&pszSrc, &uc);
+ if (RT_FAILURE(rc) || !uc)
+ break;
+ if (uc == '"')
+ {
+ /* Two double quotes insides a quoted string in an escape
+ sequence and we output one double quote char.
+ See http://www.daviddeley.com/autohotkey/parameters/parameters.htm */
+ if (!fInQuote)
+ fInQuote = true;
+ else if (*pszSrc != '"')
+ fInQuote = false;
+ else
+ {
+ pszDst = RTStrPutCp(pszDst, '"');
+ pszSrc++;
+ }
+ }
+ else if (!fInQuote && rtGetOptIsCpInSet(uc, pszSeparators, cchSeparators))
+ break;
+ else if (uc != '\\')
+ pszDst = RTStrPutCp(pszDst, uc);
+ else
+ {
+ /* A backslash sequence is only relevant if followed by
+ a double quote, then it will work like an escape char. */
+ size_t cSlashes = 1;
+ while (*pszSrc == '\\')
+ {
+ cSlashes++;
+ pszSrc++;
+ }
+ if (*pszSrc != '"')
+ /* Not an escape sequence. */
+ while (cSlashes-- > 0)
+ pszDst = RTStrPutCp(pszDst, '\\');
+ else
+ {
+ /* Escape sequence. Output half of the slashes. If odd
+ number, output the escaped double quote . */
+ while (cSlashes >= 2)
+ {
+ pszDst = RTStrPutCp(pszDst, '\\');
+ cSlashes -= 2;
+ }
+ if (cSlashes)
+ {
+ pszDst = RTStrPutCp(pszDst, '"');
+ pszSrc++;
+ }
+ }
+ }
+ }
+ }
+
+ *pszDst++ = '\0';
+ if (RT_FAILURE(rc) || !uc)
+ break;
+ }
+
+ if (RT_FAILURE(rc))
+ {
+ RTMemFree(pszDup);
+ RTMemFree(papszArgs);
+ return rc;
+ }
+
+ /*
+ * Terminate the array.
+ * Check for empty string to make sure we've got an array.
+ */
+ if (iArg == 0)
+ {
+ RTMemFree(pszDup);
+ papszArgs = (char **)RTMemAlloc(1 * sizeof(char *));
+ if (!papszArgs)
+ return VERR_NO_MEMORY;
+ }
+ papszArgs[iArg] = NULL;
+
+ *pcArgs = iArg;
+ *ppapszArgv = papszArgs;
+ return VINF_SUCCESS;
+}
+
+
+RTDECL(void) RTGetOptArgvFree(char **papszArgv)
+{
+ RTGetOptArgvFreeEx(papszArgv, 0);
+}
+
+
+RTDECL(void) RTGetOptArgvFreeEx(char **papszArgv, uint32_t fFlags)
+{
+ Assert(~(fFlags & ~RTGETOPTARGV_CNV_VALID_MASK));
+ if (papszArgv)
+ {
+ /*
+ * We've really only _two_ allocations here. Check the code in
+ * RTGetOptArgvFromString for the particulars.
+ */
+ if (!(fFlags & RTGETOPTARGV_CNV_MODIFY_INPUT))
+ RTMemFree(papszArgv[0]);
+ RTMemFree(papszArgv);
+ }
+}
+
+
+/**
+ * Checks if the argument needs quoting or not.
+ *
+ * @returns true if it needs, false if it don't.
+ * @param pszArg The argument.
+ * @param fFlags Quoting style.
+ * @param pcch Where to store the argument length when quoting
+ * is not required. (optimization)
+ */
+DECLINLINE(bool) rtGetOpArgvRequiresQuoting(const char *pszArg, uint32_t fFlags, size_t *pcch)
+{
+ if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) != RTGETOPTARGV_CNV_UNQUOTED)
+ {
+ char const *psz = pszArg;
+ unsigned char ch;
+ while ((ch = (unsigned char)*psz))
+ {
+ if ( ch < 128
+ && ASMBitTest(&g_abmQuoteChars[fFlags & RTGETOPTARGV_CNV_QUOTE_MASK], ch))
+ return true;
+ psz++;
+ }
+
+ *pcch = psz - pszArg;
+ }
+ else
+ *pcch = strlen(pszArg);
+ return false;
+}
+
+
+/**
+ * Grows the command line string buffer.
+ *
+ * @returns VINF_SUCCESS or VERR_NO_STR_MEMORY.
+ * @param ppszCmdLine Pointer to the command line string pointer.
+ * @param pcbCmdLineAlloc Pointer to the allocation length variable.
+ * @param cchMin The minimum size to grow with, kind of.
+ */
+static int rtGetOptArgvToStringGrow(char **ppszCmdLine, size_t *pcbCmdLineAlloc, size_t cchMin)
+{
+ size_t cb = *pcbCmdLineAlloc;
+ while (cb < cchMin)
+ cb *= 2;
+ cb *= 2;
+ *pcbCmdLineAlloc = cb;
+ return RTStrRealloc(ppszCmdLine, cb);
+}
+
+/**
+ * Checks if we have a sequence of DOS slashes followed by a double quote char.
+ *
+ * @returns true / false accordingly.
+ * @param psz The string.
+ */
+DECLINLINE(bool) rtGetOptArgvMsCrtIsSlashQuote(const char *psz)
+{
+ while (*psz == '\\')
+ psz++;
+ return *psz == '"' || *psz == '\0';
+}
+
+
+RTDECL(int) RTGetOptArgvToString(char **ppszCmdLine, const char * const *papszArgv, uint32_t fFlags)
+{
+ AssertReturn((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) <= RTGETOPTARGV_CNV_UNQUOTED, VERR_INVALID_FLAGS);
+ AssertReturn(!(fFlags & (~RTGETOPTARGV_CNV_VALID_MASK | RTGETOPTARGV_CNV_MODIFY_INPUT)), VERR_INVALID_FLAGS);
+
+#define PUT_CH(ch) \
+ if (RT_UNLIKELY(off + 1 >= cbCmdLineAlloc)) { \
+ rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, 1); \
+ if (RT_FAILURE(rc)) \
+ break; \
+ } \
+ pszCmdLine[off++] = (ch)
+
+#define PUT_PSZ(psz, cch) \
+ if (RT_UNLIKELY(off + (cch) >= cbCmdLineAlloc)) { \
+ rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, (cch)); \
+ if (RT_FAILURE(rc)) \
+ break; \
+ } \
+ memcpy(&pszCmdLine[off], (psz), (cch)); \
+ off += (cch);
+#define PUT_SZ(sz) PUT_PSZ(sz, sizeof(sz) - 1)
+
+ /*
+ * Take the realloc approach, it requires less code and is probably more
+ * efficient than figuring out the size first.
+ */
+ int rc = VINF_SUCCESS;
+ size_t off = 0;
+ size_t cbCmdLineAlloc = 256;
+ char *pszCmdLine = RTStrAlloc(256);
+ if (!pszCmdLine)
+ return VERR_NO_STR_MEMORY;
+
+ for (size_t i = 0; papszArgv[i]; i++)
+ {
+ if (i > 0)
+ {
+ PUT_CH(' ');
+ }
+
+ /* does it need quoting? */
+ const char *pszArg = papszArgv[i];
+ size_t cchArg;
+ if (!rtGetOpArgvRequiresQuoting(pszArg, fFlags, &cchArg))
+ {
+ /* No quoting needed, just append the argument. */
+ PUT_PSZ(pszArg, cchArg);
+ }
+ else if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT)
+ {
+ /*
+ * Microsoft CRT quoting. Quote the whole argument in double
+ * quotes to make it easier to read and code.
+ */
+ PUT_CH('"');
+ char ch;
+ while ((ch = *pszArg++))
+ {
+ if ( ch == '\\'
+ && rtGetOptArgvMsCrtIsSlashQuote(pszArg))
+ {
+ PUT_SZ("\\\\");
+ }
+ else if (ch == '"')
+ {
+ PUT_SZ("\\\"");
+ }
+ else
+ {
+ PUT_CH(ch);
+ }
+ }
+ PUT_CH('"');
+ }
+ else
+ {
+ /*
+ * Bourne Shell quoting. Quote the whole thing in single quotes
+ * and use double quotes for any single quote chars.
+ */
+ PUT_CH('\'');
+ char ch;
+ while ((ch = *pszArg++))
+ {
+ if (ch == '\'')
+ {
+ PUT_SZ("'\"'\"'");
+ }
+ else
+ {
+ PUT_CH(ch);
+ }
+ }
+ PUT_CH('\'');
+ }
+ }
+
+ /* Set return value / cleanup. */
+ if (RT_SUCCESS(rc))
+ {
+ pszCmdLine[off] = '\0';
+ *ppszCmdLine = pszCmdLine;
+ }
+ else
+ RTStrFree(pszCmdLine);
+#undef PUT_SZ
+#undef PUT_PSZ
+#undef PUT_CH
+ return rc;
+}
+
+
+RTDECL(int) RTGetOptArgvToUtf16String(PRTUTF16 *ppwszCmdLine, const char * const *papszArgv, uint32_t fFlags)
+{
+ char *pszCmdLine;
+ int rc = RTGetOptArgvToString(&pszCmdLine, papszArgv, fFlags);
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTStrToUtf16(pszCmdLine, ppwszCmdLine);
+ RTStrFree(pszCmdLine);
+ }
+ return rc;
+}
+
+#endif /* !IPRT_REGENERATE_QUOTE_CHARS */
+