diff options
Diffstat (limited to 'src/VBox/Runtime/common/misc/uri.cpp')
-rw-r--r-- | src/VBox/Runtime/common/misc/uri.cpp | 1167 |
1 files changed, 1167 insertions, 0 deletions
diff --git a/src/VBox/Runtime/common/misc/uri.cpp b/src/VBox/Runtime/common/misc/uri.cpp new file mode 100644 index 00000000..0c045db5 --- /dev/null +++ b/src/VBox/Runtime/common/misc/uri.cpp @@ -0,0 +1,1167 @@ +/* $Id: uri.cpp $ */ +/** @file + * IPRT - Uniform Resource Identifier handling. + */ + +/* + * Copyright (C) 2011-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL) only, as it comes in the "COPYING.CDDL" file of the + * VirtualBox OSE distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include <iprt/uri.h> + +#include <iprt/assert.h> +#include <iprt/ctype.h> +#include <iprt/err.h> +#include <iprt/path.h> +#include <iprt/string.h> + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** Internal magic value we use to check if a RTURIPARSED structure has made it thru RTUriParse. */ +#define RTURIPARSED_MAGIC UINT32_C(0x439e0745) + + +/* General URI format: + + foo://example.com:8042/over/there?name=ferret#nose + \_/ \______________/\_________/ \_________/ \__/ + | | | | | + scheme authority path query fragment + | _____________________|__ + / \ / \ + urn:example:animal:ferret:nose +*/ + + +/** + * The following defines characters which have to be % escaped: + * control = 00-1F + * space = ' ' + * delims = '<' , '>' , '#' , '%' , '"' + * unwise = '{' , '}' , '|' , '\' , '^' , '[' , ']' , '`' + */ +#define URI_EXCLUDED(a) \ + ( ((a) >= 0x0 && (a) <= 0x20) \ + || ((a) >= 0x5B && (a) <= 0x5E) \ + || ((a) >= 0x7B && (a) <= 0x7D) \ + || (a) == '<' || (a) == '>' || (a) == '#' \ + || (a) == '%' || (a) == '"' || (a) == '`' ) + +static char *rtUriPercentEncodeN(const char *pszString, size_t cchMax) +{ + if (!pszString) + return NULL; + + int rc = VINF_SUCCESS; + + size_t cbLen = RT_MIN(strlen(pszString), cchMax); + /* The new string can be max 3 times in size of the original string. */ + char *pszNew = RTStrAlloc(cbLen * 3 + 1); + if (!pszNew) + return NULL; + + char *pszRes = NULL; + size_t iIn = 0; + size_t iOut = 0; + while (iIn < cbLen) + { + if (URI_EXCLUDED(pszString[iIn])) + { + char szNum[3] = { 0, 0, 0 }; + RTStrFormatU8(&szNum[0], 3, pszString[iIn++], 16, 2, 2, RTSTR_F_CAPITAL | RTSTR_F_ZEROPAD); + pszNew[iOut++] = '%'; + pszNew[iOut++] = szNum[0]; + pszNew[iOut++] = szNum[1]; + } + else + pszNew[iOut++] = pszString[iIn++]; + } + if (RT_SUCCESS(rc)) + { + pszNew[iOut] = '\0'; + if (iOut != iIn) + { + /* If the source and target strings have different size, recreate + * the target string with the correct size. */ + pszRes = RTStrDupN(pszNew, iOut); + RTStrFree(pszNew); + } + else + pszRes = pszNew; + } + else + RTStrFree(pszNew); + + return pszRes; +} + + +/** + * Calculates the encoded string length. + * + * @returns Number of chars (excluding the terminator). + * @param pszString The string to encode. + * @param cchMax The maximum string length (e.g. RTSTR_MAX). + * @param fEncodeDosSlash Whether to encode DOS slashes or not. + */ +static size_t rtUriCalcEncodedLength(const char *pszString, size_t cchMax, bool fEncodeDosSlash) +{ + size_t cchEncoded = 0; + if (pszString) + { + size_t cchSrcLeft = RTStrNLen(pszString, cchMax); + while (cchSrcLeft-- > 0) + { + char const ch = *pszString++; + if (!URI_EXCLUDED(ch) || (ch == '\\' && !fEncodeDosSlash)) + cchEncoded += 1; + else + cchEncoded += 3; + } + } + return cchEncoded; +} + + +/** + * Encodes an URI into a caller allocated buffer. + * + * @returns IPRT status code. + * @param pszString The string to encode. + * @param cchMax The maximum string length (e.g. RTSTR_MAX). + * @param fEncodeDosSlash Whether to encode DOS slashes or not. + * @param pszDst The destination buffer. + * @param cbDst The size of the destination buffer. + */ +static int rtUriEncodeIntoBuffer(const char *pszString, size_t cchMax, bool fEncodeDosSlash, char *pszDst, size_t cbDst) +{ + AssertReturn(pszString, VERR_INVALID_POINTER); + AssertPtrReturn(pszDst, VERR_INVALID_POINTER); + + /* + * We do buffer size checking up front and every time we encode a special + * character. That's faster than checking for each char. + */ + size_t cchSrcLeft = RTStrNLen(pszString, cchMax); + AssertMsgReturn(cbDst > cchSrcLeft, ("cbDst=%zu cchSrcLeft=%zu\n", cbDst, cchSrcLeft), VERR_BUFFER_OVERFLOW); + cbDst -= cchSrcLeft; + + while (cchSrcLeft-- > 0) + { + char const ch = *pszString++; + if (!URI_EXCLUDED(ch) || (ch == '\\' && !fEncodeDosSlash)) + *pszDst++ = ch; + else + { + AssertReturn(cbDst >= 3, VERR_BUFFER_OVERFLOW); /* 2 extra bytes + zero terminator. */ + cbDst -= 2; + + *pszDst++ = '%'; + ssize_t cchTmp = RTStrFormatU8(pszDst, 3, (unsigned char)ch, 16, 2, 2, RTSTR_F_CAPITAL | RTSTR_F_ZEROPAD); + Assert(cchTmp == 2); NOREF(cchTmp); + pszDst += 2; + } + } + + *pszDst = '\0'; + return VINF_SUCCESS; +} + + +static char *rtUriPercentDecodeN(const char *pszString, size_t cchString) +{ + AssertPtrReturn(pszString, NULL); + AssertReturn(memchr(pszString, '\0', cchString) == NULL, NULL); + + /* + * The new string can only get smaller, so use the input length as a + * staring buffer size. + */ + char *pszDecoded = RTStrAlloc(cchString + 1); + if (pszDecoded) + { + /* + * Knowing that the pszString itself is valid UTF-8, we only have to + * validate the escape sequences. + */ + size_t cchLeft = cchString; + char const *pchSrc = pszString; + char *pchDst = pszDecoded; + while (cchLeft > 0) + { + const char *pchPct = (const char *)memchr(pchSrc, '%', cchLeft); + if (pchPct) + { + size_t cchBefore = pchPct - pchSrc; + if (cchBefore) + { + memcpy(pchDst, pchSrc, cchBefore); + pchDst += cchBefore; + pchSrc += cchBefore; + cchLeft -= cchBefore; + } + + char chHigh, chLow; + if ( cchLeft >= 3 + && RT_C_IS_XDIGIT(chHigh = pchSrc[1]) + && RT_C_IS_XDIGIT(chLow = pchSrc[2])) + { + uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10; + b <<= 4; + b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10; + *pchDst++ = (char)b; + pchSrc += 3; + cchLeft -= 3; + } + else + { + AssertFailed(); + *pchDst++ = *pchSrc++; + cchLeft--; + } + } + else + { + memcpy(pchDst, pchSrc, cchLeft); + pchDst += cchLeft; + pchSrc += cchLeft; + cchLeft = 0; + break; + } + } + + *pchDst = '\0'; + + /* + * If we've got lof space room in the result string, reallocate it. + */ + size_t cchDecoded = pchDst - pszDecoded; + Assert(cchDecoded <= cchString); + if (cchString - cchDecoded > 64) + RTStrRealloc(&pszDecoded, cchDecoded + 1); + } + return pszDecoded; +} + + +/** + * Calculates the decoded string length. + * + * @returns Number of chars (excluding the terminator). + * @param pszString The string to decode. + * @param cchMax The maximum string length (e.g. RTSTR_MAX). + */ +static size_t rtUriCalcDecodedLength(const char *pszString, size_t cchMax) +{ + size_t cchDecoded; + if (pszString) + { + size_t cchSrcLeft = cchDecoded = RTStrNLen(pszString, cchMax); + while (cchSrcLeft-- > 0) + { + char const ch = *pszString++; + if (ch != '%') + { /* typical */} + else if ( cchSrcLeft >= 2 + && RT_C_IS_XDIGIT(pszString[0]) + && RT_C_IS_XDIGIT(pszString[1])) + { + cchDecoded -= 2; + pszString += 2; + cchSrcLeft -= 2; + } + } + } + else + cchDecoded = 0; + return cchDecoded; +} + + +/** + * Decodes a string into a buffer. + * + * @returns IPRT status code. + * @param pchSrc The source string. + * @param cchSrc The max number of bytes to decode in the source string. + * @param pszDst The destination buffer. + * @param cbDst The size of the buffer (including terminator). + */ +static int rtUriDecodeIntoBuffer(const char *pchSrc, size_t cchSrc, char *pszDst, size_t cbDst) +{ + AssertPtrReturn(pchSrc, VERR_INVALID_POINTER); + AssertPtrReturn(pszDst, VERR_INVALID_POINTER); + + /* + * Knowing that the pszString itself is valid UTF-8, we only have to + * validate the escape sequences. + */ + cchSrc = RTStrNLen(pchSrc, cchSrc); + while (cchSrc > 0) + { + const char *pchPct = (const char *)memchr(pchSrc, '%', cchSrc); + if (pchPct) + { + size_t cchBefore = pchPct - pchSrc; + AssertReturn(cchBefore + 1 < cbDst, VERR_BUFFER_OVERFLOW); + if (cchBefore) + { + memcpy(pszDst, pchSrc, cchBefore); + pszDst += cchBefore; + cbDst -= cchBefore; + pchSrc += cchBefore; + cchSrc -= cchBefore; + } + + char chHigh, chLow; + if ( cchSrc >= 3 + && RT_C_IS_XDIGIT(chHigh = pchSrc[1]) + && RT_C_IS_XDIGIT(chLow = pchSrc[2])) + { + uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10; + b <<= 4; + b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10; + *pszDst++ = (char)b; + pchSrc += 3; + cchSrc -= 3; + } + else + { + AssertFailed(); + *pszDst++ = *pchSrc++; + cchSrc--; + } + cbDst -= 1; + } + else + { + AssertReturn(cchSrc < cbDst, VERR_BUFFER_OVERFLOW); + memcpy(pszDst, pchSrc, cchSrc); + pszDst += cchSrc; + cbDst -= cchSrc; + pchSrc += cchSrc; + cchSrc = 0; + break; + } + } + + AssertReturn(cbDst > 0, VERR_BUFFER_OVERFLOW); + *pszDst = '\0'; + return VINF_SUCCESS; +} + + + +static int rtUriParse(const char *pszUri, PRTURIPARSED pParsed) +{ + /* + * Validate the input and clear the output. + */ + AssertPtrReturn(pParsed, VERR_INVALID_POINTER); + RT_ZERO(*pParsed); + pParsed->uAuthorityPort = UINT32_MAX; + + AssertPtrReturn(pszUri, VERR_INVALID_POINTER); + + size_t const cchUri = strlen(pszUri); + if (RT_LIKELY(cchUri >= 3)) { /* likely */ } + else return cchUri ? VERR_URI_TOO_SHORT : VERR_URI_EMPTY; + + /* + * Validating escaped text sequences is much simpler if we know that + * that the base URI string is valid. Also, we don't necessarily trust + * the developer calling us to remember to do this. + */ + int rc = RTStrValidateEncoding(pszUri); + AssertRCReturn(rc, rc); + + /* + * RFC-3986, section 3.1: + * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + * + * The scheme ends with a ':', which we also skip here. + */ + size_t off = 0; + char ch = pszUri[off++]; + if (RT_LIKELY(RT_C_IS_ALPHA(ch))) { /* likely */ } + else return VERR_URI_INVALID_SCHEME; + for (;;) + { + ch = pszUri[off]; + if (ch == ':') + break; + if (RT_LIKELY(RT_C_IS_ALNUM(ch) || ch == '.' || ch == '-' || ch == '+')) { /* likely */ } + else return VERR_URI_INVALID_SCHEME; + off++; + } + pParsed->cchScheme = off; + + /* Require the scheme length to be at least two chars so we won't confuse + it with a path starting with a DOS drive letter specification. */ + if (RT_LIKELY(off >= 2)) { /* likely */ } + else return VERR_URI_INVALID_SCHEME; + + off++; /* (skip colon) */ + + /* + * Find the end of the path, we'll need this several times. + * Also, while we're potentially scanning the whole thing, check for '%'. + */ + size_t const offHash = RTStrOffCharOrTerm(&pszUri[off], '#') + off; + size_t const offQuestionMark = RTStrOffCharOrTerm(&pszUri[off], '?') + off; + + if (memchr(pszUri, '%', cchUri) != NULL) + pParsed->fFlags |= RTURIPARSED_F_CONTAINS_ESCAPED_CHARS; + + /* + * RFC-3986, section 3.2: + * The authority component is preceeded by a double slash ("//")... + */ + if ( pszUri[off] == '/' + && pszUri[off + 1] == '/') + { + off += 2; + pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off; + pParsed->fFlags |= RTURIPARSED_F_HAS_AUTHORITY; + + /* + * RFC-3986, section 3.2: + * ...and is terminated by the next slash ("/"), question mark ("?"), + * or number sign ("#") character, or by the end of the URI. + */ + const char *pszAuthority = &pszUri[off]; + size_t cchAuthority = RTStrOffCharOrTerm(pszAuthority, '/'); + cchAuthority = RT_MIN(cchAuthority, offHash - off); + cchAuthority = RT_MIN(cchAuthority, offQuestionMark - off); + pParsed->cchAuthority = cchAuthority; + + /* The Authority can be empty, like for: file:///usr/bin/grep */ + if (cchAuthority > 0) + { + pParsed->cchAuthorityHost = cchAuthority; + + /* + * If there is a userinfo part, it is ended by a '@'. + */ + const char *pszAt = (const char *)memchr(pszAuthority, '@', cchAuthority); + if (pszAt) + { + size_t cchTmp = pszAt - pszAuthority; + pParsed->offAuthorityHost += cchTmp + 1; + pParsed->cchAuthorityHost -= cchTmp + 1; + + /* If there is a password part, it's separated from the username with a colon. */ + const char *pszColon = (const char *)memchr(pszAuthority, ':', cchTmp); + if (pszColon) + { + pParsed->cchAuthorityUsername = pszColon - pszAuthority; + pParsed->offAuthorityPassword = &pszColon[1] - pszUri; + pParsed->cchAuthorityPassword = pszAt - &pszColon[1]; + } + else + { + pParsed->cchAuthorityUsername = cchTmp; + pParsed->offAuthorityPassword = off + cchTmp; + } + } + + /* + * If there is a port part, its after the last colon in the host part. + */ + const char *pszColon = (const char *)memrchr(&pszUri[pParsed->offAuthorityHost], ':', pParsed->cchAuthorityHost); + if (pszColon) + { + size_t cchTmp = &pszUri[pParsed->offAuthorityHost + pParsed->cchAuthorityHost] - &pszColon[1]; + pParsed->cchAuthorityHost -= cchTmp + 1; + pParsed->fFlags |= RTURIPARSED_F_HAS_PORT; + if (cchTmp > 0) + { + pParsed->uAuthorityPort = 0; + while (cchTmp-- > 0) + { + ch = *++pszColon; + if ( RT_C_IS_DIGIT(ch) + && pParsed->uAuthorityPort < UINT32_MAX / UINT32_C(10)) + { + pParsed->uAuthorityPort *= 10; + pParsed->uAuthorityPort += ch - '0'; + } + else + return VERR_URI_INVALID_PORT_NUMBER; + } + } + } + } + + /* Skip past the authority. */ + off += cchAuthority; + } + else + pParsed->offAuthority = pParsed->offAuthorityUsername = pParsed->offAuthorityPassword = pParsed->offAuthorityHost = off; + + /* + * RFC-3986, section 3.3: Path + * The path is terminated by the first question mark ("?") + * or number sign ("#") character, or by the end of the URI. + */ + pParsed->offPath = off; + pParsed->cchPath = RT_MIN(offHash, offQuestionMark) - off; + off += pParsed->cchPath; + + /* + * RFC-3986, section 3.4: Query + * The query component is indicated by the first question mark ("?") + * character and terminated by a number sign ("#") character or by the + * end of the URI. + */ + if ( off == offQuestionMark + && off < cchUri) + { + Assert(pszUri[offQuestionMark] == '?'); + pParsed->offQuery = ++off; + pParsed->cchQuery = offHash - off; + off = offHash; + } + else + { + Assert(!pszUri[offQuestionMark]); + pParsed->offQuery = off; + } + + /* + * RFC-3986, section 3.5: Fragment + * A fragment identifier component is indicated by the presence of a + * number sign ("#") character and terminated by the end of the URI. + */ + if ( off == offHash + && off < cchUri) + { + pParsed->offFragment = ++off; + pParsed->cchFragment = cchUri - off; + } + else + { + Assert(!pszUri[offHash]); + pParsed->offFragment = off; + } + + /* + * If there are any escape sequences, validate them. + * + * This is reasonably simple as we already know that the string is valid UTF-8 + * before they get decoded. Thus we only have to validate the escaped sequences. + */ + if (pParsed->fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS) + { + const char *pchSrc = (const char *)memchr(pszUri, '%', cchUri); + AssertReturn(pchSrc, VERR_INTERNAL_ERROR); + do + { + char szUtf8Seq[8]; + unsigned cchUtf8Seq = 0; + unsigned cchNeeded = 0; + size_t cchLeft = &pszUri[cchUri] - pchSrc; + do + { + if (cchLeft >= 3) + { + char chHigh = pchSrc[1]; + char chLow = pchSrc[2]; + if ( RT_C_IS_XDIGIT(chHigh) + && RT_C_IS_XDIGIT(chLow)) + { + uint8_t b = RT_C_IS_DIGIT(chHigh) ? chHigh - '0' : (chHigh & ~0x20) - 'A' + 10; + b <<= 4; + b |= RT_C_IS_DIGIT(chLow) ? chLow - '0' : (chLow & ~0x20) - 'A' + 10; + + if (!(b & 0x80)) + { + /* We don't want the string to be terminated prematurely. */ + if (RT_LIKELY(b != 0)) { /* likely */ } + else return VERR_URI_ESCAPED_ZERO; + + /* Check that we're not expecting more UTF-8 bytes. */ + if (RT_LIKELY(cchNeeded == 0)) { /* likely */ } + else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE; + } + /* Are we waiting UTF-8 bytes? */ + else if (cchNeeded > 0) + { + if (RT_LIKELY(!(b & 0x40))) { /* likely */ } + else return VERR_URI_INVALID_ESCAPED_UTF8_CONTINUATION_BYTE; + + szUtf8Seq[cchUtf8Seq++] = (char)b; + if (--cchNeeded == 0) + { + szUtf8Seq[cchUtf8Seq] = '\0'; + rc = RTStrValidateEncoding(szUtf8Seq); + if (RT_FAILURE(rc)) + return VERR_URI_ESCAPED_CHARS_NOT_VALID_UTF8; + cchUtf8Seq = 0; + } + } + /* Start a new UTF-8 sequence. */ + else + { + if ((b & 0xf8) == 0xf0) + cchNeeded = 3; + else if ((b & 0xf0) == 0xe0) + cchNeeded = 2; + else if ((b & 0xe0) == 0xc0) + cchNeeded = 1; + else + return VERR_URI_INVALID_ESCAPED_UTF8_LEAD_BYTE; + szUtf8Seq[0] = (char)b; + cchUtf8Seq = 1; + } + pchSrc += 3; + cchLeft -= 3; + } + else + return VERR_URI_INVALID_ESCAPE_SEQ; + } + else + return VERR_URI_INVALID_ESCAPE_SEQ; + } while (cchLeft > 0 && pchSrc[0] == '%'); + + /* Check that we're not expecting more UTF-8 bytes. */ + if (RT_LIKELY(cchNeeded == 0)) { /* likely */ } + else return VERR_URI_MISSING_UTF8_CONTINUATION_BYTE; + + /* next */ + pchSrc = (const char *)memchr(pchSrc, '%', cchLeft); + } while (pchSrc); + } + + pParsed->u32Magic = RTURIPARSED_MAGIC; + return VINF_SUCCESS; +} + + +RTDECL(int) RTUriParse(const char *pszUri, PRTURIPARSED pParsed) +{ + return rtUriParse(pszUri, pParsed); +} + + +RTDECL(char *) RTUriParsedScheme(const char *pszUri, PCRTURIPARSED pParsed) +{ + AssertPtrReturn(pszUri, NULL); + AssertPtrReturn(pParsed, NULL); + AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL); + return RTStrDupN(pszUri, pParsed->cchScheme); +} + + +RTDECL(char *) RTUriParsedAuthority(const char *pszUri, PCRTURIPARSED pParsed) +{ + AssertPtrReturn(pszUri, NULL); + AssertPtrReturn(pParsed, NULL); + AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL); + if (pParsed->cchAuthority || (pParsed->fFlags & RTURIPARSED_F_HAS_AUTHORITY)) + return rtUriPercentDecodeN(&pszUri[pParsed->offAuthority], pParsed->cchAuthority); + return NULL; +} + + +RTDECL(char *) RTUriParsedAuthorityUsername(const char *pszUri, PCRTURIPARSED pParsed) +{ + AssertPtrReturn(pszUri, NULL); + AssertPtrReturn(pParsed, NULL); + AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL); + if (pParsed->cchAuthorityUsername) + return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityUsername], pParsed->cchAuthorityUsername); + return NULL; +} + + +RTDECL(char *) RTUriParsedAuthorityPassword(const char *pszUri, PCRTURIPARSED pParsed) +{ + AssertPtrReturn(pszUri, NULL); + AssertPtrReturn(pParsed, NULL); + AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL); + if (pParsed->cchAuthorityPassword) + return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityPassword], pParsed->cchAuthorityPassword); + return NULL; +} + + +RTDECL(char *) RTUriParsedAuthorityHost(const char *pszUri, PCRTURIPARSED pParsed) +{ + AssertPtrReturn(pszUri, NULL); + AssertPtrReturn(pParsed, NULL); + AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL); + if (pParsed->cchAuthorityHost) + return rtUriPercentDecodeN(&pszUri[pParsed->offAuthorityHost], pParsed->cchAuthorityHost); + return NULL; +} + + +RTDECL(uint32_t) RTUriParsedAuthorityPort(const char *pszUri, PCRTURIPARSED pParsed) +{ + AssertPtrReturn(pszUri, UINT32_MAX); + AssertPtrReturn(pParsed, UINT32_MAX); + AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, UINT32_MAX); + return pParsed->uAuthorityPort; +} + + +RTDECL(char *) RTUriParsedPath(const char *pszUri, PCRTURIPARSED pParsed) +{ + AssertPtrReturn(pszUri, NULL); + AssertPtrReturn(pParsed, NULL); + AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL); + if (pParsed->cchPath) + return rtUriPercentDecodeN(&pszUri[pParsed->offPath], pParsed->cchPath); + return NULL; +} + + +RTDECL(char *) RTUriParsedQuery(const char *pszUri, PCRTURIPARSED pParsed) +{ + AssertPtrReturn(pszUri, NULL); + AssertPtrReturn(pParsed, NULL); + AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL); + if (pParsed->cchQuery) + return rtUriPercentDecodeN(&pszUri[pParsed->offQuery], pParsed->cchQuery); + return NULL; +} + + +RTDECL(char *) RTUriParsedFragment(const char *pszUri, PCRTURIPARSED pParsed) +{ + AssertPtrReturn(pszUri, NULL); + AssertPtrReturn(pParsed, NULL); + AssertReturn(pParsed->u32Magic == RTURIPARSED_MAGIC, NULL); + if (pParsed->cchFragment) + return rtUriPercentDecodeN(&pszUri[pParsed->offFragment], pParsed->cchFragment); + return NULL; +} + + +RTDECL(char *) RTUriCreate(const char *pszScheme, const char *pszAuthority, const char *pszPath, const char *pszQuery, + const char *pszFragment) +{ + if (!pszScheme) /* Scheme is minimum requirement */ + return NULL; + + char *pszResult = 0; + char *pszAuthority1 = 0; + char *pszPath1 = 0; + char *pszQuery1 = 0; + char *pszFragment1 = 0; + + do + { + /* Create the percent encoded strings and calculate the necessary uri + * length. */ + size_t cbSize = strlen(pszScheme) + 1 + 1; /* plus zero byte */ + if (pszAuthority) + { + pszAuthority1 = rtUriPercentEncodeN(pszAuthority, RTSTR_MAX); + if (!pszAuthority1) + break; + cbSize += strlen(pszAuthority1) + 2; + } + if (pszPath) + { + pszPath1 = rtUriPercentEncodeN(pszPath, RTSTR_MAX); + if (!pszPath1) + break; + cbSize += strlen(pszPath1); + } + if (pszQuery) + { + pszQuery1 = rtUriPercentEncodeN(pszQuery, RTSTR_MAX); + if (!pszQuery1) + break; + cbSize += strlen(pszQuery1) + 1; + } + if (pszFragment) + { + pszFragment1 = rtUriPercentEncodeN(pszFragment, RTSTR_MAX); + if (!pszFragment1) + break; + cbSize += strlen(pszFragment1) + 1; + } + + char *pszTmp = pszResult = (char *)RTStrAlloc(cbSize); + if (!pszResult) + break; + RT_BZERO(pszTmp, cbSize); + + /* Compose the target uri string. */ + RTStrCatP(&pszTmp, &cbSize, pszScheme); + RTStrCatP(&pszTmp, &cbSize, ":"); + if (pszAuthority1) + { + RTStrCatP(&pszTmp, &cbSize, "//"); + RTStrCatP(&pszTmp, &cbSize, pszAuthority1); + } + if (pszPath1) + { + RTStrCatP(&pszTmp, &cbSize, pszPath1); + } + if (pszQuery1) + { + RTStrCatP(&pszTmp, &cbSize, "?"); + RTStrCatP(&pszTmp, &cbSize, pszQuery1); + } + if (pszFragment1) + { + RTStrCatP(&pszTmp, &cbSize, "#"); + RTStrCatP(&pszTmp, &cbSize, pszFragment1); + } + } while (0); + + /* Cleanup */ + if (pszAuthority1) + RTStrFree(pszAuthority1); + if (pszPath1) + RTStrFree(pszPath1); + if (pszQuery1) + RTStrFree(pszQuery1); + if (pszFragment1) + RTStrFree(pszFragment1); + + return pszResult; +} + + +RTDECL(bool) RTUriIsSchemeMatch(const char *pszUri, const char *pszScheme) +{ + AssertPtrReturn(pszUri, false); + size_t const cchScheme = strlen(pszScheme); + return RTStrNICmp(pszUri, pszScheme, cchScheme) == 0 + && pszUri[cchScheme] == ':'; +} + + +RTDECL(int) RTUriFileCreateEx(const char *pszPath, uint32_t fPathStyle, char **ppszUri, size_t cbUri, size_t *pcchUri) +{ + /* + * Validate and adjust input. (RTPathParse check pszPath out for us) + */ + if (pcchUri) + { + AssertPtrReturn(pcchUri, VERR_INVALID_POINTER); + *pcchUri = ~(size_t)0; + } + AssertPtrReturn(ppszUri, VERR_INVALID_POINTER); + AssertReturn(!(fPathStyle & ~RTPATH_STR_F_STYLE_MASK) && fPathStyle != RTPATH_STR_F_STYLE_RESERVED, VERR_INVALID_FLAGS); + if (fPathStyle == RTPATH_STR_F_STYLE_HOST) + fPathStyle = RTPATH_STYLE; + + /* + * Let the RTPath code parse the stuff (no reason to duplicate path parsing + * and get it slightly wrong here). + */ + RTPATHPARSED ParsedPath; + int rc = RTPathParse(pszPath, &ParsedPath, sizeof(ParsedPath), fPathStyle); + if (RT_SUCCESS(rc) || rc == VERR_BUFFER_OVERFLOW) + { + /* Skip leading slashes. */ + if (ParsedPath.fProps & RTPATH_PROP_ROOT_SLASH) + { + if (fPathStyle == RTPATH_STR_F_STYLE_DOS) + while (pszPath[0] == '/' || pszPath[0] == '\\') + pszPath++; + else + while (pszPath[0] == '/') + pszPath++; + } + const size_t cchPath = strlen(pszPath); + + /* + * Calculate the encoded length and figure destination buffering. + */ + static const char s_szPrefix[] = "file:///"; + size_t const cchPrefix = sizeof(s_szPrefix) - (ParsedPath.fProps & RTPATH_PROP_UNC ? 2 : 1); + size_t cchEncoded = rtUriCalcEncodedLength(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS); + + if (pcchUri) + *pcchUri = cchEncoded; + + char *pszDst; + char *pszFreeMe = NULL; + if (!cbUri || *ppszUri == NULL) + { + cbUri = RT_MAX(cbUri, cchPrefix + cchEncoded + 1); + *ppszUri = pszFreeMe = pszDst = RTStrAlloc(cbUri); + AssertReturn(pszDst, VERR_NO_STR_MEMORY); + } + else if (cchEncoded < cbUri) + pszDst = *ppszUri; + else + return VERR_BUFFER_OVERFLOW; + + /* + * Construct the URI. + */ + memcpy(pszDst, s_szPrefix, cchPrefix); + pszDst[cchPrefix] = '\0'; + rc = rtUriEncodeIntoBuffer(pszPath, cchPath, fPathStyle != RTPATH_STR_F_STYLE_DOS, &pszDst[cchPrefix], cbUri - cchPrefix); + if (RT_SUCCESS(rc)) + { + Assert(strlen(pszDst) == cbUri - 1); + if (fPathStyle == RTPATH_STR_F_STYLE_DOS) + RTPathChangeToUnixSlashes(pszDst, true /*fForce*/); + return VINF_SUCCESS; + } + + AssertRC(rc); /* Impossible! rtUriCalcEncodedLength or something above is busted! */ + if (pszFreeMe) + RTStrFree(pszFreeMe); + } + return rc; +} + + +RTDECL(char *) RTUriFileCreate(const char *pszPath) +{ + char *pszUri = NULL; + int rc = RTUriFileCreateEx(pszPath, RTPATH_STR_F_STYLE_HOST, &pszUri, 0 /*cbUri*/, NULL /*pcchUri*/); + if (RT_SUCCESS(rc)) + return pszUri; + return NULL; +} + + +RTDECL(int) RTUriFilePathEx(const char *pszUri, uint32_t fPathStyle, char **ppszPath, size_t cbPath, size_t *pcchPath) +{ + /* + * Validate and adjust input. + */ + if (pcchPath) + { + AssertPtrReturn(pcchPath, VERR_INVALID_POINTER); + *pcchPath = ~(size_t)0; + } + AssertPtrReturn(ppszPath, VERR_INVALID_POINTER); + AssertReturn(!(fPathStyle & ~RTPATH_STR_F_STYLE_MASK) && fPathStyle != RTPATH_STR_F_STYLE_RESERVED, VERR_INVALID_FLAGS); + if (fPathStyle == RTPATH_STR_F_STYLE_HOST) + fPathStyle = RTPATH_STYLE; + AssertPtrReturn(pszUri, VERR_INVALID_POINTER); + + /* + * Check that this is a file URI. + */ + if (RTStrNICmp(pszUri, RT_STR_TUPLE("file:")) == 0) + { /* likely */ } + else + return VERR_URI_NOT_FILE_SCHEME; + + /* + * We may have a number of variations here, mostly thanks to + * various windows software. First the canonical variations: + * - file:///C:/Windows/System32/kernel32.dll + * - file:///C|/Windows/System32/kernel32.dll + * - file:///C:%5CWindows%5CSystem32%5Ckernel32.dll + * - file://localhost/C:%5CWindows%5CSystem32%5Ckernel32.dll + * - file://cifsserver.dev/systemshare%5CWindows%5CSystem32%5Ckernel32.dll + * - file://cifsserver.dev:139/systemshare%5CWindows%5CSystem32%5Ckernel32.dll (not quite sure here, but whatever) + * + * Legacy variant without any slashes after the schema: + * - file:C:/Windows/System32/kernel32.dll + * - file:C|/Windows/System32%5Ckernel32.dll + * - file:~/.bashrc + * \--path-/ + * + * Legacy variant with exactly one slashes after the schema: + * - file:/C:/Windows/System32%5Ckernel32.dll + * - file:/C|/Windows/System32/kernel32.dll + * - file:/usr/bin/env + * \---path---/ + * + * Legacy variant with two slashes after the schema and an unescaped DOS path: + * - file://C:/Windows/System32\kernel32.dll (**) + * - file://C|/Windows/System32\kernel32.dll + * \---path---------------------/ + * -- authority, with ':' as non-working port separator + * + * Legacy variant with exactly four slashes after the schema and an unescaped DOS path. + * - file:////C:/Windows\System32\user32.dll + * + * Legacy variant with four or more slashes after the schema and an unescaped UNC path: + * - file:////cifsserver.dev/systemshare/System32%\kernel32.dll + * - file://///cifsserver.dev/systemshare/System32\kernel32.dll + * \---path--------------------------------------------/ + * + * The two unescaped variants shouldn't be handed to rtUriParse, which + * is good as we cannot actually handle the one marked by (**). So, handle + * those two special when parsing. + */ + RTURIPARSED Parsed; + int rc; + size_t cSlashes = 0; + while (pszUri[5 + cSlashes] == '/') + cSlashes++; + if ( (cSlashes == 2 || cSlashes == 4) + && RT_C_IS_ALPHA(pszUri[5 + cSlashes]) + && (pszUri[5 + cSlashes + 1] == ':' || pszUri[5 + cSlashes + 1] == '|')) + { + RT_ZERO(Parsed); /* RTURIPARSED_F_CONTAINS_ESCAPED_CHARS is now clear. */ + Parsed.offPath = 5 + cSlashes; + Parsed.cchPath = strlen(&pszUri[Parsed.offPath]); + rc = RTStrValidateEncoding(&pszUri[Parsed.offPath]); + } + else if (cSlashes >= 4) + { + RT_ZERO(Parsed); + Parsed.fFlags = cSlashes > 4 ? RTURIPARSED_F_CONTAINS_ESCAPED_CHARS : 0; + Parsed.offPath = 5 + cSlashes - 2; + Parsed.cchPath = strlen(&pszUri[Parsed.offPath]); + rc = RTStrValidateEncoding(&pszUri[Parsed.offPath]); + } + else + rc = rtUriParse(pszUri, &Parsed); + if (RT_SUCCESS(rc)) + { + /* + * Ignore localhost as hostname (it's implicit). + */ + static char const s_szLocalhost[] = "localhost"; + if ( Parsed.cchAuthorityHost == sizeof(s_szLocalhost) - 1U + && RTStrNICmp(&pszUri[Parsed.offAuthorityHost], RT_STR_TUPLE(s_szLocalhost)) == 0) + { + Parsed.cchAuthorityHost = 0; + Parsed.cchAuthority = 0; + } + + /* + * Ignore leading path slash/separator if we detect a DOS drive letter + * and we don't have a host name. + */ + if ( Parsed.cchPath >= 3 + && Parsed.cchAuthorityHost == 0 + && pszUri[Parsed.offPath] == '/' /* Leading path slash/separator. */ + && ( pszUri[Parsed.offPath + 2] == ':' /* Colon after drive letter. */ + || pszUri[Parsed.offPath + 2] == '|') /* Colon alternative. */ + && RT_C_IS_ALPHA(pszUri[Parsed.offPath + 1]) ) /* Drive letter. */ + { + Parsed.offPath++; + Parsed.cchPath--; + } + + /* + * Calculate the size of the encoded result. + * + * Since we're happily returning "C:/Windows/System32/kernel.dll" + * style paths when the caller requested UNIX style paths, we will + * return straight UNC paths too ("//cifsserver/share/dir/file"). + */ + size_t cchDecodedHost = 0; + size_t cbResult; + if (Parsed.fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS) + { + cchDecodedHost = rtUriCalcDecodedLength(&pszUri[Parsed.offAuthorityHost], Parsed.cchAuthorityHost); + cbResult = cchDecodedHost + rtUriCalcDecodedLength(&pszUri[Parsed.offPath], Parsed.cchPath) + 1; + } + else + { + cchDecodedHost = 0; + cbResult = Parsed.cchAuthorityHost + Parsed.cchPath + 1; + } + if (pcchPath) + *pcchPath = cbResult - 1; + if (cbResult > 1) + { + /* + * Prepare the necessary buffer space for the result. + */ + char *pszDst; + char *pszFreeMe = NULL; + if (!cbPath || *ppszPath == NULL) + { + cbPath = RT_MAX(cbPath, cbResult); + *ppszPath = pszFreeMe = pszDst = RTStrAlloc(cbPath); + AssertReturn(pszDst, VERR_NO_STR_MEMORY); + } + else if (cbResult <= cbPath) + pszDst = *ppszPath; + else + return VERR_BUFFER_OVERFLOW; + + /* + * Compose the result. + */ + if (Parsed.fFlags & RTURIPARSED_F_CONTAINS_ESCAPED_CHARS) + { + rc = rtUriDecodeIntoBuffer(&pszUri[Parsed.offAuthorityHost],Parsed.cchAuthorityHost, + pszDst, cchDecodedHost + 1); + Assert(RT_SUCCESS(rc) && strlen(pszDst) == cchDecodedHost); + if (RT_SUCCESS(rc)) + rc = rtUriDecodeIntoBuffer(&pszUri[Parsed.offPath], Parsed.cchPath, + &pszDst[cchDecodedHost], cbResult - cchDecodedHost); + Assert(RT_SUCCESS(rc) && strlen(pszDst) == cbResult - 1); + } + else + { + memcpy(pszDst, &pszUri[Parsed.offAuthorityHost], Parsed.cchAuthorityHost); + memcpy(&pszDst[Parsed.cchAuthorityHost], &pszUri[Parsed.offPath], Parsed.cchPath); + pszDst[cbResult - 1] = '\0'; + } + if (RT_SUCCESS(rc)) + { + /* + * Convert colon DOS driver letter colon alternative. + * We do this regardless of the desired path style. + */ + if ( RT_C_IS_ALPHA(pszDst[0]) + && pszDst[1] == '|') + pszDst[1] = ':'; + + /* + * Fix slashes. + */ + if (fPathStyle == RTPATH_STR_F_STYLE_DOS) + RTPathChangeToDosSlashes(pszDst, true); + else if (fPathStyle == RTPATH_STR_F_STYLE_UNIX) + RTPathChangeToUnixSlashes(pszDst, true); /** @todo not quite sure how this actually makes sense... */ + else + AssertFailed(); + return rc; + } + + /* bail out */ + RTStrFree(pszFreeMe); + } + else + rc = VERR_PATH_ZERO_LENGTH; + } + return rc; +} + + +RTDECL(char *) RTUriFilePath(const char *pszUri) +{ + char *pszPath = NULL; + int rc = RTUriFilePathEx(pszUri, RTPATH_STR_F_STYLE_HOST, &pszPath, 0 /*cbPath*/, NULL /*pcchPath*/); + if (RT_SUCCESS(rc)) + return pszPath; + return NULL; +} + |