diff options
Diffstat (limited to 'netwerk/base/nsURLParsers.cpp')
-rw-r--r-- | netwerk/base/nsURLParsers.cpp | 644 |
1 files changed, 644 insertions, 0 deletions
diff --git a/netwerk/base/nsURLParsers.cpp b/netwerk/base/nsURLParsers.cpp new file mode 100644 index 0000000000..7dc6567d84 --- /dev/null +++ b/netwerk/base/nsURLParsers.cpp @@ -0,0 +1,644 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <string.h> + +#include "mozilla/RangedPtr.h" +#include "mozilla/TextUtils.h" + +#include "nsCRTGlue.h" +#include "nsURLParsers.h" +#include "nsURLHelper.h" +#include "nsString.h" + +using namespace mozilla; + +//---------------------------------------------------------------------------- + +static uint32_t CountConsecutiveSlashes(const char* str, int32_t len) { + RangedPtr<const char> p(str, len); + uint32_t count = 0; + while (len-- && *p++ == '/') ++count; + return count; +} + +//---------------------------------------------------------------------------- +// nsBaseURLParser implementation +//---------------------------------------------------------------------------- + +NS_IMPL_ISUPPORTS(nsAuthURLParser, nsIURLParser) +NS_IMPL_ISUPPORTS(nsNoAuthURLParser, nsIURLParser) + +#define SET_RESULT(component, pos, len) \ + PR_BEGIN_MACRO \ + if (component##Pos) *component##Pos = uint32_t(pos); \ + if (component##Len) *component##Len = int32_t(len); \ + PR_END_MACRO + +#define OFFSET_RESULT(component, offset) \ + PR_BEGIN_MACRO \ + if (component##Pos) *component##Pos += (offset); \ + PR_END_MACRO + +NS_IMETHODIMP +nsBaseURLParser::ParseURL(const char* spec, int32_t specLen, + uint32_t* schemePos, int32_t* schemeLen, + uint32_t* authorityPos, int32_t* authorityLen, + uint32_t* pathPos, int32_t* pathLen) { + if (NS_WARN_IF(!spec)) { + return NS_ERROR_INVALID_POINTER; + } + + if (specLen < 0) specLen = strlen(spec); + + const char* stop = nullptr; + const char* colon = nullptr; + const char* slash = nullptr; + const char* p = spec; + uint32_t offset = 0; + int32_t len = specLen; + + // skip leading whitespace + while (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') { + spec++; + specLen--; + offset++; + + p++; + len--; + } + + for (; len && *p && !colon && !slash; ++p, --len) { + switch (*p) { + case ':': + if (!colon) colon = p; + break; + case '/': // start of filepath + case '?': // start of query + case '#': // start of ref + if (!slash) slash = p; + break; + case '@': // username@hostname + case '[': // start of IPv6 address literal + if (!stop) stop = p; + break; + } + } + // disregard the first colon if it follows an '@' or a '[' + if (colon && stop && colon > stop) colon = nullptr; + + // if the spec only contained whitespace ... + if (specLen == 0) { + SET_RESULT(scheme, 0, -1); + SET_RESULT(authority, 0, 0); + SET_RESULT(path, 0, 0); + return NS_OK; + } + + // ignore trailing whitespace and control characters + for (p = spec + specLen - 1; ((unsigned char)*p <= ' ') && (p != spec); --p) { + ; + } + + specLen = p - spec + 1; + + if (colon && (colon < slash || !slash)) { + // + // spec = <scheme>:/<the-rest> + // + // or + // + // spec = <scheme>:<authority> + // spec = <scheme>:<path-no-slashes> + // + if (!net_IsValidScheme(nsDependentCSubstring(spec, colon - spec))) { + return NS_ERROR_MALFORMED_URI; + } + SET_RESULT(scheme, offset, colon - spec); + if (authorityLen || pathLen) { + uint32_t schemeLen = colon + 1 - spec; + offset += schemeLen; + ParseAfterScheme(colon + 1, specLen - schemeLen, authorityPos, + authorityLen, pathPos, pathLen); + OFFSET_RESULT(authority, offset); + OFFSET_RESULT(path, offset); + } + } else { + // + // spec = <authority-no-port-or-password>/<path> + // spec = <path> + // + // or + // + // spec = <authority-no-port-or-password>/<path-with-colon> + // spec = <path-with-colon> + // + // or + // + // spec = <authority-no-port-or-password> + // spec = <path-no-slashes-or-colon> + // + SET_RESULT(scheme, 0, -1); + if (authorityLen || pathLen) { + ParseAfterScheme(spec, specLen, authorityPos, authorityLen, pathPos, + pathLen); + OFFSET_RESULT(authority, offset); + OFFSET_RESULT(path, offset); + } + } + return NS_OK; +} + +NS_IMETHODIMP +nsBaseURLParser::ParseAuthority(const char* auth, int32_t authLen, + uint32_t* usernamePos, int32_t* usernameLen, + uint32_t* passwordPos, int32_t* passwordLen, + uint32_t* hostnamePos, int32_t* hostnameLen, + int32_t* port) { + if (NS_WARN_IF(!auth)) { + return NS_ERROR_INVALID_POINTER; + } + + if (authLen < 0) authLen = strlen(auth); + + SET_RESULT(username, 0, -1); + SET_RESULT(password, 0, -1); + SET_RESULT(hostname, 0, authLen); + if (port) *port = -1; + return NS_OK; +} + +NS_IMETHODIMP +nsBaseURLParser::ParseUserInfo(const char* userinfo, int32_t userinfoLen, + uint32_t* usernamePos, int32_t* usernameLen, + uint32_t* passwordPos, int32_t* passwordLen) { + SET_RESULT(username, 0, -1); + SET_RESULT(password, 0, -1); + return NS_OK; +} + +NS_IMETHODIMP +nsBaseURLParser::ParseServerInfo(const char* serverinfo, int32_t serverinfoLen, + uint32_t* hostnamePos, int32_t* hostnameLen, + int32_t* port) { + SET_RESULT(hostname, 0, -1); + if (port) *port = -1; + return NS_OK; +} + +NS_IMETHODIMP +nsBaseURLParser::ParsePath(const char* path, int32_t pathLen, + uint32_t* filepathPos, int32_t* filepathLen, + uint32_t* queryPos, int32_t* queryLen, + uint32_t* refPos, int32_t* refLen) { + if (NS_WARN_IF(!path)) { + return NS_ERROR_INVALID_POINTER; + } + + if (pathLen < 0) pathLen = strlen(path); + + // path = [/]<segment1>/<segment2>/<...>/<segmentN>?<query>#<ref> + + // XXX PL_strnpbrk would be nice, but it's buggy + + // search for first occurrence of either ? or # + const char *query_beg = nullptr, *query_end = nullptr; + const char* ref_beg = nullptr; + const char* p = nullptr; + for (p = path; p < path + pathLen; ++p) { + // only match the query string if it precedes the reference fragment + if (!ref_beg && !query_beg && *p == '?') { + query_beg = p + 1; + } else if (*p == '#') { + ref_beg = p + 1; + if (query_beg) query_end = p; + break; + } + } + + if (query_beg) { + if (query_end) { + SET_RESULT(query, query_beg - path, query_end - query_beg); + } else { + SET_RESULT(query, query_beg - path, pathLen - (query_beg - path)); + } + } else { + SET_RESULT(query, 0, -1); + } + + if (ref_beg) { + SET_RESULT(ref, ref_beg - path, pathLen - (ref_beg - path)); + } else { + SET_RESULT(ref, 0, -1); + } + + const char* end; + if (query_beg) { + end = query_beg - 1; + } else if (ref_beg) { + end = ref_beg - 1; + } else { + end = path + pathLen; + } + + // an empty file path is no file path + if (end != path) { + SET_RESULT(filepath, 0, end - path); + } else { + SET_RESULT(filepath, 0, -1); + } + return NS_OK; +} + +NS_IMETHODIMP +nsBaseURLParser::ParseFilePath(const char* filepath, int32_t filepathLen, + uint32_t* directoryPos, int32_t* directoryLen, + uint32_t* basenamePos, int32_t* basenameLen, + uint32_t* extensionPos, int32_t* extensionLen) { + if (NS_WARN_IF(!filepath)) { + return NS_ERROR_INVALID_POINTER; + } + + if (filepathLen < 0) filepathLen = strlen(filepath); + + if (filepathLen == 0) { + SET_RESULT(directory, 0, -1); + SET_RESULT(basename, 0, 0); // assume a zero length file basename + SET_RESULT(extension, 0, -1); + return NS_OK; + } + + const char* p; + const char* end = filepath + filepathLen; + + // search backwards for filename + for (p = end - 1; *p != '/' && p > filepath; --p) { + ; + } + if (*p == '/') { + // catch /.. and /. + if ((p + 1 < end && *(p + 1) == '.') && + (p + 2 == end || (*(p + 2) == '.' && p + 3 == end))) { + p = end - 1; + } + // filepath = <directory><filename>.<extension> + SET_RESULT(directory, 0, p - filepath + 1); + ParseFileName(p + 1, end - (p + 1), basenamePos, basenameLen, extensionPos, + extensionLen); + OFFSET_RESULT(basename, p + 1 - filepath); + OFFSET_RESULT(extension, p + 1 - filepath); + } else { + // filepath = <filename>.<extension> + SET_RESULT(directory, 0, -1); + ParseFileName(filepath, filepathLen, basenamePos, basenameLen, extensionPos, + extensionLen); + } + return NS_OK; +} + +nsresult nsBaseURLParser::ParseFileName( + const char* filename, int32_t filenameLen, uint32_t* basenamePos, + int32_t* basenameLen, uint32_t* extensionPos, int32_t* extensionLen) { + if (NS_WARN_IF(!filename)) { + return NS_ERROR_INVALID_POINTER; + } + + if (filenameLen < 0) filenameLen = strlen(filename); + + // no extension if filename ends with a '.' + if (filename[filenameLen - 1] != '.') { + // ignore '.' at the beginning + for (const char* p = filename + filenameLen - 1; p > filename; --p) { + if (*p == '.') { + // filename = <basename.extension> + SET_RESULT(basename, 0, p - filename); + SET_RESULT(extension, p + 1 - filename, + filenameLen - (p - filename + 1)); + return NS_OK; + } + } + } + // filename = <basename> + SET_RESULT(basename, 0, filenameLen); + SET_RESULT(extension, 0, -1); + return NS_OK; +} + +//---------------------------------------------------------------------------- +// nsNoAuthURLParser implementation +//---------------------------------------------------------------------------- + +NS_IMETHODIMP +nsNoAuthURLParser::ParseAuthority(const char* auth, int32_t authLen, + uint32_t* usernamePos, int32_t* usernameLen, + uint32_t* passwordPos, int32_t* passwordLen, + uint32_t* hostnamePos, int32_t* hostnameLen, + int32_t* port) { + MOZ_ASSERT_UNREACHABLE("Shouldn't parse auth in a NoAuthURL!"); + return NS_ERROR_UNEXPECTED; +} + +void nsNoAuthURLParser::ParseAfterScheme(const char* spec, int32_t specLen, + uint32_t* authPos, int32_t* authLen, + uint32_t* pathPos, int32_t* pathLen) { + MOZ_ASSERT(specLen >= 0, "unexpected"); + + // everything is the path + uint32_t pos = 0; + switch (CountConsecutiveSlashes(spec, specLen)) { + case 0: + case 1: + break; + case 2: { + const char* p = nullptr; + if (specLen > 2) { + // looks like there is an authority section + + // if the authority looks like a drive number then we + // really want to treat it as part of the path + // [a-zA-Z][:|]{/\} + // i.e one of: c: c:\foo c:/foo c| c|\foo c|/foo + if ((specLen > 3) && (spec[3] == ':' || spec[3] == '|') && + IsAsciiAlpha(spec[2]) && + ((specLen == 4) || (spec[4] == '/') || (spec[4] == '\\'))) { + pos = 1; + break; + } + // Ignore apparent authority; path is everything after it + for (p = spec + 2; p < spec + specLen; ++p) { + if (*p == '/' || *p == '?' || *p == '#') break; + } + } + SET_RESULT(auth, 0, -1); + if (p && p != spec + specLen) { + SET_RESULT(path, p - spec, specLen - (p - spec)); + } else { + SET_RESULT(path, 0, -1); + } + return; + } + default: + pos = 2; + break; + } + SET_RESULT(auth, pos, 0); + SET_RESULT(path, pos, specLen - pos); +} + +#if defined(XP_WIN) +NS_IMETHODIMP +nsNoAuthURLParser::ParseFilePath(const char* filepath, int32_t filepathLen, + uint32_t* directoryPos, int32_t* directoryLen, + uint32_t* basenamePos, int32_t* basenameLen, + uint32_t* extensionPos, + int32_t* extensionLen) { + if (NS_WARN_IF(!filepath)) { + return NS_ERROR_INVALID_POINTER; + } + + if (filepathLen < 0) filepathLen = strlen(filepath); + + // look for a filepath consisting of only a drive number, which may or + // may not have a leading slash. + if (filepathLen > 1 && filepathLen < 4) { + const char* end = filepath + filepathLen; + const char* p = filepath; + if (*p == '/') p++; + if ((end - p == 2) && (p[1] == ':' || p[1] == '|') && IsAsciiAlpha(*p)) { + // filepath = <drive-number>: + SET_RESULT(directory, 0, filepathLen); + SET_RESULT(basename, 0, -1); + SET_RESULT(extension, 0, -1); + return NS_OK; + } + } + + // otherwise fallback on common implementation + return nsBaseURLParser::ParseFilePath(filepath, filepathLen, directoryPos, + directoryLen, basenamePos, basenameLen, + extensionPos, extensionLen); +} +#endif + +//---------------------------------------------------------------------------- +// nsAuthURLParser implementation +//---------------------------------------------------------------------------- + +NS_IMETHODIMP +nsAuthURLParser::ParseAuthority(const char* auth, int32_t authLen, + uint32_t* usernamePos, int32_t* usernameLen, + uint32_t* passwordPos, int32_t* passwordLen, + uint32_t* hostnamePos, int32_t* hostnameLen, + int32_t* port) { + nsresult rv; + + if (NS_WARN_IF(!auth)) { + return NS_ERROR_INVALID_POINTER; + } + + if (authLen < 0) authLen = strlen(auth); + + if (authLen == 0) { + SET_RESULT(username, 0, -1); + SET_RESULT(password, 0, -1); + SET_RESULT(hostname, 0, 0); + if (port) *port = -1; + return NS_OK; + } + + // search backwards for @ + const char* p = auth + authLen - 1; + for (; (*p != '@') && (p > auth); --p) { + } + if (*p == '@') { + // auth = <user-info@server-info> + rv = ParseUserInfo(auth, p - auth, usernamePos, usernameLen, passwordPos, + passwordLen); + if (NS_FAILED(rv)) return rv; + rv = ParseServerInfo(p + 1, authLen - (p - auth + 1), hostnamePos, + hostnameLen, port); + if (NS_FAILED(rv)) return rv; + OFFSET_RESULT(hostname, p + 1 - auth); + + // malformed if has a username or password + // but no host info, such as: http://u:p@/ + if ((usernamePos || passwordPos) && (!hostnamePos || !*hostnameLen)) { + return NS_ERROR_MALFORMED_URI; + } + } else { + // auth = <server-info> + SET_RESULT(username, 0, -1); + SET_RESULT(password, 0, -1); + rv = ParseServerInfo(auth, authLen, hostnamePos, hostnameLen, port); + if (NS_FAILED(rv)) return rv; + } + return NS_OK; +} + +NS_IMETHODIMP +nsAuthURLParser::ParseUserInfo(const char* userinfo, int32_t userinfoLen, + uint32_t* usernamePos, int32_t* usernameLen, + uint32_t* passwordPos, int32_t* passwordLen) { + if (NS_WARN_IF(!userinfo)) { + return NS_ERROR_INVALID_POINTER; + } + + if (userinfoLen < 0) userinfoLen = strlen(userinfo); + + if (userinfoLen == 0) { + SET_RESULT(username, 0, -1); + SET_RESULT(password, 0, -1); + return NS_OK; + } + + const char* p = (const char*)memchr(userinfo, ':', userinfoLen); + if (p) { + // userinfo = <username:password> + SET_RESULT(username, 0, p - userinfo); + SET_RESULT(password, p - userinfo + 1, userinfoLen - (p - userinfo + 1)); + } else { + // userinfo = <username> + SET_RESULT(username, 0, userinfoLen); + SET_RESULT(password, 0, -1); + } + return NS_OK; +} + +NS_IMETHODIMP +nsAuthURLParser::ParseServerInfo(const char* serverinfo, int32_t serverinfoLen, + uint32_t* hostnamePos, int32_t* hostnameLen, + int32_t* port) { + if (NS_WARN_IF(!serverinfo)) { + return NS_ERROR_INVALID_POINTER; + } + + if (serverinfoLen < 0) serverinfoLen = strlen(serverinfo); + + if (serverinfoLen == 0) { + SET_RESULT(hostname, 0, 0); + if (port) *port = -1; + return NS_OK; + } + + // search backwards for a ':' but stop on ']' (IPv6 address literal + // delimiter). check for illegal characters in the hostname. + const char* p = serverinfo + serverinfoLen - 1; + const char *colon = nullptr, *bracket = nullptr; + for (; p > serverinfo; --p) { + switch (*p) { + case ']': + bracket = p; + break; + case ':': + if (bracket == nullptr) colon = p; + break; + case ' ': + // hostname must not contain a space + return NS_ERROR_MALFORMED_URI; + } + } + + if (colon) { + // serverinfo = <hostname:port> + SET_RESULT(hostname, 0, colon - serverinfo); + if (port) { + // XXX unfortunately ToInteger is not defined for substrings + nsAutoCString buf(colon + 1, serverinfoLen - (colon + 1 - serverinfo)); + if (buf.Length() == 0) { + *port = -1; + } else { + const char* nondigit = NS_strspnp("0123456789", buf.get()); + if (nondigit && *nondigit) return NS_ERROR_MALFORMED_URI; + + nsresult err; + *port = buf.ToInteger(&err); + if (NS_FAILED(err) || *port < 0 || + *port > std::numeric_limits<uint16_t>::max()) { + return NS_ERROR_MALFORMED_URI; + } + } + } + } else { + // serverinfo = <hostname> + SET_RESULT(hostname, 0, serverinfoLen); + if (port) *port = -1; + } + + // In case of IPv6 address check its validity + if (*hostnameLen > 1 && *(serverinfo + *hostnamePos) == '[' && + *(serverinfo + *hostnamePos + *hostnameLen - 1) == ']' && + !net_IsValidIPv6Addr( + Substring(serverinfo + *hostnamePos + 1, *hostnameLen - 2))) { + return NS_ERROR_MALFORMED_URI; + } + + return NS_OK; +} + +void nsAuthURLParser::ParseAfterScheme(const char* spec, int32_t specLen, + uint32_t* authPos, int32_t* authLen, + uint32_t* pathPos, int32_t* pathLen) { + MOZ_ASSERT(specLen >= 0, "unexpected"); + + uint32_t nslash = CountConsecutiveSlashes(spec, specLen); + + // search for the end of the authority section + const char* end = spec + specLen; + const char* p; + for (p = spec + nslash; p < end; ++p) { + if (*p == '/' || *p == '?' || *p == '#') break; + } + if (p < end) { + // spec = [/]<auth><path> + SET_RESULT(auth, nslash, p - (spec + nslash)); + SET_RESULT(path, p - spec, specLen - (p - spec)); + } else { + // spec = [/]<auth> + SET_RESULT(auth, nslash, specLen - nslash); + SET_RESULT(path, 0, -1); + } +} + +//---------------------------------------------------------------------------- +// nsStdURLParser implementation +//---------------------------------------------------------------------------- + +void nsStdURLParser::ParseAfterScheme(const char* spec, int32_t specLen, + uint32_t* authPos, int32_t* authLen, + uint32_t* pathPos, int32_t* pathLen) { + MOZ_ASSERT(specLen >= 0, "unexpected"); + + uint32_t nslash = CountConsecutiveSlashes(spec, specLen); + + // search for the end of the authority section + const char* end = spec + specLen; + const char* p; + for (p = spec + nslash; p < end; ++p) { + if (strchr("/?#;", *p)) break; + } + switch (nslash) { + case 0: + case 2: + if (p < end) { + // spec = (//)<auth><path> + SET_RESULT(auth, nslash, p - (spec + nslash)); + SET_RESULT(path, p - spec, specLen - (p - spec)); + } else { + // spec = (//)<auth> + SET_RESULT(auth, nslash, specLen - nslash); + SET_RESULT(path, 0, -1); + } + break; + case 1: + // spec = /<path> + SET_RESULT(auth, 0, -1); + SET_RESULT(path, 0, specLen); + break; + default: + // spec = ///[/]<path> + SET_RESULT(auth, 2, 0); + SET_RESULT(path, 2, specLen - 2); + } +} |