/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "IPv4Parser.h"
|
||
#include "mozilla/EndianUtils.h"
|
||
#include "nsPrintfCString.h"
|
||
#include "nsTArray.h"
|
||
|
||
namespace mozilla::net::IPv4Parser {
|
||
|
||
// https://url.spec.whatwg.org/#ends-in-a-number-checker
|
||
bool EndsInANumber(const nsCString& input) {
|
||
// 1. Let parts be the result of strictly splitting input on U+002E (.).
|
||
nsTArray<nsDependentCSubstring> parts;
|
||
for (const nsDependentCSubstring& part : input.Split('.')) {
|
||
parts.AppendElement(part);
|
||
}
|
||
|
||
if (parts.Length() == 0) {
|
||
return false;
|
||
}
|
||
|
||
// 2.If the last item in parts is the empty string, then:
|
||
// 1. If parts’s size is 1, then return false.
|
||
// 2. Remove the last item from parts.
|
||
if (parts.LastElement().IsEmpty()) {
|
||
if (parts.Length() == 1) {
|
||
return false;
|
||
}
|
||
Unused << parts.PopLastElement();
|
||
}
|
||
|
||
// 3. Let last be the last item in parts.
|
||
const nsDependentCSubstring& last = parts.LastElement();
|
||
|
||
// 4. If last is non-empty and contains only ASCII digits, then return true.
|
||
// The erroneous input "09" will be caught by the IPv4 parser at a later
|
||
// stage.
|
||
if (!last.IsEmpty()) {
|
||
if (ContainsOnlyAsciiDigits(last)) {
|
||
return true;
|
||
}
|
||
}
|
||
|
||
// 5. If parsing last as an IPv4 number does not return failure, then return
|
||
// true. This is equivalent to checking that last is "0X" or "0x", followed by
|
||
// zero or more ASCII hex digits.
|
||
if (StringBeginsWith(last, "0x"_ns) || StringBeginsWith(last, "0X"_ns)) {
|
||
if (ContainsOnlyAsciiHexDigits(Substring(last, 2))) {
|
||
return true;
|
||
}
|
||
}
|
||
|
||
return false;
|
||
}
|
||
|
||
// Parses |input| as an unsigned base-10 number.
//
// |input| is expected to contain only ASCII decimal digits; this is asserted
// in debug builds but not otherwise validated. An empty |input| yields 0.
//
// On success the value is stored in |number| and NS_OK is returned. If the
// value is larger than |maxNumber|, |number| is set to 0 and
// NS_ERROR_FAILURE is returned.
nsresult ParseIPv4Number10(const nsACString& input, uint32_t& number,
                           uint32_t maxNumber) {
  uint64_t value = 0;
  const char* const end = input.EndReading();
  for (const char* current = input.BeginReading(); current < end; ++current) {
    const char c = *current;
    MOZ_ASSERT(c >= '0' && c <= '9');
    value *= 10;
    value += c - '0';

    // Fail as soon as the limit is exceeded. The value can only grow from
    // here, and stopping now keeps the 64-bit accumulator from wrapping
    // around on very long inputs (which could otherwise wrap back below
    // |maxNumber| and be accepted). Because |maxNumber| fits in 32 bits, the
    // multiply-and-add above can never overflow.
    if (value > maxNumber) {
      number = 0;
      return NS_ERROR_FAILURE;
    }
  }

  // Every iteration verified value <= maxNumber, so this cast is lossless.
  number = static_cast<uint32_t>(value);
  return NS_OK;
}
|
||
|
||
// Parses |input| as an unsigned number in the given |base| (8, 10 or 16).
//
// For base 16 the first two characters of |input| (the "0x"/"0X" prefix) are
// skipped; for base 8 the first character (the leading "0") is skipped. The
// remaining characters are expected to be valid digits for |base|; this is
// asserted in debug builds but not otherwise validated. If nothing remains
// after the prefix, the result is 0.
//
// On success the value is stored in |number| and NS_OK is returned. If the
// value is larger than |maxNumber|, |number| is set to 0 and
// NS_ERROR_FAILURE is returned.
nsresult ParseIPv4Number(const nsACString& input, int32_t base,
                         uint32_t& number, uint32_t maxNumber) {
  const char* current = input.BeginReading();
  const char* const end = input.EndReading();

  // Skip the radix prefix, never stepping beyond the end of the input.
  for (int32_t skip = (base == 16) ? 2 : (base == 8) ? 1 : 0;
       skip > 0 && current < end; --skip) {
    ++current;
  }

  // Accumulate in a 64-bit value.
  uint64_t value = 0;
  for (; current < end; ++current) {
    const char c = *current;
    MOZ_ASSERT((base == 10 && IsAsciiDigit(c)) ||
               (base == 8 && c >= '0' && c <= '7') ||
               (base == 16 && IsAsciiHexDigit(c)));

    value *= base;
    if (IsAsciiDigit(c)) {
      value += c - '0';
    } else if (c >= 'a' && c <= 'f') {
      value += c - 'a' + 10;
    } else if (c >= 'A' && c <= 'F') {
      value += c - 'A' + 10;
    }

    // Fail as soon as the limit is exceeded. The value can only grow from
    // here, and stopping now keeps the 64-bit accumulator from wrapping
    // around on very long inputs (e.g. "0x1" followed by sixteen zeros would
    // otherwise wrap to 0 and be accepted). Because |maxNumber| fits in 32
    // bits, the multiply-and-add above can never overflow.
    if (value > maxNumber) {
      number = 0;
      return NS_ERROR_FAILURE;
    }
  }

  // Every iteration verified value <= maxNumber, so this cast is lossless.
  number = static_cast<uint32_t>(value);
  return NS_OK;
}
|
||
|
||
// IPv4 parser spec: https://url.spec.whatwg.org/#concept-ipv4-parser
|
||
nsresult NormalizeIPv4(const nsACString& host, nsCString& result) {
|
||
int32_t bases[4] = {10, 10, 10, 10};
|
||
bool onlyBase10 = true; // Track this as a special case
|
||
int32_t dotIndex[3]; // The positions of the dots in the string
|
||
|
||
// Use "length" rather than host.Length() after call to
|
||
// ValidateIPv4Number because of potential trailing period.
|
||
nsDependentCSubstring filteredHost;
|
||
bool trailingDot = false;
|
||
if (host.Length() > 0 && host.Last() == '.') {
|
||
trailingDot = true;
|
||
filteredHost.Rebind(host.BeginReading(), host.Length() - 1);
|
||
} else {
|
||
filteredHost.Rebind(host.BeginReading(), host.Length());
|
||
}
|
||
|
||
int32_t length = static_cast<int32_t>(filteredHost.Length());
|
||
int32_t dotCount = ValidateIPv4Number(filteredHost, bases, dotIndex,
|
||
onlyBase10, length, trailingDot);
|
||
if (dotCount < 0 || length <= 0) {
|
||
return NS_ERROR_FAILURE;
|
||
}
|
||
|
||
// Max values specified by the spec
|
||
static const uint32_t upperBounds[] = {0xffffffffu, 0xffffffu, 0xffffu,
|
||
0xffu};
|
||
uint32_t ipv4;
|
||
int32_t start = (dotCount > 0 ? dotIndex[dotCount - 1] + 1 : 0);
|
||
|
||
// parse the last part first
|
||
nsresult res;
|
||
// Doing a special case for all items being base 10 gives ~35% speedup
|
||
res = (onlyBase10
|
||
? ParseIPv4Number10(Substring(host, start, length - start), ipv4,
|
||
upperBounds[dotCount])
|
||
: ParseIPv4Number(Substring(host, start, length - start),
|
||
bases[dotCount], ipv4, upperBounds[dotCount]));
|
||
if (NS_FAILED(res)) {
|
||
return NS_ERROR_FAILURE;
|
||
}
|
||
|
||
// parse remaining parts starting from first part
|
||
int32_t lastUsed = -1;
|
||
for (int32_t i = 0; i < dotCount; i++) {
|
||
uint32_t number;
|
||
start = lastUsed + 1;
|
||
lastUsed = dotIndex[i];
|
||
res =
|
||
(onlyBase10 ? ParseIPv4Number10(
|
||
Substring(host, start, lastUsed - start), number, 255)
|
||
: ParseIPv4Number(Substring(host, start, lastUsed - start),
|
||
bases[i], number, 255));
|
||
if (NS_FAILED(res)) {
|
||
return NS_ERROR_FAILURE;
|
||
}
|
||
ipv4 += number << (8 * (3 - i));
|
||
}
|
||
|
||
// A special case for ipv4 URL like "127." should have the same result as
|
||
// "127".
|
||
if (dotCount == 1 && dotIndex[0] == length - 1) {
|
||
ipv4 = (ipv4 & 0xff000000) >> 24;
|
||
}
|
||
|
||
uint8_t ipSegments[4];
|
||
NetworkEndian::writeUint32(ipSegments, ipv4);
|
||
result = nsPrintfCString("%d.%d.%d.%d", ipSegments[0], ipSegments[1],
|
||
ipSegments[2], ipSegments[3]);
|
||
return NS_OK;
|
||
}
|
||
|
||
// Return the number of "dots" in the string, or -1 if invalid. Note that the
|
||
// number of relevant entries in the bases/starts/ends arrays is number of
|
||
// dots + 1.
|
||
//
|
||
// length is assumed to be <= host.Length(); the caller is responsible for that
|
||
//
|
||
// Note that the value returned is guaranteed to be in [-1, 3] range.
|
||
int32_t ValidateIPv4Number(const nsACString& host, int32_t bases[4],
|
||
int32_t dotIndex[3], bool& onlyBase10,
|
||
int32_t length, bool trailingDot) {
|
||
MOZ_ASSERT(length <= (int32_t)host.Length());
|
||
if (length <= 0) {
|
||
return -1;
|
||
}
|
||
|
||
bool lastWasNumber = false; // We count on this being false for i == 0
|
||
int32_t dotCount = 0;
|
||
onlyBase10 = true;
|
||
|
||
for (int32_t i = 0; i < length; i++) {
|
||
char current = host[i];
|
||
if (current == '.') {
|
||
// A dot should not follow a dot, or be first - it can follow an x though.
|
||
if (!(lastWasNumber ||
|
||
(i >= 2 && (host[i - 1] == 'X' || host[i - 1] == 'x') &&
|
||
host[i - 2] == '0')) ||
|
||
(i == (length - 1) && trailingDot)) {
|
||
return -1;
|
||
}
|
||
|
||
if (dotCount > 2) {
|
||
return -1;
|
||
}
|
||
lastWasNumber = false;
|
||
dotIndex[dotCount] = i;
|
||
dotCount++;
|
||
} else if (current == 'X' || current == 'x') {
|
||
if (!lastWasNumber || // An X should not follow an X or a dot or be first
|
||
i == (length - 1) || // No trailing Xs allowed
|
||
(dotCount == 0 &&
|
||
i != 1) || // If we had no dots, an X should be second
|
||
host[i - 1] != '0' || // X should always follow a 0. Guaranteed i >
|
||
// 0 as lastWasNumber is true
|
||
(dotCount > 0 &&
|
||
host[i - 2] != '.')) { // And that zero follows a dot if it exists
|
||
return -1;
|
||
}
|
||
lastWasNumber = false;
|
||
bases[dotCount] = 16;
|
||
onlyBase10 = false;
|
||
|
||
} else if (current == '0') {
|
||
if (i < length - 1 && // Trailing zero doesn't signal octal
|
||
host[i + 1] != '.' && // Lone zero is not octal
|
||
(i == 0 || host[i - 1] == '.')) { // Zero at start or following a dot
|
||
// is a candidate for octal
|
||
bases[dotCount] = 8; // This will turn to 16 above if X shows up
|
||
onlyBase10 = false;
|
||
}
|
||
lastWasNumber = true;
|
||
|
||
} else if (current >= '1' && current <= '7') {
|
||
lastWasNumber = true;
|
||
|
||
} else if (current >= '8' && current <= '9') {
|
||
if (bases[dotCount] == 8) {
|
||
return -1;
|
||
}
|
||
lastWasNumber = true;
|
||
|
||
} else if ((current >= 'a' && current <= 'f') ||
|
||
(current >= 'A' && current <= 'F')) {
|
||
if (bases[dotCount] != 16) {
|
||
return -1;
|
||
}
|
||
lastWasNumber = true;
|
||
|
||
} else {
|
||
return -1;
|
||
}
|
||
}
|
||
|
||
return dotCount;
|
||
}
|
||
|
||
bool ContainsOnlyAsciiDigits(const nsDependentCSubstring& input) {
|
||
for (const auto* c = input.BeginReading(); c < input.EndReading(); c++) {
|
||
if (!IsAsciiDigit(*c)) {
|
||
return false;
|
||
}
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
bool ContainsOnlyAsciiHexDigits(const nsDependentCSubstring& input) {
|
||
for (const auto* c = input.BeginReading(); c < input.EndReading(); c++) {
|
||
if (!IsAsciiHexDigit(*c)) {
|
||
return false;
|
||
}
|
||
}
|
||
return true;
|
||
}
|
||
|
||
} // namespace mozilla::net::IPv4Parser
|