diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /security/sandbox/chromium/base/strings | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'security/sandbox/chromium/base/strings')
26 files changed, 7090 insertions, 0 deletions
diff --git a/security/sandbox/chromium/base/strings/char_traits.h b/security/sandbox/chromium/base/strings/char_traits.h new file mode 100644 index 0000000000..b193e216cc --- /dev/null +++ b/security/sandbox/chromium/base/strings/char_traits.h @@ -0,0 +1,92 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_CHAR_TRAITS_H_ +#define BASE_STRINGS_CHAR_TRAITS_H_ + +#include <stddef.h> + +#include "base/compiler_specific.h" + +namespace base { + +// constexpr version of http://en.cppreference.com/w/cpp/string/char_traits. +// This currently just implements the bits needed to support a (mostly) +// constexpr StringPiece. +// +// TODO(dcheng): Once we switch to C++17, most methods will become constexpr and +// we can switch over to using the one in the standard library. +template <typename T> +struct CharTraits { + // Performs a lexographical comparison of the first N characters of |s1| and + // |s2|. Returns 0 if equal, -1 if |s1| is less than |s2|, and 1 if |s1| is + // greater than |s2|. + static constexpr int compare(const T* s1, const T* s2, size_t n) noexcept; + + // Returns the length of |s|, assuming null termination (and not including the + // terminating null). + static constexpr size_t length(const T* s) noexcept; +}; + +template <typename T> +constexpr int CharTraits<T>::compare(const T* s1, + const T* s2, + size_t n) noexcept { + for (; n; --n, ++s1, ++s2) { + if (*s1 < *s2) + return -1; + if (*s1 > *s2) + return 1; + } + return 0; +} + +template <typename T> +constexpr size_t CharTraits<T>::length(const T* s) noexcept { + size_t i = 0; + for (; *s; ++s) + ++i; + return i; +} + +// char specialization of CharTraits that can use clang's constexpr instrinsics, +// where available. +template <> +struct CharTraits<char> { + static constexpr int compare(const char* s1, + const char* s2, + size_t n) noexcept; + static constexpr size_t length(const char* s) noexcept; +}; + +constexpr int CharTraits<char>::compare(const char* s1, + const char* s2, + size_t n) noexcept { +#if HAS_FEATURE(cxx_constexpr_string_builtins) + return __builtin_memcmp(s1, s2, n); +#else + for (; n; --n, ++s1, ++s2) { + if (*s1 < *s2) + return -1; + if (*s1 > *s2) + return 1; + } + return 0; +#endif +} + +constexpr size_t CharTraits<char>::length(const char* s) noexcept { +#if defined(__clang__) + return __builtin_strlen(s); +#else + size_t i = 0; + for (; *s; ++s) + ++i; + return i; +#endif +} + +} // namespace base + +#endif // BASE_STRINGS_CHAR_TRAITS_H_ diff --git a/security/sandbox/chromium/base/strings/nullable_string16.cc b/security/sandbox/chromium/base/strings/nullable_string16.cc new file mode 100644 index 0000000000..076b282eb1 --- /dev/null +++ b/security/sandbox/chromium/base/strings/nullable_string16.cc @@ -0,0 +1,33 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/nullable_string16.h" + +#include <ostream> +#include <utility> + +namespace base { +NullableString16::NullableString16() = default; +NullableString16::NullableString16(const NullableString16& other) = default; +NullableString16::NullableString16(NullableString16&& other) = default; + +NullableString16::NullableString16(const string16& string, bool is_null) { + if (!is_null) + string_.emplace(string); +} + +NullableString16::NullableString16(Optional<string16> optional_string16) + : string_(std::move(optional_string16)) {} + +NullableString16::~NullableString16() = default; +NullableString16& NullableString16::operator=(const NullableString16& other) = + default; +NullableString16& NullableString16::operator=(NullableString16&& other) = + default; + +std::ostream& operator<<(std::ostream& out, const NullableString16& value) { + return value.is_null() ? out << "(null)" : out << value.string(); +} + +} // namespace base diff --git a/security/sandbox/chromium/base/strings/nullable_string16.h b/security/sandbox/chromium/base/strings/nullable_string16.h new file mode 100644 index 0000000000..abddee0f74 --- /dev/null +++ b/security/sandbox/chromium/base/strings/nullable_string16.h @@ -0,0 +1,55 @@ +// Copyright (c) 2010 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_NULLABLE_STRING16_H_ +#define BASE_STRINGS_NULLABLE_STRING16_H_ + +#include <iosfwd> + +#include "base/base_export.h" +#include "base/optional.h" +#include "base/strings/string16.h" +#include "base/strings/string_util.h" + +namespace base { + +// This class is a simple wrapper for string16 which also contains a null +// state. This should be used only where the difference between null and +// empty is meaningful. +class BASE_EXPORT NullableString16 { + public: + NullableString16(); + NullableString16(const NullableString16& other); + NullableString16(NullableString16&& other); + NullableString16(const string16& string, bool is_null); + explicit NullableString16(Optional<string16> optional_string16); + ~NullableString16(); + + NullableString16& operator=(const NullableString16& other); + NullableString16& operator=(NullableString16&& other); + + const string16& string() const { + return string_ ? *string_ : EmptyString16(); + } + bool is_null() const { return !string_; } + const Optional<string16>& as_optional_string16() const { return string_; } + + private: + Optional<string16> string_; +}; + +inline bool operator==(const NullableString16& a, const NullableString16& b) { + return a.as_optional_string16() == b.as_optional_string16(); +} + +inline bool operator!=(const NullableString16& a, const NullableString16& b) { + return !(a == b); +} + +BASE_EXPORT std::ostream& operator<<(std::ostream& out, + const NullableString16& value); + +} // namespace base + +#endif // BASE_STRINGS_NULLABLE_STRING16_H_ diff --git a/security/sandbox/chromium/base/strings/safe_sprintf.cc b/security/sandbox/chromium/base/strings/safe_sprintf.cc new file mode 100644 index 0000000000..89049abd79 --- /dev/null +++ b/security/sandbox/chromium/base/strings/safe_sprintf.cc @@ -0,0 +1,682 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/safe_sprintf.h" + +#include <errno.h> +#include <string.h> + +#include <algorithm> +#include <limits> + +#include "base/macros.h" +#include "build/build_config.h" + +#if !defined(NDEBUG) +// In debug builds, we use RAW_CHECK() to print useful error messages, if +// SafeSPrintf() is called with broken arguments. +// As our contract promises that SafeSPrintf() can be called from any +// restricted run-time context, it is not actually safe to call logging +// functions from it; and we only ever do so for debug builds and hope for the +// best. We should _never_ call any logging function other than RAW_CHECK(), +// and we should _never_ include any logging code that is active in production +// builds. Most notably, we should not include these logging functions in +// unofficial release builds, even though those builds would otherwise have +// DCHECKS() enabled. +// In other words; please do not remove the #ifdef around this #include. +// Instead, in production builds we opt for returning a degraded result, +// whenever an error is encountered. +// E.g. The broken function call +// SafeSPrintf("errno = %d (%x)", errno, strerror(errno)) +// will print something like +// errno = 13, (%x) +// instead of +// errno = 13 (Access denied) +// In most of the anticipated use cases, that's probably the preferred +// behavior. +#include "base/logging.h" +#define DEBUG_CHECK RAW_CHECK +#else +#define DEBUG_CHECK(x) do { if (x) { } } while (0) +#endif + +namespace base { +namespace strings { + +// The code in this file is extremely careful to be async-signal-safe. +// +// Most obviously, we avoid calling any code that could dynamically allocate +// memory. Doing so would almost certainly result in bugs and dead-locks. +// We also avoid calling any other STL functions that could have unintended +// side-effects involving memory allocation or access to other shared +// resources. +// +// But on top of that, we also avoid calling other library functions, as many +// of them have the side-effect of calling getenv() (in order to deal with +// localization) or accessing errno. The latter sounds benign, but there are +// several execution contexts where it isn't even possible to safely read let +// alone write errno. +// +// The stated design goal of the SafeSPrintf() function is that it can be +// called from any context that can safely call C or C++ code (i.e. anything +// that doesn't require assembly code). +// +// For a brief overview of some but not all of the issues with async-signal- +// safety, refer to: +// http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html + +namespace { +const size_t kSSizeMaxConst = ((size_t)(ssize_t)-1) >> 1; + +const char kUpCaseHexDigits[] = "0123456789ABCDEF"; +const char kDownCaseHexDigits[] = "0123456789abcdef"; +} + +#if defined(NDEBUG) +// We would like to define kSSizeMax as std::numeric_limits<ssize_t>::max(), +// but C++ doesn't allow us to do that for constants. Instead, we have to +// use careful casting and shifting. We later use a static_assert to +// verify that this worked correctly. +namespace { +const size_t kSSizeMax = kSSizeMaxConst; +} +#else // defined(NDEBUG) +// For efficiency, we really need kSSizeMax to be a constant. But for unit +// tests, it should be adjustable. This allows us to verify edge cases without +// having to fill the entire available address space. As a compromise, we make +// kSSizeMax adjustable in debug builds, and then only compile that particular +// part of the unit test in debug builds. +namespace { +static size_t kSSizeMax = kSSizeMaxConst; +} + +namespace internal { +void SetSafeSPrintfSSizeMaxForTest(size_t max) { + kSSizeMax = max; +} + +size_t GetSafeSPrintfSSizeMaxForTest() { + return kSSizeMax; +} +} +#endif // defined(NDEBUG) + +namespace { +class Buffer { + public: + // |buffer| is caller-allocated storage that SafeSPrintf() writes to. It + // has |size| bytes of writable storage. It is the caller's responsibility + // to ensure that the buffer is at least one byte in size, so that it fits + // the trailing NUL that will be added by the destructor. The buffer also + // must be smaller or equal to kSSizeMax in size. + Buffer(char* buffer, size_t size) + : buffer_(buffer), + size_(size - 1), // Account for trailing NUL byte + count_(0) { +// MSVS2013's standard library doesn't mark max() as constexpr yet. cl.exe +// supports static_cast but doesn't really implement constexpr yet so it doesn't +// complain, but clang does. +#if __cplusplus >= 201103 && !(defined(__clang__) && defined(OS_WIN)) + static_assert(kSSizeMaxConst == + static_cast<size_t>(std::numeric_limits<ssize_t>::max()), + "kSSizeMaxConst should be the max value of an ssize_t"); +#endif + DEBUG_CHECK(size > 0); + DEBUG_CHECK(size <= kSSizeMax); + } + + ~Buffer() { + // The code calling the constructor guaranteed that there was enough space + // to store a trailing NUL -- and in debug builds, we are actually + // verifying this with DEBUG_CHECK()s in the constructor. So, we can + // always unconditionally write the NUL byte in the destructor. We do not + // need to adjust the count_, as SafeSPrintf() copies snprintf() in not + // including the NUL byte in its return code. + *GetInsertionPoint() = '\000'; + } + + // Returns true, iff the buffer is filled all the way to |kSSizeMax-1|. The + // caller can now stop adding more data, as GetCount() has reached its + // maximum possible value. + inline bool OutOfAddressableSpace() const { + return count_ == static_cast<size_t>(kSSizeMax - 1); + } + + // Returns the number of bytes that would have been emitted to |buffer_| + // if it was sized sufficiently large. This number can be larger than + // |size_|, if the caller provided an insufficiently large output buffer. + // But it will never be bigger than |kSSizeMax-1|. + inline ssize_t GetCount() const { + DEBUG_CHECK(count_ < kSSizeMax); + return static_cast<ssize_t>(count_); + } + + // Emits one |ch| character into the |buffer_| and updates the |count_| of + // characters that are currently supposed to be in the buffer. + // Returns "false", iff the buffer was already full. + // N.B. |count_| increases even if no characters have been written. This is + // needed so that GetCount() can return the number of bytes that should + // have been allocated for the |buffer_|. + inline bool Out(char ch) { + if (size_ >= 1 && count_ < size_) { + buffer_[count_] = ch; + return IncrementCountByOne(); + } + // |count_| still needs to be updated, even if the buffer has been + // filled completely. This allows SafeSPrintf() to return the number of + // bytes that should have been emitted. + IncrementCountByOne(); + return false; + } + + // Inserts |padding|-|len| bytes worth of padding into the |buffer_|. + // |count_| will also be incremented by the number of bytes that were meant + // to be emitted. The |pad| character is typically either a ' ' space + // or a '0' zero, but other non-NUL values are legal. + // Returns "false", iff the the |buffer_| filled up (i.e. |count_| + // overflowed |size_|) at any time during padding. + inline bool Pad(char pad, size_t padding, size_t len) { + DEBUG_CHECK(pad); + DEBUG_CHECK(padding <= kSSizeMax); + for (; padding > len; --padding) { + if (!Out(pad)) { + if (--padding) { + IncrementCount(padding-len); + } + return false; + } + } + return true; + } + + // POSIX doesn't define any async-signal-safe function for converting + // an integer to ASCII. Define our own version. + // + // This also gives us the ability to make the function a little more + // powerful and have it deal with |padding|, with truncation, and with + // predicting the length of the untruncated output. + // + // IToASCII() converts an integer |i| to ASCII. + // + // Unlike similar functions in the standard C library, it never appends a + // NUL character. This is left for the caller to do. + // + // While the function signature takes a signed int64_t, the code decides at + // run-time whether to treat the argument as signed (int64_t) or as unsigned + // (uint64_t) based on the value of |sign|. + // + // It supports |base|s 2 through 16. Only a |base| of 10 is allowed to have + // a |sign|. Otherwise, |i| is treated as unsigned. + // + // For bases larger than 10, |upcase| decides whether lower-case or upper- + // case letters should be used to designate digits greater than 10. + // + // Padding can be done with either '0' zeros or ' ' spaces. Padding has to + // be positive and will always be applied to the left of the output. + // + // Prepends a |prefix| to the number (e.g. "0x"). This prefix goes to + // the left of |padding|, if |pad| is '0'; and to the right of |padding| + // if |pad| is ' '. + // + // Returns "false", if the |buffer_| overflowed at any time. + bool IToASCII(bool sign, bool upcase, int64_t i, int base, + char pad, size_t padding, const char* prefix); + + private: + // Increments |count_| by |inc| unless this would cause |count_| to + // overflow |kSSizeMax-1|. Returns "false", iff an overflow was detected; + // it then clamps |count_| to |kSSizeMax-1|. + inline bool IncrementCount(size_t inc) { + // "inc" is either 1 or a "padding" value. Padding is clamped at + // run-time to at most kSSizeMax-1. So, we know that "inc" is always in + // the range 1..kSSizeMax-1. + // This allows us to compute "kSSizeMax - 1 - inc" without incurring any + // integer overflows. + DEBUG_CHECK(inc <= kSSizeMax - 1); + if (count_ > kSSizeMax - 1 - inc) { + count_ = kSSizeMax - 1; + return false; + } + count_ += inc; + return true; + } + + // Convenience method for the common case of incrementing |count_| by one. + inline bool IncrementCountByOne() { + return IncrementCount(1); + } + + // Return the current insertion point into the buffer. This is typically + // at |buffer_| + |count_|, but could be before that if truncation + // happened. It always points to one byte past the last byte that was + // successfully placed into the |buffer_|. + inline char* GetInsertionPoint() const { + size_t idx = count_; + if (idx > size_) { + idx = size_; + } + return buffer_ + idx; + } + + // User-provided buffer that will receive the fully formatted output string. + char* buffer_; + + // Number of bytes that are available in the buffer excluding the trailing + // NUL byte that will be added by the destructor. + const size_t size_; + + // Number of bytes that would have been emitted to the buffer, if the buffer + // was sufficiently big. This number always excludes the trailing NUL byte + // and it is guaranteed to never grow bigger than kSSizeMax-1. + size_t count_; + + DISALLOW_COPY_AND_ASSIGN(Buffer); +}; + + +bool Buffer::IToASCII(bool sign, bool upcase, int64_t i, int base, + char pad, size_t padding, const char* prefix) { + // Sanity check for parameters. None of these should ever fail, but see + // above for the rationale why we can't call CHECK(). + DEBUG_CHECK(base >= 2); + DEBUG_CHECK(base <= 16); + DEBUG_CHECK(!sign || base == 10); + DEBUG_CHECK(pad == '0' || pad == ' '); + DEBUG_CHECK(padding <= kSSizeMax); + DEBUG_CHECK(!(sign && prefix && *prefix)); + + // Handle negative numbers, if the caller indicated that |i| should be + // treated as a signed number; otherwise treat |i| as unsigned (even if the + // MSB is set!) + // Details are tricky, because of limited data-types, but equivalent pseudo- + // code would look like: + // if (sign && i < 0) + // prefix = "-"; + // num = abs(i); + int minint = 0; + uint64_t num; + if (sign && i < 0) { + prefix = "-"; + + // Turn our number positive. + if (i == std::numeric_limits<int64_t>::min()) { + // The most negative integer needs special treatment. + minint = 1; + num = static_cast<uint64_t>(-(i + 1)); + } else { + // "Normal" negative numbers are easy. + num = static_cast<uint64_t>(-i); + } + } else { + num = static_cast<uint64_t>(i); + } + + // If padding with '0' zero, emit the prefix or '-' character now. Otherwise, + // make the prefix accessible in reverse order, so that we can later output + // it right between padding and the number. + // We cannot choose the easier approach of just reversing the number, as that + // fails in situations where we need to truncate numbers that have padding + // and/or prefixes. + const char* reverse_prefix = nullptr; + if (prefix && *prefix) { + if (pad == '0') { + while (*prefix) { + if (padding) { + --padding; + } + Out(*prefix++); + } + prefix = nullptr; + } else { + for (reverse_prefix = prefix; *reverse_prefix; ++reverse_prefix) { + } + } + } else + prefix = nullptr; + const size_t prefix_length = reverse_prefix - prefix; + + // Loop until we have converted the entire number. Output at least one + // character (i.e. '0'). + size_t start = count_; + size_t discarded = 0; + bool started = false; + do { + // Make sure there is still enough space left in our output buffer. + if (count_ >= size_) { + if (start < size_) { + // It is rare that we need to output a partial number. But if asked + // to do so, we will still make sure we output the correct number of + // leading digits. + // Since we are generating the digits in reverse order, we actually + // have to discard digits in the order that we have already emitted + // them. This is essentially equivalent to: + // memmove(buffer_ + start, buffer_ + start + 1, size_ - start - 1) + for (char* move = buffer_ + start, *end = buffer_ + size_ - 1; + move < end; + ++move) { + *move = move[1]; + } + ++discarded; + --count_; + } else if (count_ - size_ > 1) { + // Need to increment either |count_| or |discarded| to make progress. + // The latter is more efficient, as it eventually triggers fast + // handling of padding. But we have to ensure we don't accidentally + // change the overall state (i.e. switch the state-machine from + // discarding to non-discarding). |count_| needs to always stay + // bigger than |size_|. + --count_; + ++discarded; + } + } + + // Output the next digit and (if necessary) compensate for the most + // negative integer needing special treatment. This works because, + // no matter the bit width of the integer, the lowest-most decimal + // integer always ends in 2, 4, 6, or 8. + if (!num && started) { + if (reverse_prefix > prefix) { + Out(*--reverse_prefix); + } else { + Out(pad); + } + } else { + started = true; + Out((upcase ? kUpCaseHexDigits : kDownCaseHexDigits)[num%base + minint]); + } + + minint = 0; + num /= base; + + // Add padding, if requested. + if (padding > 0) { + --padding; + + // Performance optimization for when we are asked to output excessive + // padding, but our output buffer is limited in size. Even if we output + // a 64bit number in binary, we would never write more than 64 plus + // prefix non-padding characters. So, once this limit has been passed, + // any further state change can be computed arithmetically; we know that + // by this time, our entire final output consists of padding characters + // that have all already been output. + if (discarded > 8*sizeof(num) + prefix_length) { + IncrementCount(padding); + padding = 0; + } + } + } while (num || padding || (reverse_prefix > prefix)); + + // Conversion to ASCII actually resulted in the digits being in reverse + // order. We can't easily generate them in forward order, as we can't tell + // the number of characters needed until we are done converting. + // So, now, we reverse the string (except for the possible '-' sign). + char* front = buffer_ + start; + char* back = GetInsertionPoint(); + while (--back > front) { + char ch = *back; + *back = *front; + *front++ = ch; + } + + IncrementCount(discarded); + return !discarded; +} + +} // anonymous namespace + +namespace internal { + +ssize_t SafeSNPrintf(char* buf, size_t sz, const char* fmt, const Arg* args, + const size_t max_args) { + // Make sure that at least one NUL byte can be written, and that the buffer + // never overflows kSSizeMax. Not only does that use up most or all of the + // address space, it also would result in a return code that cannot be + // represented. + if (static_cast<ssize_t>(sz) < 1) + return -1; + sz = std::min(sz, kSSizeMax); + + // Iterate over format string and interpret '%' arguments as they are + // encountered. + Buffer buffer(buf, sz); + size_t padding; + char pad; + for (unsigned int cur_arg = 0; *fmt && !buffer.OutOfAddressableSpace(); ) { + if (*fmt++ == '%') { + padding = 0; + pad = ' '; + char ch = *fmt++; + format_character_found: + switch (ch) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + // Found a width parameter. Convert to an integer value and store in + // "padding". If the leading digit is a zero, change the padding + // character from a space ' ' to a zero '0'. + pad = ch == '0' ? '0' : ' '; + for (;;) { + // The maximum allowed padding fills all the available address + // space and leaves just enough space to insert the trailing NUL. + const size_t max_padding = kSSizeMax - 1; + if (padding > max_padding/10 || + 10*padding > max_padding - (ch - '0')) { + DEBUG_CHECK(padding <= max_padding/10 && + 10*padding <= max_padding - (ch - '0')); + // Integer overflow detected. Skip the rest of the width until + // we find the format character, then do the normal error handling. + padding_overflow: + padding = max_padding; + while ((ch = *fmt++) >= '0' && ch <= '9') { + } + if (cur_arg < max_args) { + ++cur_arg; + } + goto fail_to_expand; + } + padding = 10*padding + ch - '0'; + if (padding > max_padding) { + // This doesn't happen for "sane" values of kSSizeMax. But once + // kSSizeMax gets smaller than about 10, our earlier range checks + // are incomplete. Unittests do trigger this artificial corner + // case. + DEBUG_CHECK(padding <= max_padding); + goto padding_overflow; + } + ch = *fmt++; + if (ch < '0' || ch > '9') { + // Reached the end of the width parameter. This is where the format + // character is found. + goto format_character_found; + } + } + break; + case 'c': { // Output an ASCII character. + // Check that there are arguments left to be inserted. + if (cur_arg >= max_args) { + DEBUG_CHECK(cur_arg < max_args); + goto fail_to_expand; + } + + // Check that the argument has the expected type. + const Arg& arg = args[cur_arg++]; + if (arg.type != Arg::INT && arg.type != Arg::UINT) { + DEBUG_CHECK(arg.type == Arg::INT || arg.type == Arg::UINT); + goto fail_to_expand; + } + + // Apply padding, if needed. + buffer.Pad(' ', padding, 1); + + // Convert the argument to an ASCII character and output it. + char as_char = static_cast<char>(arg.integer.i); + if (!as_char) { + goto end_of_output_buffer; + } + buffer.Out(as_char); + break; } + case 'd': // Output a possibly signed decimal value. + case 'o': // Output an unsigned octal value. + case 'x': // Output an unsigned hexadecimal value. + case 'X': + case 'p': { // Output a pointer value. + // Check that there are arguments left to be inserted. + if (cur_arg >= max_args) { + DEBUG_CHECK(cur_arg < max_args); + goto fail_to_expand; + } + + const Arg& arg = args[cur_arg++]; + int64_t i; + const char* prefix = nullptr; + if (ch != 'p') { + // Check that the argument has the expected type. + if (arg.type != Arg::INT && arg.type != Arg::UINT) { + DEBUG_CHECK(arg.type == Arg::INT || arg.type == Arg::UINT); + goto fail_to_expand; + } + i = arg.integer.i; + + if (ch != 'd') { + // The Arg() constructor automatically performed sign expansion on + // signed parameters. This is great when outputting a %d decimal + // number, but can result in unexpected leading 0xFF bytes when + // outputting a %x hexadecimal number. Mask bits, if necessary. + // We have to do this here, instead of in the Arg() constructor, as + // the Arg() constructor cannot tell whether we will output a %d + // or a %x. Only the latter should experience masking. + if (arg.integer.width < sizeof(int64_t)) { + i &= (1LL << (8*arg.integer.width)) - 1; + } + } + } else { + // Pointer values require an actual pointer or a string. + if (arg.type == Arg::POINTER) { + i = reinterpret_cast<uintptr_t>(arg.ptr); + } else if (arg.type == Arg::STRING) { + i = reinterpret_cast<uintptr_t>(arg.str); + } else if (arg.type == Arg::INT && + arg.integer.width == sizeof(NULL) && + arg.integer.i == 0) { // Allow C++'s version of NULL + i = 0; + } else { + DEBUG_CHECK(arg.type == Arg::POINTER || arg.type == Arg::STRING); + goto fail_to_expand; + } + + // Pointers always include the "0x" prefix. + prefix = "0x"; + } + + // Use IToASCII() to convert to ASCII representation. For decimal + // numbers, optionally print a sign. For hexadecimal numbers, + // distinguish between upper and lower case. %p addresses are always + // printed as upcase. Supports base 8, 10, and 16. Prints padding + // and/or prefixes, if so requested. + buffer.IToASCII(ch == 'd' && arg.type == Arg::INT, + ch != 'x', i, + ch == 'o' ? 8 : ch == 'd' ? 10 : 16, + pad, padding, prefix); + break; } + case 's': { + // Check that there are arguments left to be inserted. + if (cur_arg >= max_args) { + DEBUG_CHECK(cur_arg < max_args); + goto fail_to_expand; + } + + // Check that the argument has the expected type. + const Arg& arg = args[cur_arg++]; + const char *s; + if (arg.type == Arg::STRING) { + s = arg.str ? arg.str : "<NULL>"; + } else if (arg.type == Arg::INT && arg.integer.width == sizeof(NULL) && + arg.integer.i == 0) { // Allow C++'s version of NULL + s = "<NULL>"; + } else { + DEBUG_CHECK(arg.type == Arg::STRING); + goto fail_to_expand; + } + + // Apply padding, if needed. This requires us to first check the + // length of the string that we are outputting. + if (padding) { + size_t len = 0; + for (const char* src = s; *src++; ) { + ++len; + } + buffer.Pad(' ', padding, len); + } + + // Printing a string involves nothing more than copying it into the + // output buffer and making sure we don't output more bytes than + // available space; Out() takes care of doing that. + for (const char* src = s; *src; ) { + buffer.Out(*src++); + } + break; } + case '%': + // Quoted percent '%' character. + goto copy_verbatim; + fail_to_expand: + // C++ gives us tools to do type checking -- something that snprintf() + // could never really do. So, whenever we see arguments that don't + // match up with the format string, we refuse to output them. But + // since we have to be extremely conservative about being async- + // signal-safe, we are limited in the type of error handling that we + // can do in production builds (in debug builds we can use + // DEBUG_CHECK() and hope for the best). So, all we do is pass the + // format string unchanged. That should eventually get the user's + // attention; and in the meantime, it hopefully doesn't lose too much + // data. + default: + // Unknown or unsupported format character. Just copy verbatim to + // output. + buffer.Out('%'); + DEBUG_CHECK(ch); + if (!ch) { + goto end_of_format_string; + } + buffer.Out(ch); + break; + } + } else { + copy_verbatim: + buffer.Out(fmt[-1]); + } + } + end_of_format_string: + end_of_output_buffer: + return buffer.GetCount(); +} + +} // namespace internal + +ssize_t SafeSNPrintf(char* buf, size_t sz, const char* fmt) { + // Make sure that at least one NUL byte can be written, and that the buffer + // never overflows kSSizeMax. Not only does that use up most or all of the + // address space, it also would result in a return code that cannot be + // represented. + if (static_cast<ssize_t>(sz) < 1) + return -1; + sz = std::min(sz, kSSizeMax); + + Buffer buffer(buf, sz); + + // In the slow-path, we deal with errors by copying the contents of + // "fmt" unexpanded. This means, if there are no arguments passed, the + // SafeSPrintf() function always degenerates to a version of strncpy() that + // de-duplicates '%' characters. + const char* src = fmt; + for (; *src; ++src) { + buffer.Out(*src); + DEBUG_CHECK(src[0] != '%' || src[1] == '%'); + if (src[0] == '%' && src[1] == '%') { + ++src; + } + } + return buffer.GetCount(); +} + +} // namespace strings +} // namespace base diff --git a/security/sandbox/chromium/base/strings/safe_sprintf.h b/security/sandbox/chromium/base/strings/safe_sprintf.h new file mode 100644 index 0000000000..01d649d07a --- /dev/null +++ b/security/sandbox/chromium/base/strings/safe_sprintf.h @@ -0,0 +1,246 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_SAFE_SPRINTF_H_ +#define BASE_STRINGS_SAFE_SPRINTF_H_ + +#include "build/build_config.h" + +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> + +#if defined(OS_POSIX) || defined(OS_FUCHSIA) +// For ssize_t +#include <unistd.h> +#endif + +#include "base/base_export.h" + +namespace base { +namespace strings { + +#if defined(COMPILER_MSVC) +// Define ssize_t inside of our namespace. +#if defined(_WIN64) +typedef __int64 ssize_t; +#else +typedef long ssize_t; +#endif +#endif + +// SafeSPrintf() is a type-safe and completely self-contained version of +// snprintf(). +// +// SafeSNPrintf() is an alternative function signature that can be used when +// not dealing with fixed-sized buffers. When possible, SafeSPrintf() should +// always be used instead of SafeSNPrintf() +// +// These functions allow for formatting complicated messages from contexts that +// require strict async-signal-safety. In fact, it is safe to call them from +// any low-level execution context, as they are guaranteed to make no library +// or system calls. It deliberately never touches "errno", either. +// +// The only exception to this rule is that in debug builds the code calls +// RAW_CHECK() to help diagnose problems when the format string does not +// match the rest of the arguments. In release builds, no CHECK()s are used, +// and SafeSPrintf() instead returns an output string that expands only +// those arguments that match their format characters. Mismatched arguments +// are ignored. +// +// The code currently only supports a subset of format characters: +// %c, %o, %d, %x, %X, %p, and %s. +// +// SafeSPrintf() aims to be as liberal as reasonably possible. Integer-like +// values of arbitrary width can be passed to all of the format characters +// that expect integers. Thus, it is explicitly legal to pass an "int" to +// "%c", and output will automatically look at the LSB only. It is also +// explicitly legal to pass either signed or unsigned values, and the format +// characters will automatically interpret the arguments accordingly. +// +// It is still not legal to mix-and-match integer-like values with pointer +// values. For instance, you cannot pass a pointer to %x, nor can you pass an +// integer to %p. +// +// The one exception is "0" zero being accepted by "%p". This works-around +// the problem of C++ defining NULL as an integer-like value. +// +// All format characters take an optional width parameter. This must be a +// positive integer. For %d, %o, %x, %X and %p, if the width starts with +// a leading '0', padding is done with '0' instead of ' ' characters. +// +// There are a few features of snprintf()-style format strings, that +// SafeSPrintf() does not support at this time. +// +// If an actual user showed up, there is no particularly strong reason they +// couldn't be added. But that assumes that the trade-offs between complexity +// and utility are favorable. +// +// For example, adding support for negative padding widths, and for %n are all +// likely to be viewed positively. They are all clearly useful, low-risk, easy +// to test, don't jeopardize the async-signal-safety of the code, and overall +// have little impact on other parts of SafeSPrintf() function. +// +// On the other hands, adding support for alternate forms, positional +// arguments, grouping, wide characters, localization or floating point numbers +// are all unlikely to ever be added. +// +// SafeSPrintf() and SafeSNPrintf() mimic the behavior of snprintf() and they +// return the number of bytes needed to store the untruncated output. This +// does *not* include the terminating NUL byte. +// +// They return -1, iff a fatal error happened. This typically can only happen, +// if the buffer size is a) negative, or b) zero (i.e. not even the NUL byte +// can be written). The return value can never be larger than SSIZE_MAX-1. +// This ensures that the caller can always add one to the signed return code +// in order to determine the amount of storage that needs to be allocated. +// +// While the code supports type checking and while it is generally very careful +// to avoid printing incorrect values, it tends to be conservative in printing +// as much as possible, even when given incorrect parameters. Typically, in +// case of an error, the format string will not be expanded. (i.e. something +// like SafeSPrintf(buf, "%p %d", 1, 2) results in "%p 2"). See above for +// the use of RAW_CHECK() in debug builds, though. +// +// Basic example: +// char buf[20]; +// base::strings::SafeSPrintf(buf, "The answer: %2d", 42); +// +// Example with dynamically sized buffer (async-signal-safe). This code won't +// work on Visual studio, as it requires dynamically allocating arrays on the +// stack. Consider picking a smaller value for |kMaxSize| if stack size is +// limited and known. On the other hand, if the parameters to SafeSNPrintf() +// are trusted and not controllable by the user, you can consider eliminating +// the check for |kMaxSize| altogether. The current value of SSIZE_MAX is +// essentially a no-op that just illustrates how to implement an upper bound: +// const size_t kInitialSize = 128; +// const size_t kMaxSize = std::numeric_limits<ssize_t>::max(); +// size_t size = kInitialSize; +// for (;;) { +// char buf[size]; +// size = SafeSNPrintf(buf, size, "Error message \"%s\"\n", err) + 1; +// if (sizeof(buf) < kMaxSize && size > kMaxSize) { +// size = kMaxSize; +// continue; +// } else if (size > sizeof(buf)) +// continue; +// write(2, buf, size-1); +// break; +// } + +namespace internal { +// Helpers that use C++ overloading, templates, and specializations to deduce +// and record type information from function arguments. This allows us to +// later write a type-safe version of snprintf(). + +struct Arg { + enum Type { INT, UINT, STRING, POINTER }; + + // Any integer-like value. + Arg(signed char c) : type(INT) { + integer.i = c; + integer.width = sizeof(char); + } + Arg(unsigned char c) : type(UINT) { + integer.i = c; + integer.width = sizeof(char); + } + Arg(signed short j) : type(INT) { + integer.i = j; + integer.width = sizeof(short); + } + Arg(unsigned short j) : type(UINT) { + integer.i = j; + integer.width = sizeof(short); + } + Arg(signed int j) : type(INT) { + integer.i = j; + integer.width = sizeof(int); + } + Arg(unsigned int j) : type(UINT) { + integer.i = j; + integer.width = sizeof(int); + } + Arg(signed long j) : type(INT) { + integer.i = j; + integer.width = sizeof(long); + } + Arg(unsigned long j) : type(UINT) { + integer.i = j; + integer.width = sizeof(long); + } + Arg(signed long long j) : type(INT) { + integer.i = j; + integer.width = sizeof(long long); + } + Arg(unsigned long long j) : type(UINT) { + integer.i = j; + integer.width = sizeof(long long); + } + + // A C-style text string. + Arg(const char* s) : str(s), type(STRING) { } + Arg(char* s) : str(s), type(STRING) { } + + // Any pointer value that can be cast to a "void*". + template<class T> Arg(T* p) : ptr((void*)p), type(POINTER) { } + + union { + // An integer-like value. + struct { + int64_t i; + unsigned char width; + } integer; + + // A C-style text string. + const char* str; + + // A pointer to an arbitrary object. + const void* ptr; + }; + const enum Type type; +}; + +// This is the internal function that performs the actual formatting of +// an snprintf()-style format string. +BASE_EXPORT ssize_t SafeSNPrintf(char* buf, size_t sz, const char* fmt, + const Arg* args, size_t max_args); + +#if !defined(NDEBUG) +// In debug builds, allow unit tests to artificially lower the kSSizeMax +// constant that is used as a hard upper-bound for all buffers. In normal +// use, this constant should always be std::numeric_limits<ssize_t>::max(). +BASE_EXPORT void SetSafeSPrintfSSizeMaxForTest(size_t max); +BASE_EXPORT size_t GetSafeSPrintfSSizeMaxForTest(); +#endif + +} // namespace internal + +template<typename... Args> +ssize_t SafeSNPrintf(char* buf, size_t N, const char* fmt, Args... args) { + // Use Arg() object to record type information and then copy arguments to an + // array to make it easier to iterate over them. + const internal::Arg arg_array[] = { args... }; + return internal::SafeSNPrintf(buf, N, fmt, arg_array, sizeof...(args)); +} + +template<size_t N, typename... Args> +ssize_t SafeSPrintf(char (&buf)[N], const char* fmt, Args... args) { + // Use Arg() object to record type information and then copy arguments to an + // array to make it easier to iterate over them. + const internal::Arg arg_array[] = { args... }; + return internal::SafeSNPrintf(buf, N, fmt, arg_array, sizeof...(args)); +} + +// Fast-path when we don't actually need to substitute any arguments. +BASE_EXPORT ssize_t SafeSNPrintf(char* buf, size_t N, const char* fmt); +template<size_t N> +inline ssize_t SafeSPrintf(char (&buf)[N], const char* fmt) { + return SafeSNPrintf(buf, N, fmt); +} + +} // namespace strings +} // namespace base + +#endif // BASE_STRINGS_SAFE_SPRINTF_H_ diff --git a/security/sandbox/chromium/base/strings/safe_sprintf_unittest.cc b/security/sandbox/chromium/base/strings/safe_sprintf_unittest.cc new file mode 100644 index 0000000000..bb9908f928 --- /dev/null +++ b/security/sandbox/chromium/base/strings/safe_sprintf_unittest.cc @@ -0,0 +1,765 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/safe_sprintf.h" + +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +#include <limits> +#include <memory> + +#include "base/logging.h" +#include "base/macros.h" +#include "build/build_config.h" +#include "testing/gtest/include/gtest/gtest.h" + +// Death tests on Android are currently very flaky. No need to add more flaky +// tests, as they just make it hard to spot real problems. +// TODO(markus): See if the restrictions on Android can eventually be lifted. +#if defined(GTEST_HAS_DEATH_TEST) && !defined(OS_ANDROID) +#define ALLOW_DEATH_TEST +#endif + +namespace base { +namespace strings { + +TEST(SafeSPrintfTest, Empty) { + char buf[2] = { 'X', 'X' }; + + // Negative buffer size should always result in an error. + EXPECT_EQ(-1, SafeSNPrintf(buf, static_cast<size_t>(-1), "")); + EXPECT_EQ('X', buf[0]); + EXPECT_EQ('X', buf[1]); + + // Zero buffer size should always result in an error. + EXPECT_EQ(-1, SafeSNPrintf(buf, 0, "")); + EXPECT_EQ('X', buf[0]); + EXPECT_EQ('X', buf[1]); + + // A one-byte buffer should always print a single NUL byte. + EXPECT_EQ(0, SafeSNPrintf(buf, 1, "")); + EXPECT_EQ(0, buf[0]); + EXPECT_EQ('X', buf[1]); + buf[0] = 'X'; + + // A larger buffer should leave the trailing bytes unchanged. + EXPECT_EQ(0, SafeSNPrintf(buf, 2, "")); + EXPECT_EQ(0, buf[0]); + EXPECT_EQ('X', buf[1]); + buf[0] = 'X'; + + // The same test using SafeSPrintf() instead of SafeSNPrintf(). + EXPECT_EQ(0, SafeSPrintf(buf, "")); + EXPECT_EQ(0, buf[0]); + EXPECT_EQ('X', buf[1]); + buf[0] = 'X'; +} + +TEST(SafeSPrintfTest, NoArguments) { + // Output a text message that doesn't require any substitutions. This + // is roughly equivalent to calling strncpy() (but unlike strncpy(), it does + // always add a trailing NUL; it always deduplicates '%' characters). + static const char text[] = "hello world"; + char ref[20], buf[20]; + memset(ref, 'X', sizeof(ref)); + memcpy(buf, ref, sizeof(buf)); + + // A negative buffer size should always result in an error. + EXPECT_EQ(-1, SafeSNPrintf(buf, static_cast<size_t>(-1), text)); + EXPECT_TRUE(!memcmp(buf, ref, sizeof(buf))); + + // Zero buffer size should always result in an error. + EXPECT_EQ(-1, SafeSNPrintf(buf, 0, text)); + EXPECT_TRUE(!memcmp(buf, ref, sizeof(buf))); + + // A one-byte buffer should always print a single NUL byte. + EXPECT_EQ(static_cast<ssize_t>(sizeof(text))-1, SafeSNPrintf(buf, 1, text)); + EXPECT_EQ(0, buf[0]); + EXPECT_TRUE(!memcmp(buf+1, ref+1, sizeof(buf)-1)); + memcpy(buf, ref, sizeof(buf)); + + // A larger (but limited) buffer should always leave the trailing bytes + // unchanged. + EXPECT_EQ(static_cast<ssize_t>(sizeof(text))-1, SafeSNPrintf(buf, 2, text)); + EXPECT_EQ(text[0], buf[0]); + EXPECT_EQ(0, buf[1]); + EXPECT_TRUE(!memcmp(buf+2, ref+2, sizeof(buf)-2)); + memcpy(buf, ref, sizeof(buf)); + + // A unrestricted buffer length should always leave the trailing bytes + // unchanged. + EXPECT_EQ(static_cast<ssize_t>(sizeof(text))-1, + SafeSNPrintf(buf, sizeof(buf), text)); + EXPECT_EQ(std::string(text), std::string(buf)); + EXPECT_TRUE(!memcmp(buf + sizeof(text), ref + sizeof(text), + sizeof(buf) - sizeof(text))); + memcpy(buf, ref, sizeof(buf)); + + // The same test using SafeSPrintf() instead of SafeSNPrintf(). + EXPECT_EQ(static_cast<ssize_t>(sizeof(text))-1, SafeSPrintf(buf, text)); + EXPECT_EQ(std::string(text), std::string(buf)); + EXPECT_TRUE(!memcmp(buf + sizeof(text), ref + sizeof(text), + sizeof(buf) - sizeof(text))); + memcpy(buf, ref, sizeof(buf)); + + // Check for deduplication of '%' percent characters. + EXPECT_EQ(1, SafeSPrintf(buf, "%%")); + EXPECT_EQ(2, SafeSPrintf(buf, "%%%%")); + EXPECT_EQ(2, SafeSPrintf(buf, "%%X")); + EXPECT_EQ(3, SafeSPrintf(buf, "%%%%X")); +#if defined(NDEBUG) + EXPECT_EQ(1, SafeSPrintf(buf, "%")); + EXPECT_EQ(2, SafeSPrintf(buf, "%%%")); + EXPECT_EQ(2, SafeSPrintf(buf, "%X")); + EXPECT_EQ(3, SafeSPrintf(buf, "%%%X")); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, "%"), "src.1. == '%'"); + EXPECT_DEATH(SafeSPrintf(buf, "%%%"), "src.1. == '%'"); + EXPECT_DEATH(SafeSPrintf(buf, "%X"), "src.1. == '%'"); + EXPECT_DEATH(SafeSPrintf(buf, "%%%X"), "src.1. == '%'"); +#endif +} + +TEST(SafeSPrintfTest, OneArgument) { + // Test basic single-argument single-character substitution. + const char text[] = "hello world"; + const char fmt[] = "hello%cworld"; + char ref[20], buf[20]; + memset(ref, 'X', sizeof(buf)); + memcpy(buf, ref, sizeof(buf)); + + // A negative buffer size should always result in an error. + EXPECT_EQ(-1, SafeSNPrintf(buf, static_cast<size_t>(-1), fmt, ' ')); + EXPECT_TRUE(!memcmp(buf, ref, sizeof(buf))); + + // Zero buffer size should always result in an error. + EXPECT_EQ(-1, SafeSNPrintf(buf, 0, fmt, ' ')); + EXPECT_TRUE(!memcmp(buf, ref, sizeof(buf))); + + // A one-byte buffer should always print a single NUL byte. + EXPECT_EQ(static_cast<ssize_t>(sizeof(text))-1, + SafeSNPrintf(buf, 1, fmt, ' ')); + EXPECT_EQ(0, buf[0]); + EXPECT_TRUE(!memcmp(buf+1, ref+1, sizeof(buf)-1)); + memcpy(buf, ref, sizeof(buf)); + + // A larger (but limited) buffer should always leave the trailing bytes + // unchanged. + EXPECT_EQ(static_cast<ssize_t>(sizeof(text))-1, + SafeSNPrintf(buf, 2, fmt, ' ')); + EXPECT_EQ(text[0], buf[0]); + EXPECT_EQ(0, buf[1]); + EXPECT_TRUE(!memcmp(buf+2, ref+2, sizeof(buf)-2)); + memcpy(buf, ref, sizeof(buf)); + + // A unrestricted buffer length should always leave the trailing bytes + // unchanged. + EXPECT_EQ(static_cast<ssize_t>(sizeof(text))-1, + SafeSNPrintf(buf, sizeof(buf), fmt, ' ')); + EXPECT_EQ(std::string(text), std::string(buf)); + EXPECT_TRUE(!memcmp(buf + sizeof(text), ref + sizeof(text), + sizeof(buf) - sizeof(text))); + memcpy(buf, ref, sizeof(buf)); + + // The same test using SafeSPrintf() instead of SafeSNPrintf(). + EXPECT_EQ(static_cast<ssize_t>(sizeof(text))-1, SafeSPrintf(buf, fmt, ' ')); + EXPECT_EQ(std::string(text), std::string(buf)); + EXPECT_TRUE(!memcmp(buf + sizeof(text), ref + sizeof(text), + sizeof(buf) - sizeof(text))); + memcpy(buf, ref, sizeof(buf)); + + // Check for deduplication of '%' percent characters. + EXPECT_EQ(1, SafeSPrintf(buf, "%%", 0)); + EXPECT_EQ(2, SafeSPrintf(buf, "%%%%", 0)); + EXPECT_EQ(2, SafeSPrintf(buf, "%Y", 0)); + EXPECT_EQ(2, SafeSPrintf(buf, "%%Y", 0)); + EXPECT_EQ(3, SafeSPrintf(buf, "%%%Y", 0)); + EXPECT_EQ(3, SafeSPrintf(buf, "%%%%Y", 0)); +#if defined(NDEBUG) + EXPECT_EQ(1, SafeSPrintf(buf, "%", 0)); + EXPECT_EQ(2, SafeSPrintf(buf, "%%%", 0)); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, "%", 0), "ch"); + EXPECT_DEATH(SafeSPrintf(buf, "%%%", 0), "ch"); +#endif +} + +TEST(SafeSPrintfTest, MissingArg) { +#if defined(NDEBUG) + char buf[20]; + EXPECT_EQ(3, SafeSPrintf(buf, "%c%c", 'A')); + EXPECT_EQ("A%c", std::string(buf)); +#elif defined(ALLOW_DEATH_TEST) + char buf[20]; + EXPECT_DEATH(SafeSPrintf(buf, "%c%c", 'A'), "cur_arg < max_args"); +#endif +} + +TEST(SafeSPrintfTest, ASANFriendlyBufferTest) { + // Print into a buffer that is sized exactly to size. ASAN can verify that + // nobody attempts to write past the end of the buffer. + // There is a more complicated test in PrintLongString() that covers a lot + // more edge case, but it is also harder to debug in case of a failure. + const char kTestString[] = "This is a test"; + std::unique_ptr<char[]> buf(new char[sizeof(kTestString)]); + EXPECT_EQ(static_cast<ssize_t>(sizeof(kTestString) - 1), + SafeSNPrintf(buf.get(), sizeof(kTestString), kTestString)); + EXPECT_EQ(std::string(kTestString), std::string(buf.get())); + EXPECT_EQ(static_cast<ssize_t>(sizeof(kTestString) - 1), + SafeSNPrintf(buf.get(), sizeof(kTestString), "%s", kTestString)); + EXPECT_EQ(std::string(kTestString), std::string(buf.get())); +} + +TEST(SafeSPrintfTest, NArgs) { + // Pre-C++11 compilers have a different code path, that can only print + // up to ten distinct arguments. + // We test both SafeSPrintf() and SafeSNPrintf(). This makes sure we don't + // have typos in the copy-n-pasted code that is needed to deal with various + // numbers of arguments. + char buf[12]; + EXPECT_EQ(1, SafeSPrintf(buf, "%c", 1)); + EXPECT_EQ("\1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%c%c", 1, 2)); + EXPECT_EQ("\1\2", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%c%c%c", 1, 2, 3)); + EXPECT_EQ("\1\2\3", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%c%c%c%c", 1, 2, 3, 4)); + EXPECT_EQ("\1\2\3\4", std::string(buf)); + EXPECT_EQ(5, SafeSPrintf(buf, "%c%c%c%c%c", 1, 2, 3, 4, 5)); + EXPECT_EQ("\1\2\3\4\5", std::string(buf)); + EXPECT_EQ(6, SafeSPrintf(buf, "%c%c%c%c%c%c", 1, 2, 3, 4, 5, 6)); + EXPECT_EQ("\1\2\3\4\5\6", std::string(buf)); + EXPECT_EQ(7, SafeSPrintf(buf, "%c%c%c%c%c%c%c", 1, 2, 3, 4, 5, 6, 7)); + EXPECT_EQ("\1\2\3\4\5\6\7", std::string(buf)); + EXPECT_EQ(8, SafeSPrintf(buf, "%c%c%c%c%c%c%c%c", 1, 2, 3, 4, 5, 6, 7, 8)); + EXPECT_EQ("\1\2\3\4\5\6\7\10", std::string(buf)); + EXPECT_EQ(9, SafeSPrintf(buf, "%c%c%c%c%c%c%c%c%c", + 1, 2, 3, 4, 5, 6, 7, 8, 9)); + EXPECT_EQ("\1\2\3\4\5\6\7\10\11", std::string(buf)); + EXPECT_EQ(10, SafeSPrintf(buf, "%c%c%c%c%c%c%c%c%c%c", + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); + + // Repeat all the tests with SafeSNPrintf() instead of SafeSPrintf(). + EXPECT_EQ("\1\2\3\4\5\6\7\10\11\12", std::string(buf)); + EXPECT_EQ(1, SafeSNPrintf(buf, 11, "%c", 1)); + EXPECT_EQ("\1", std::string(buf)); + EXPECT_EQ(2, SafeSNPrintf(buf, 11, "%c%c", 1, 2)); + EXPECT_EQ("\1\2", std::string(buf)); + EXPECT_EQ(3, SafeSNPrintf(buf, 11, "%c%c%c", 1, 2, 3)); + EXPECT_EQ("\1\2\3", std::string(buf)); + EXPECT_EQ(4, SafeSNPrintf(buf, 11, "%c%c%c%c", 1, 2, 3, 4)); + EXPECT_EQ("\1\2\3\4", std::string(buf)); + EXPECT_EQ(5, SafeSNPrintf(buf, 11, "%c%c%c%c%c", 1, 2, 3, 4, 5)); + EXPECT_EQ("\1\2\3\4\5", std::string(buf)); + EXPECT_EQ(6, SafeSNPrintf(buf, 11, "%c%c%c%c%c%c", 1, 2, 3, 4, 5, 6)); + EXPECT_EQ("\1\2\3\4\5\6", std::string(buf)); + EXPECT_EQ(7, SafeSNPrintf(buf, 11, "%c%c%c%c%c%c%c", 1, 2, 3, 4, 5, 6, 7)); + EXPECT_EQ("\1\2\3\4\5\6\7", std::string(buf)); + EXPECT_EQ(8, SafeSNPrintf(buf, 11, "%c%c%c%c%c%c%c%c", + 1, 2, 3, 4, 5, 6, 7, 8)); + EXPECT_EQ("\1\2\3\4\5\6\7\10", std::string(buf)); + EXPECT_EQ(9, SafeSNPrintf(buf, 11, "%c%c%c%c%c%c%c%c%c", + 1, 2, 3, 4, 5, 6, 7, 8, 9)); + EXPECT_EQ("\1\2\3\4\5\6\7\10\11", std::string(buf)); + EXPECT_EQ(10, SafeSNPrintf(buf, 11, "%c%c%c%c%c%c%c%c%c%c", + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); + EXPECT_EQ("\1\2\3\4\5\6\7\10\11\12", std::string(buf)); + + EXPECT_EQ(11, SafeSPrintf(buf, "%c%c%c%c%c%c%c%c%c%c%c", + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)); + EXPECT_EQ("\1\2\3\4\5\6\7\10\11\12\13", std::string(buf)); + EXPECT_EQ(11, SafeSNPrintf(buf, 12, "%c%c%c%c%c%c%c%c%c%c%c", + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)); + EXPECT_EQ("\1\2\3\4\5\6\7\10\11\12\13", std::string(buf)); +} + +TEST(SafeSPrintfTest, DataTypes) { + char buf[40]; + + // Bytes + EXPECT_EQ(1, SafeSPrintf(buf, "%d", (uint8_t)1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%d", (uint8_t)-1)); + EXPECT_EQ("255", std::string(buf)); + EXPECT_EQ(1, SafeSPrintf(buf, "%d", (int8_t)1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%d", (int8_t)-1)); + EXPECT_EQ("-1", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%d", (int8_t)-128)); + EXPECT_EQ("-128", std::string(buf)); + + // Half-words + EXPECT_EQ(1, SafeSPrintf(buf, "%d", (uint16_t)1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(5, SafeSPrintf(buf, "%d", (uint16_t)-1)); + EXPECT_EQ("65535", std::string(buf)); + EXPECT_EQ(1, SafeSPrintf(buf, "%d", (int16_t)1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%d", (int16_t)-1)); + EXPECT_EQ("-1", std::string(buf)); + EXPECT_EQ(6, SafeSPrintf(buf, "%d", (int16_t)-32768)); + EXPECT_EQ("-32768", std::string(buf)); + + // Words + EXPECT_EQ(1, SafeSPrintf(buf, "%d", (uint32_t)1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(10, SafeSPrintf(buf, "%d", (uint32_t)-1)); + EXPECT_EQ("4294967295", std::string(buf)); + EXPECT_EQ(1, SafeSPrintf(buf, "%d", (int32_t)1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%d", (int32_t)-1)); + EXPECT_EQ("-1", std::string(buf)); + // Work-around for an limitation of C90 + EXPECT_EQ(11, SafeSPrintf(buf, "%d", (int32_t)-2147483647-1)); + EXPECT_EQ("-2147483648", std::string(buf)); + + // Quads + EXPECT_EQ(1, SafeSPrintf(buf, "%d", (uint64_t)1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(20, SafeSPrintf(buf, "%d", (uint64_t)-1)); + EXPECT_EQ("18446744073709551615", std::string(buf)); + EXPECT_EQ(1, SafeSPrintf(buf, "%d", (int64_t)1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%d", (int64_t)-1)); + EXPECT_EQ("-1", std::string(buf)); + // Work-around for an limitation of C90 + EXPECT_EQ(20, SafeSPrintf(buf, "%d", (int64_t)-9223372036854775807LL-1)); + EXPECT_EQ("-9223372036854775808", std::string(buf)); + + // Strings (both const and mutable). + EXPECT_EQ(4, SafeSPrintf(buf, "test")); + EXPECT_EQ("test", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, buf)); + EXPECT_EQ("test", std::string(buf)); + + // Pointer + char addr[20]; + sprintf(addr, "0x%llX", (unsigned long long)(uintptr_t)buf); + SafeSPrintf(buf, "%p", buf); + EXPECT_EQ(std::string(addr), std::string(buf)); + SafeSPrintf(buf, "%p", (const char *)buf); + EXPECT_EQ(std::string(addr), std::string(buf)); + sprintf(addr, "0x%llX", (unsigned long long)(uintptr_t)sprintf); + SafeSPrintf(buf, "%p", sprintf); + EXPECT_EQ(std::string(addr), std::string(buf)); + + // Padding for pointers is a little more complicated because of the "0x" + // prefix. Padding with '0' zeros is relatively straight-forward, but + // padding with ' ' spaces requires more effort. + sprintf(addr, "0x%017llX", (unsigned long long)(uintptr_t)buf); + SafeSPrintf(buf, "%019p", buf); + EXPECT_EQ(std::string(addr), std::string(buf)); + sprintf(addr, "0x%llX", (unsigned long long)(uintptr_t)buf); + memset(addr, ' ', + (char*)memmove(addr + sizeof(addr) - strlen(addr) - 1, + addr, strlen(addr)+1) - addr); + SafeSPrintf(buf, "%19p", buf); + EXPECT_EQ(std::string(addr), std::string(buf)); +} + +namespace { +void PrintLongString(char* buf, size_t sz) { + // Output a reasonably complex expression into a limited-size buffer. + // At least one byte is available for writing the NUL character. + CHECK_GT(sz, static_cast<size_t>(0)); + + // Allocate slightly more space, so that we can verify that SafeSPrintf() + // never writes past the end of the buffer. + std::unique_ptr<char[]> tmp(new char[sz + 2]); + memset(tmp.get(), 'X', sz+2); + + // Use SafeSPrintf() to output a complex list of arguments: + // - test padding and truncating %c single characters. + // - test truncating %s simple strings. + // - test mismatching arguments and truncating (for %d != %s). + // - test zero-padding and truncating %x hexadecimal numbers. + // - test outputting and truncating %d MININT. + // - test outputting and truncating %p arbitrary pointer values. + // - test outputting, padding and truncating NULL-pointer %s strings. + char* out = tmp.get(); + size_t out_sz = sz; + size_t len; + for (std::unique_ptr<char[]> perfect_buf;;) { + size_t needed = + SafeSNPrintf(out, out_sz, +#if defined(NDEBUG) + "A%2cong %s: %d %010X %d %p%7s", 'l', "string", "", +#else + "A%2cong %s: %%d %010X %d %p%7s", 'l', "string", +#endif + 0xDEADBEEF, std::numeric_limits<intptr_t>::min(), + PrintLongString, static_cast<char*>(nullptr)) + + 1; + + // Various sanity checks: + // The numbered of characters needed to print the full string should always + // be bigger or equal to the bytes that have actually been output. + len = strlen(tmp.get()); + CHECK_GE(needed, len+1); + + // The number of characters output should always fit into the buffer that + // was passed into SafeSPrintf(). + CHECK_LT(len, out_sz); + + // The output is always terminated with a NUL byte (actually, this test is + // always going to pass, as strlen() already verified this) + EXPECT_FALSE(tmp[len]); + + // ASAN can check that we are not overwriting buffers, iff we make sure the + // buffer is exactly the size that we are expecting to be written. After + // running SafeSNPrintf() the first time, it is possible to compute the + // correct buffer size for this test. So, allocate a second buffer and run + // the exact same SafeSNPrintf() command again. + if (!perfect_buf.get()) { + out_sz = std::min(needed, sz); + out = new char[out_sz]; + perfect_buf.reset(out); + } else { + break; + } + } + + // All trailing bytes are unchanged. + for (size_t i = len+1; i < sz+2; ++i) + EXPECT_EQ('X', tmp[i]); + + // The text that was generated by SafeSPrintf() should always match the + // equivalent text generated by sprintf(). Please note that the format + // string for sprintf() is not complicated, as it does not have the + // benefit of getting type information from the C++ compiler. + // + // N.B.: It would be so much cleaner to use snprintf(). But unfortunately, + // Visual Studio doesn't support this function, and the work-arounds + // are all really awkward. + char ref[256]; + CHECK_LE(sz, sizeof(ref)); + sprintf(ref, "A long string: %%d 00DEADBEEF %lld 0x%llX <NULL>", + static_cast<long long>(std::numeric_limits<intptr_t>::min()), + static_cast<unsigned long long>( + reinterpret_cast<uintptr_t>(PrintLongString))); + ref[sz-1] = '\000'; + +#if defined(NDEBUG) + const size_t kSSizeMax = std::numeric_limits<ssize_t>::max(); +#else + const size_t kSSizeMax = internal::GetSafeSPrintfSSizeMaxForTest(); +#endif + + // Compare the output from SafeSPrintf() to the one from sprintf(). + EXPECT_EQ(std::string(ref).substr(0, kSSizeMax-1), std::string(tmp.get())); + + // We allocated a slightly larger buffer, so that we could perform some + // extra sanity checks. Now that the tests have all passed, we copy the + // data to the output buffer that the caller provided. + memcpy(buf, tmp.get(), len+1); +} + +#if !defined(NDEBUG) +class ScopedSafeSPrintfSSizeMaxSetter { + public: + ScopedSafeSPrintfSSizeMaxSetter(size_t sz) { + old_ssize_max_ = internal::GetSafeSPrintfSSizeMaxForTest(); + internal::SetSafeSPrintfSSizeMaxForTest(sz); + } + + ~ScopedSafeSPrintfSSizeMaxSetter() { + internal::SetSafeSPrintfSSizeMaxForTest(old_ssize_max_); + } + + private: + size_t old_ssize_max_; + + DISALLOW_COPY_AND_ASSIGN(ScopedSafeSPrintfSSizeMaxSetter); +}; +#endif + +} // anonymous namespace + +TEST(SafeSPrintfTest, Truncation) { + // We use PrintLongString() to print a complex long string and then + // truncate to all possible lengths. This ends up exercising a lot of + // different code paths in SafeSPrintf() and IToASCII(), as truncation can + // happen in a lot of different states. + char ref[256]; + PrintLongString(ref, sizeof(ref)); + for (size_t i = strlen(ref)+1; i; --i) { + char buf[sizeof(ref)]; + PrintLongString(buf, i); + EXPECT_EQ(std::string(ref, i - 1), std::string(buf)); + } + + // When compiling in debug mode, we have the ability to fake a small + // upper limit for the maximum value that can be stored in an ssize_t. + // SafeSPrintf() uses this upper limit to determine how many bytes it will + // write to the buffer, even if the caller claimed a bigger buffer size. + // Repeat the truncation test and verify that this other code path in + // SafeSPrintf() works correctly, too. +#if !defined(NDEBUG) + for (size_t i = strlen(ref)+1; i > 1; --i) { + ScopedSafeSPrintfSSizeMaxSetter ssize_max_setter(i); + char buf[sizeof(ref)]; + PrintLongString(buf, sizeof(buf)); + EXPECT_EQ(std::string(ref, i - 1), std::string(buf)); + } + + // kSSizeMax is also used to constrain the maximum amount of padding, before + // SafeSPrintf() detects an error in the format string. + ScopedSafeSPrintfSSizeMaxSetter ssize_max_setter(100); + char buf[256]; + EXPECT_EQ(99, SafeSPrintf(buf, "%99c", ' ')); + EXPECT_EQ(std::string(99, ' '), std::string(buf)); + *buf = '\000'; +#if defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, "%100c", ' '), "padding <= max_padding"); +#endif + EXPECT_EQ(0, *buf); +#endif +} + +TEST(SafeSPrintfTest, Padding) { + char buf[40], fmt[40]; + + // Chars %c + EXPECT_EQ(1, SafeSPrintf(buf, "%c", 'A')); + EXPECT_EQ("A", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%2c", 'A')); + EXPECT_EQ(" A", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%02c", 'A')); + EXPECT_EQ(" A", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%-2c", 'A')); + EXPECT_EQ("%-2c", std::string(buf)); + SafeSPrintf(fmt, "%%%dc", std::numeric_limits<ssize_t>::max() - 1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, SafeSPrintf(buf, fmt, 'A')); + SafeSPrintf(fmt, "%%%dc", + static_cast<size_t>(std::numeric_limits<ssize_t>::max())); +#if defined(NDEBUG) + EXPECT_EQ(2, SafeSPrintf(buf, fmt, 'A')); + EXPECT_EQ("%c", std::string(buf)); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, fmt, 'A'), "padding <= max_padding"); +#endif + + // Octal %o + EXPECT_EQ(1, SafeSPrintf(buf, "%o", 1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%2o", 1)); + EXPECT_EQ(" 1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%02o", 1)); + EXPECT_EQ("01", std::string(buf)); + EXPECT_EQ(12, SafeSPrintf(buf, "%12o", -1)); + EXPECT_EQ(" 37777777777", std::string(buf)); + EXPECT_EQ(12, SafeSPrintf(buf, "%012o", -1)); + EXPECT_EQ("037777777777", std::string(buf)); + EXPECT_EQ(23, SafeSPrintf(buf, "%23o", -1LL)); + EXPECT_EQ(" 1777777777777777777777", std::string(buf)); + EXPECT_EQ(23, SafeSPrintf(buf, "%023o", -1LL)); + EXPECT_EQ("01777777777777777777777", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%2o", 0111)); + EXPECT_EQ("111", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%-2o", 1)); + EXPECT_EQ("%-2o", std::string(buf)); + SafeSPrintf(fmt, "%%%do", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, 1)); + EXPECT_EQ(" ", std::string(buf)); + SafeSPrintf(fmt, "%%0%do", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, 1)); + EXPECT_EQ("000", std::string(buf)); + SafeSPrintf(fmt, "%%%do", + static_cast<size_t>(std::numeric_limits<ssize_t>::max())); +#if defined(NDEBUG) + EXPECT_EQ(2, SafeSPrintf(buf, fmt, 1)); + EXPECT_EQ("%o", std::string(buf)); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, fmt, 1), "padding <= max_padding"); +#endif + + // Decimals %d + EXPECT_EQ(1, SafeSPrintf(buf, "%d", 1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%2d", 1)); + EXPECT_EQ(" 1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%02d", 1)); + EXPECT_EQ("01", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%3d", -1)); + EXPECT_EQ(" -1", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%03d", -1)); + EXPECT_EQ("-01", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%2d", 111)); + EXPECT_EQ("111", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%2d", -111)); + EXPECT_EQ("-111", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%-2d", 1)); + EXPECT_EQ("%-2d", std::string(buf)); + SafeSPrintf(fmt, "%%%dd", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, 1)); + EXPECT_EQ(" ", std::string(buf)); + SafeSPrintf(fmt, "%%0%dd", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, 1)); + EXPECT_EQ("000", std::string(buf)); + SafeSPrintf(fmt, "%%%dd", + static_cast<size_t>(std::numeric_limits<ssize_t>::max())); +#if defined(NDEBUG) + EXPECT_EQ(2, SafeSPrintf(buf, fmt, 1)); + EXPECT_EQ("%d", std::string(buf)); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, fmt, 1), "padding <= max_padding"); +#endif + + // Hex %X + EXPECT_EQ(1, SafeSPrintf(buf, "%X", 1)); + EXPECT_EQ("1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%2X", 1)); + EXPECT_EQ(" 1", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%02X", 1)); + EXPECT_EQ("01", std::string(buf)); + EXPECT_EQ(9, SafeSPrintf(buf, "%9X", -1)); + EXPECT_EQ(" FFFFFFFF", std::string(buf)); + EXPECT_EQ(9, SafeSPrintf(buf, "%09X", -1)); + EXPECT_EQ("0FFFFFFFF", std::string(buf)); + EXPECT_EQ(17, SafeSPrintf(buf, "%17X", -1LL)); + EXPECT_EQ(" FFFFFFFFFFFFFFFF", std::string(buf)); + EXPECT_EQ(17, SafeSPrintf(buf, "%017X", -1LL)); + EXPECT_EQ("0FFFFFFFFFFFFFFFF", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%2X", 0x111)); + EXPECT_EQ("111", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%-2X", 1)); + EXPECT_EQ("%-2X", std::string(buf)); + SafeSPrintf(fmt, "%%%dX", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, 1)); + EXPECT_EQ(" ", std::string(buf)); + SafeSPrintf(fmt, "%%0%dX", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, 1)); + EXPECT_EQ("000", std::string(buf)); + SafeSPrintf(fmt, "%%%dX", + static_cast<size_t>(std::numeric_limits<ssize_t>::max())); +#if defined(NDEBUG) + EXPECT_EQ(2, SafeSPrintf(buf, fmt, 1)); + EXPECT_EQ("%X", std::string(buf)); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, fmt, 1), "padding <= max_padding"); +#endif + + // Pointer %p + EXPECT_EQ(3, SafeSPrintf(buf, "%p", (void*)1)); + EXPECT_EQ("0x1", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%4p", (void*)1)); + EXPECT_EQ(" 0x1", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%04p", (void*)1)); + EXPECT_EQ("0x01", std::string(buf)); + EXPECT_EQ(5, SafeSPrintf(buf, "%4p", (void*)0x111)); + EXPECT_EQ("0x111", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%-2p", (void*)1)); + EXPECT_EQ("%-2p", std::string(buf)); + SafeSPrintf(fmt, "%%%dp", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, (void*)1)); + EXPECT_EQ(" ", std::string(buf)); + SafeSPrintf(fmt, "%%0%dp", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, (void*)1)); + EXPECT_EQ("0x0", std::string(buf)); + SafeSPrintf(fmt, "%%%dp", + static_cast<size_t>(std::numeric_limits<ssize_t>::max())); +#if defined(NDEBUG) + EXPECT_EQ(2, SafeSPrintf(buf, fmt, 1)); + EXPECT_EQ("%p", std::string(buf)); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, fmt, 1), "padding <= max_padding"); +#endif + + // String + EXPECT_EQ(1, SafeSPrintf(buf, "%s", "A")); + EXPECT_EQ("A", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%2s", "A")); + EXPECT_EQ(" A", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%02s", "A")); + EXPECT_EQ(" A", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%2s", "AAA")); + EXPECT_EQ("AAA", std::string(buf)); + EXPECT_EQ(4, SafeSPrintf(buf, "%-2s", "A")); + EXPECT_EQ("%-2s", std::string(buf)); + SafeSPrintf(fmt, "%%%ds", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, "A")); + EXPECT_EQ(" ", std::string(buf)); + SafeSPrintf(fmt, "%%0%ds", std::numeric_limits<ssize_t>::max()-1); + EXPECT_EQ(std::numeric_limits<ssize_t>::max()-1, + SafeSNPrintf(buf, 4, fmt, "A")); + EXPECT_EQ(" ", std::string(buf)); + SafeSPrintf(fmt, "%%%ds", + static_cast<size_t>(std::numeric_limits<ssize_t>::max())); +#if defined(NDEBUG) + EXPECT_EQ(2, SafeSPrintf(buf, fmt, "A")); + EXPECT_EQ("%s", std::string(buf)); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, fmt, "A"), "padding <= max_padding"); +#endif +} + +TEST(SafeSPrintfTest, EmbeddedNul) { + char buf[] = { 'X', 'X', 'X', 'X' }; + EXPECT_EQ(2, SafeSPrintf(buf, "%3c", 0)); + EXPECT_EQ(' ', buf[0]); + EXPECT_EQ(' ', buf[1]); + EXPECT_EQ(0, buf[2]); + EXPECT_EQ('X', buf[3]); + + // Check handling of a NUL format character. N.B. this takes two different + // code paths depending on whether we are actually passing arguments. If + // we don't have any arguments, we are running in the fast-path code, that + // looks (almost) like a strncpy(). +#if defined(NDEBUG) + EXPECT_EQ(2, SafeSPrintf(buf, "%%%")); + EXPECT_EQ("%%", std::string(buf)); + EXPECT_EQ(2, SafeSPrintf(buf, "%%%", 0)); + EXPECT_EQ("%%", std::string(buf)); +#elif defined(ALLOW_DEATH_TEST) + EXPECT_DEATH(SafeSPrintf(buf, "%%%"), "src.1. == '%'"); + EXPECT_DEATH(SafeSPrintf(buf, "%%%", 0), "ch"); +#endif +} + +TEST(SafeSPrintfTest, EmitNULL) { + char buf[40]; +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion-null" +#endif + EXPECT_EQ(1, SafeSPrintf(buf, "%d", NULL)); + EXPECT_EQ("0", std::string(buf)); + EXPECT_EQ(3, SafeSPrintf(buf, "%p", NULL)); + EXPECT_EQ("0x0", std::string(buf)); + EXPECT_EQ(6, SafeSPrintf(buf, "%s", NULL)); + EXPECT_EQ("<NULL>", std::string(buf)); +#if defined(__GCC__) +#pragma GCC diagnostic pop +#endif +} + +TEST(SafeSPrintfTest, PointerSize) { + // The internal data representation is a 64bit value, independent of the + // native word size. We want to perform sign-extension for signed integers, + // but we want to avoid doing so for pointer types. This could be a + // problem on systems, where pointers are only 32bit. This tests verifies + // that there is no such problem. + char *str = reinterpret_cast<char *>(0x80000000u); + void *ptr = str; + char buf[40]; + EXPECT_EQ(10, SafeSPrintf(buf, "%p", str)); + EXPECT_EQ("0x80000000", std::string(buf)); + EXPECT_EQ(10, SafeSPrintf(buf, "%p", ptr)); + EXPECT_EQ("0x80000000", std::string(buf)); +} + +} // namespace strings +} // namespace base diff --git a/security/sandbox/chromium/base/strings/string16.cc b/security/sandbox/chromium/base/strings/string16.cc new file mode 100644 index 0000000000..84962e6710 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string16.cc @@ -0,0 +1,87 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/string16.h" + +#if defined(WCHAR_T_IS_UTF16) && !defined(_AIX) + +#error This file should not be used on 2-byte wchar_t systems +// If this winds up being needed on 2-byte wchar_t systems, either the +// definitions below can be used, or the host system's wide character +// functions like wmemcmp can be wrapped. + +#elif defined(WCHAR_T_IS_UTF32) + +#include <ostream> + +#include "base/strings/string_piece.h" + +namespace base { + +int c16memcmp(const char16* s1, const char16* s2, size_t n) { + // We cannot call memcmp because that changes the semantics. + while (n-- > 0) { + if (*s1 != *s2) { + // We cannot use (*s1 - *s2) because char16 is unsigned. + return ((*s1 < *s2) ? -1 : 1); + } + ++s1; + ++s2; + } + return 0; +} + +size_t c16len(const char16* s) { + const char16 *s_orig = s; + while (*s) { + ++s; + } + return s - s_orig; +} + +const char16* c16memchr(const char16* s, char16 c, size_t n) { + while (n-- > 0) { + if (*s == c) { + return s; + } + ++s; + } + return nullptr; +} + +char16* c16memmove(char16* s1, const char16* s2, size_t n) { + return static_cast<char16*>(memmove(s1, s2, n * sizeof(char16))); +} + +char16* c16memcpy(char16* s1, const char16* s2, size_t n) { + return static_cast<char16*>(memcpy(s1, s2, n * sizeof(char16))); +} + +char16* c16memset(char16* s, char16 c, size_t n) { + char16 *s_orig = s; + while (n-- > 0) { + *s = c; + ++s; + } + return s_orig; +} + +namespace string16_internals { + +std::ostream& operator<<(std::ostream& out, const string16& str) { + return out << base::StringPiece16(str); +} + +void PrintTo(const string16& str, std::ostream* out) { + *out << str; +} + +} // namespace string16_internals + +} // namespace base + +template class std:: + basic_string<base::char16, base::string16_internals::string16_char_traits>; + +#endif // WCHAR_T_IS_UTF32 diff --git a/security/sandbox/chromium/base/strings/string16.h b/security/sandbox/chromium/base/strings/string16.h new file mode 100644 index 0000000000..3cb6c7c495 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string16.h @@ -0,0 +1,229 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING16_H_ +#define BASE_STRINGS_STRING16_H_ + +// WHAT: +// A version of std::basic_string that provides 2-byte characters even when +// wchar_t is not implemented as a 2-byte type. You can access this class as +// string16. We also define char16, which string16 is based upon. +// +// WHY: +// On Windows, wchar_t is 2 bytes, and it can conveniently handle UTF-16/UCS-2 +// data. Plenty of existing code operates on strings encoded as UTF-16. +// +// On many other platforms, sizeof(wchar_t) is 4 bytes by default. We can make +// it 2 bytes by using the GCC flag -fshort-wchar. But then std::wstring fails +// at run time, because it calls some functions (like wcslen) that come from +// the system's native C library -- which was built with a 4-byte wchar_t! +// It's wasteful to use 4-byte wchar_t strings to carry UTF-16 data, and it's +// entirely improper on those systems where the encoding of wchar_t is defined +// as UTF-32. +// +// Here, we define string16, which is similar to std::wstring but replaces all +// libc functions with custom, 2-byte-char compatible routines. It is capable +// of carrying UTF-16-encoded data. + +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> + +#include <functional> +#include <string> + +#include "base/base_export.h" +#include "build/build_config.h" + +#if defined(WCHAR_T_IS_UTF16) + +// Define a macro for wrapping construction of char16 arrays and string16s from +// a literal string. This indirection allows for an easier migration of +// base::char16 to char16_t on platforms where WCHAR_T_IS_UTF16, as only a one +// character change to the macro will be necessary. +// This macro does not exist when WCHAR_T_IS_UTF32, as it is currently not +// possible to create a char array form a literal in this case. +// TODO(https://crbug.com/911896): Remove this macro once base::char16 is +// char16_t on all platforms. +#define STRING16_LITERAL(x) L##x + +namespace base { + +typedef wchar_t char16; +typedef std::wstring string16; + +} // namespace base + +#elif defined(WCHAR_T_IS_UTF32) + +#include <wchar.h> // for mbstate_t + +namespace base { + +typedef uint16_t char16; + +// char16 versions of the functions required by string16_char_traits; these +// are based on the wide character functions of similar names ("w" or "wcs" +// instead of "c16"). +BASE_EXPORT int c16memcmp(const char16* s1, const char16* s2, size_t n); +BASE_EXPORT size_t c16len(const char16* s); +BASE_EXPORT const char16* c16memchr(const char16* s, char16 c, size_t n); +BASE_EXPORT char16* c16memmove(char16* s1, const char16* s2, size_t n); +BASE_EXPORT char16* c16memcpy(char16* s1, const char16* s2, size_t n); +BASE_EXPORT char16* c16memset(char16* s, char16 c, size_t n); + +// This namespace contains the implementation of base::string16 along with +// things that need to be found via argument-dependent lookup from a +// base::string16. +namespace string16_internals { + +struct string16_char_traits { + typedef char16 char_type; + typedef int int_type; + + // int_type needs to be able to hold each possible value of char_type, and in + // addition, the distinct value of eof(). + static_assert(sizeof(int_type) > sizeof(char_type), + "int must be larger than 16 bits wide"); + + typedef std::streamoff off_type; + typedef mbstate_t state_type; + typedef std::fpos<state_type> pos_type; + + static void assign(char_type& c1, const char_type& c2) { + c1 = c2; + } + + static bool eq(const char_type& c1, const char_type& c2) { + return c1 == c2; + } + static bool lt(const char_type& c1, const char_type& c2) { + return c1 < c2; + } + + static int compare(const char_type* s1, const char_type* s2, size_t n) { + return c16memcmp(s1, s2, n); + } + + static size_t length(const char_type* s) { + return c16len(s); + } + + static const char_type* find(const char_type* s, size_t n, + const char_type& a) { + return c16memchr(s, a, n); + } + + static char_type* move(char_type* s1, const char_type* s2, size_t n) { + return c16memmove(s1, s2, n); + } + + static char_type* copy(char_type* s1, const char_type* s2, size_t n) { + return c16memcpy(s1, s2, n); + } + + static char_type* assign(char_type* s, size_t n, char_type a) { + return c16memset(s, a, n); + } + + static int_type not_eof(const int_type& c) { + return eq_int_type(c, eof()) ? 0 : c; + } + + static char_type to_char_type(const int_type& c) { + return char_type(c); + } + + static int_type to_int_type(const char_type& c) { + return int_type(c); + } + + static bool eq_int_type(const int_type& c1, const int_type& c2) { + return c1 == c2; + } + + static int_type eof() { + return static_cast<int_type>(EOF); + } +}; + +} // namespace string16_internals + +typedef std::basic_string<char16, + base::string16_internals::string16_char_traits> + string16; + +namespace string16_internals { + +BASE_EXPORT extern std::ostream& operator<<(std::ostream& out, + const string16& str); + +// This is required by googletest to print a readable output on test failures. +BASE_EXPORT extern void PrintTo(const string16& str, std::ostream* out); + +} // namespace string16_internals + +} // namespace base + +// The string class will be explicitly instantiated only once, in string16.cc. +// +// std::basic_string<> in GNU libstdc++ contains a static data member, +// _S_empty_rep_storage, to represent empty strings. When an operation such +// as assignment or destruction is performed on a string, causing its existing +// data member to be invalidated, it must not be freed if this static data +// member is being used. Otherwise, it counts as an attempt to free static +// (and not allocated) data, which is a memory error. +// +// Generally, due to C++ template magic, _S_empty_rep_storage will be marked +// as a coalesced symbol, meaning that the linker will combine multiple +// instances into a single one when generating output. +// +// If a string class is used by multiple shared libraries, a problem occurs. +// Each library will get its own copy of _S_empty_rep_storage. When strings +// are passed across a library boundary for alteration or destruction, memory +// errors will result. GNU libstdc++ contains a configuration option, +// --enable-fully-dynamic-string (_GLIBCXX_FULLY_DYNAMIC_STRING), which +// disables the static data member optimization, but it's a good optimization +// and non-STL code is generally at the mercy of the system's STL +// configuration. Fully-dynamic strings are not the default for GNU libstdc++ +// libstdc++ itself or for the libstdc++ installations on the systems we care +// about, such as Mac OS X and relevant flavors of Linux. +// +// See also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24196 . +// +// To avoid problems, string classes need to be explicitly instantiated only +// once, in exactly one library. All other string users see it via an "extern" +// declaration. This is precisely how GNU libstdc++ handles +// std::basic_string<char> (string) and std::basic_string<wchar_t> (wstring). +// +// This also works around a Mac OS X linker bug in ld64-85.2.1 (Xcode 3.1.2), +// in which the linker does not fully coalesce symbols when dead code +// stripping is enabled. This bug causes the memory errors described above +// to occur even when a std::basic_string<> does not cross shared library +// boundaries, such as in statically-linked executables. +// +// TODO(mark): File this bug with Apple and update this note with a bug number. + +extern template class BASE_EXPORT + std::basic_string<base::char16, + base::string16_internals::string16_char_traits>; + +// Specialize std::hash for base::string16. Although the style guide forbids +// this in general, it is necessary for consistency with WCHAR_T_IS_UTF16 +// platforms, where base::string16 is a type alias for std::wstring. +namespace std { +template <> +struct hash<base::string16> { + std::size_t operator()(const base::string16& s) const { + std::size_t result = 0; + for (base::char16 c : s) + result = (result * 131) + c; + return result; + } +}; +} // namespace std + +#endif // WCHAR_T_IS_UTF32 + +#endif // BASE_STRINGS_STRING16_H_ diff --git a/security/sandbox/chromium/base/strings/string_number_conversions.cc b/security/sandbox/chromium/base/strings/string_number_conversions.cc new file mode 100644 index 0000000000..8b71b0ae11 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_number_conversions.cc @@ -0,0 +1,545 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/string_number_conversions.h" + +#include <ctype.h> +#include <errno.h> +#include <stdlib.h> +#include <wctype.h> + +#include <limits> +#include <type_traits> + +#include "base/logging.h" +#include "base/no_destructor.h" +#include "base/numerics/safe_math.h" +#include "base/strings/string_util.h" +#include "base/strings/utf_string_conversions.h" +#include "base/third_party/double_conversion/double-conversion/double-conversion.h" + +namespace base { + +namespace { + +template <typename STR, typename INT> +struct IntToStringT { + static STR IntToString(INT value) { + // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4. + // So round up to allocate 3 output characters per byte, plus 1 for '-'. + const size_t kOutputBufSize = + 3 * sizeof(INT) + std::numeric_limits<INT>::is_signed; + + // Create the string in a temporary buffer, write it back to front, and + // then return the substr of what we ended up using. + using CHR = typename STR::value_type; + CHR outbuf[kOutputBufSize]; + + // The ValueOrDie call below can never fail, because UnsignedAbs is valid + // for all valid inputs. + typename std::make_unsigned<INT>::type res = + CheckedNumeric<INT>(value).UnsignedAbs().ValueOrDie(); + + CHR* end = outbuf + kOutputBufSize; + CHR* i = end; + do { + --i; + DCHECK(i != outbuf); + *i = static_cast<CHR>((res % 10) + '0'); + res /= 10; + } while (res != 0); + if (IsValueNegative(value)) { + --i; + DCHECK(i != outbuf); + *i = static_cast<CHR>('-'); + } + return STR(i, end); + } +}; + +// Utility to convert a character to a digit in a given base +template<typename CHAR, int BASE, bool BASE_LTE_10> class BaseCharToDigit { +}; + +// Faster specialization for bases <= 10 +template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, true> { + public: + static bool Convert(CHAR c, uint8_t* digit) { + if (c >= '0' && c < '0' + BASE) { + *digit = static_cast<uint8_t>(c - '0'); + return true; + } + return false; + } +}; + +// Specialization for bases where 10 < base <= 36 +template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, false> { + public: + static bool Convert(CHAR c, uint8_t* digit) { + if (c >= '0' && c <= '9') { + *digit = c - '0'; + } else if (c >= 'a' && c < 'a' + BASE - 10) { + *digit = c - 'a' + 10; + } else if (c >= 'A' && c < 'A' + BASE - 10) { + *digit = c - 'A' + 10; + } else { + return false; + } + return true; + } +}; + +template <int BASE, typename CHAR> +bool CharToDigit(CHAR c, uint8_t* digit) { + return BaseCharToDigit<CHAR, BASE, BASE <= 10>::Convert(c, digit); +} + +// There is an IsUnicodeWhitespace for wchars defined in string_util.h, but it +// is locale independent, whereas the functions we are replacing were +// locale-dependent. TBD what is desired, but for the moment let's not +// introduce a change in behaviour. +template<typename CHAR> class WhitespaceHelper { +}; + +template<> class WhitespaceHelper<char> { + public: + static bool Invoke(char c) { + return 0 != isspace(static_cast<unsigned char>(c)); + } +}; + +template<> class WhitespaceHelper<char16> { + public: + static bool Invoke(char16 c) { + return 0 != iswspace(c); + } +}; + +template<typename CHAR> bool LocalIsWhitespace(CHAR c) { + return WhitespaceHelper<CHAR>::Invoke(c); +} + +// IteratorRangeToNumberTraits should provide: +// - a typedef for iterator_type, the iterator type used as input. +// - a typedef for value_type, the target numeric type. +// - static functions min, max (returning the minimum and maximum permitted +// values) +// - constant kBase, the base in which to interpret the input +template<typename IteratorRangeToNumberTraits> +class IteratorRangeToNumber { + public: + typedef IteratorRangeToNumberTraits traits; + typedef typename traits::iterator_type const_iterator; + typedef typename traits::value_type value_type; + + // Generalized iterator-range-to-number conversion. + // + static bool Invoke(const_iterator begin, + const_iterator end, + value_type* output) { + bool valid = true; + + while (begin != end && LocalIsWhitespace(*begin)) { + valid = false; + ++begin; + } + + if (begin != end && *begin == '-') { + if (!std::numeric_limits<value_type>::is_signed) { + *output = 0; + valid = false; + } else if (!Negative::Invoke(begin + 1, end, output)) { + valid = false; + } + } else { + if (begin != end && *begin == '+') { + ++begin; + } + if (!Positive::Invoke(begin, end, output)) { + valid = false; + } + } + + return valid; + } + + private: + // Sign provides: + // - a static function, CheckBounds, that determines whether the next digit + // causes an overflow/underflow + // - a static function, Increment, that appends the next digit appropriately + // according to the sign of the number being parsed. + template<typename Sign> + class Base { + public: + static bool Invoke(const_iterator begin, const_iterator end, + typename traits::value_type* output) { + *output = 0; + + if (begin == end) { + return false; + } + + // Note: no performance difference was found when using template + // specialization to remove this check in bases other than 16 + if (traits::kBase == 16 && end - begin > 2 && *begin == '0' && + (*(begin + 1) == 'x' || *(begin + 1) == 'X')) { + begin += 2; + } + + for (const_iterator current = begin; current != end; ++current) { + uint8_t new_digit = 0; + + if (!CharToDigit<traits::kBase>(*current, &new_digit)) { + return false; + } + + if (current != begin) { + if (!Sign::CheckBounds(output, new_digit)) { + return false; + } + *output *= traits::kBase; + } + + Sign::Increment(new_digit, output); + } + return true; + } + }; + + class Positive : public Base<Positive> { + public: + static bool CheckBounds(value_type* output, uint8_t new_digit) { + if (*output > static_cast<value_type>(traits::max() / traits::kBase) || + (*output == static_cast<value_type>(traits::max() / traits::kBase) && + new_digit > traits::max() % traits::kBase)) { + *output = traits::max(); + return false; + } + return true; + } + static void Increment(uint8_t increment, value_type* output) { + *output += increment; + } + }; + + class Negative : public Base<Negative> { + public: + static bool CheckBounds(value_type* output, uint8_t new_digit) { + if (*output < traits::min() / traits::kBase || + (*output == traits::min() / traits::kBase && + new_digit > 0 - traits::min() % traits::kBase)) { + *output = traits::min(); + return false; + } + return true; + } + static void Increment(uint8_t increment, value_type* output) { + *output -= increment; + } + }; +}; + +template<typename ITERATOR, typename VALUE, int BASE> +class BaseIteratorRangeToNumberTraits { + public: + typedef ITERATOR iterator_type; + typedef VALUE value_type; + static value_type min() { + return std::numeric_limits<value_type>::min(); + } + static value_type max() { + return std::numeric_limits<value_type>::max(); + } + static const int kBase = BASE; +}; + +template<typename ITERATOR> +class BaseHexIteratorRangeToIntTraits + : public BaseIteratorRangeToNumberTraits<ITERATOR, int, 16> { +}; + +template <typename ITERATOR> +class BaseHexIteratorRangeToUIntTraits + : public BaseIteratorRangeToNumberTraits<ITERATOR, uint32_t, 16> {}; + +template <typename ITERATOR> +class BaseHexIteratorRangeToInt64Traits + : public BaseIteratorRangeToNumberTraits<ITERATOR, int64_t, 16> {}; + +template <typename ITERATOR> +class BaseHexIteratorRangeToUInt64Traits + : public BaseIteratorRangeToNumberTraits<ITERATOR, uint64_t, 16> {}; + +typedef BaseHexIteratorRangeToIntTraits<StringPiece::const_iterator> + HexIteratorRangeToIntTraits; + +typedef BaseHexIteratorRangeToUIntTraits<StringPiece::const_iterator> + HexIteratorRangeToUIntTraits; + +typedef BaseHexIteratorRangeToInt64Traits<StringPiece::const_iterator> + HexIteratorRangeToInt64Traits; + +typedef BaseHexIteratorRangeToUInt64Traits<StringPiece::const_iterator> + HexIteratorRangeToUInt64Traits; + +template <typename VALUE, int BASE> +class StringPieceToNumberTraits + : public BaseIteratorRangeToNumberTraits<StringPiece::const_iterator, + VALUE, + BASE> { +}; + +template <typename VALUE> +bool StringToIntImpl(StringPiece input, VALUE* output) { + return IteratorRangeToNumber<StringPieceToNumberTraits<VALUE, 10> >::Invoke( + input.begin(), input.end(), output); +} + +template <typename VALUE, int BASE> +class StringPiece16ToNumberTraits + : public BaseIteratorRangeToNumberTraits<StringPiece16::const_iterator, + VALUE, + BASE> { +}; + +template <typename VALUE> +bool String16ToIntImpl(StringPiece16 input, VALUE* output) { + return IteratorRangeToNumber<StringPiece16ToNumberTraits<VALUE, 10> >::Invoke( + input.begin(), input.end(), output); +} + +} // namespace + +std::string NumberToString(int value) { + return IntToStringT<std::string, int>::IntToString(value); +} + +string16 NumberToString16(int value) { + return IntToStringT<string16, int>::IntToString(value); +} + +std::string NumberToString(unsigned value) { + return IntToStringT<std::string, unsigned>::IntToString(value); +} + +string16 NumberToString16(unsigned value) { + return IntToStringT<string16, unsigned>::IntToString(value); +} + +std::string NumberToString(long value) { + return IntToStringT<std::string, long>::IntToString(value); +} + +string16 NumberToString16(long value) { + return IntToStringT<string16, long>::IntToString(value); +} + +std::string NumberToString(unsigned long value) { + return IntToStringT<std::string, unsigned long>::IntToString(value); +} + +string16 NumberToString16(unsigned long value) { + return IntToStringT<string16, unsigned long>::IntToString(value); +} + +std::string NumberToString(long long value) { + return IntToStringT<std::string, long long>::IntToString(value); +} + +string16 NumberToString16(long long value) { + return IntToStringT<string16, long long>::IntToString(value); +} + +std::string NumberToString(unsigned long long value) { + return IntToStringT<std::string, unsigned long long>::IntToString(value); +} + +string16 NumberToString16(unsigned long long value) { + return IntToStringT<string16, unsigned long long>::IntToString(value); +} + +static const double_conversion::DoubleToStringConverter* +GetDoubleToStringConverter() { + static NoDestructor<double_conversion::DoubleToStringConverter> converter( + double_conversion::DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, + nullptr, nullptr, 'e', -6, 12, 0, 0); + return converter.get(); +} + +std::string NumberToString(double value) { + char buffer[32]; + double_conversion::StringBuilder builder(buffer, sizeof(buffer)); + GetDoubleToStringConverter()->ToShortest(value, &builder); + return std::string(buffer, builder.position()); +} + +base::string16 NumberToString16(double value) { + char buffer[32]; + double_conversion::StringBuilder builder(buffer, sizeof(buffer)); + GetDoubleToStringConverter()->ToShortest(value, &builder); + + // The number will be ASCII. This creates the string using the "input + // iterator" variant which promotes from 8-bit to 16-bit via "=". + return base::string16(&buffer[0], &buffer[builder.position()]); +} + +bool StringToInt(StringPiece input, int* output) { + return StringToIntImpl(input, output); +} + +bool StringToInt(StringPiece16 input, int* output) { + return String16ToIntImpl(input, output); +} + +bool StringToUint(StringPiece input, unsigned* output) { + return StringToIntImpl(input, output); +} + +bool StringToUint(StringPiece16 input, unsigned* output) { + return String16ToIntImpl(input, output); +} + +bool StringToInt64(StringPiece input, int64_t* output) { + return StringToIntImpl(input, output); +} + +bool StringToInt64(StringPiece16 input, int64_t* output) { + return String16ToIntImpl(input, output); +} + +bool StringToUint64(StringPiece input, uint64_t* output) { + return StringToIntImpl(input, output); +} + +bool StringToUint64(StringPiece16 input, uint64_t* output) { + return String16ToIntImpl(input, output); +} + +bool StringToSizeT(StringPiece input, size_t* output) { + return StringToIntImpl(input, output); +} + +bool StringToSizeT(StringPiece16 input, size_t* output) { + return String16ToIntImpl(input, output); +} + +template <typename STRING, typename CHAR> +bool StringToDoubleImpl(STRING input, const CHAR* data, double* output) { + static NoDestructor<double_conversion::StringToDoubleConverter> converter( + double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES | + double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK, + 0.0, 0, nullptr, nullptr); + + int processed_characters_count; + *output = converter->StringToDouble(data, input.size(), + &processed_characters_count); + + // Cases to return false: + // - If the input string is empty, there was nothing to parse. + // - If the value saturated to HUGE_VAL. + // - If the entire string was not processed, there are either characters + // remaining in the string after a parsed number, or the string does not + // begin with a parseable number. + // - If the first character is a space, there was leading whitespace + return !input.empty() && *output != HUGE_VAL && *output != -HUGE_VAL && + static_cast<size_t>(processed_characters_count) == input.size() && + !IsUnicodeWhitespace(input[0]); +} + +bool StringToDouble(StringPiece input, double* output) { + return StringToDoubleImpl(input, input.data(), output); +} + +bool StringToDouble(StringPiece16 input, double* output) { + return StringToDoubleImpl( + input, reinterpret_cast<const uint16_t*>(input.data()), output); +} + +std::string HexEncode(const void* bytes, size_t size) { + static const char kHexChars[] = "0123456789ABCDEF"; + + // Each input byte creates two output hex characters. + std::string ret(size * 2, '\0'); + + for (size_t i = 0; i < size; ++i) { + char b = reinterpret_cast<const char*>(bytes)[i]; + ret[(i * 2)] = kHexChars[(b >> 4) & 0xf]; + ret[(i * 2) + 1] = kHexChars[b & 0xf]; + } + return ret; +} + +std::string HexEncode(base::span<const uint8_t> bytes) { + return HexEncode(bytes.data(), bytes.size()); +} + +bool HexStringToInt(StringPiece input, int* output) { + return IteratorRangeToNumber<HexIteratorRangeToIntTraits>::Invoke( + input.begin(), input.end(), output); +} + +bool HexStringToUInt(StringPiece input, uint32_t* output) { + return IteratorRangeToNumber<HexIteratorRangeToUIntTraits>::Invoke( + input.begin(), input.end(), output); +} + +bool HexStringToInt64(StringPiece input, int64_t* output) { + return IteratorRangeToNumber<HexIteratorRangeToInt64Traits>::Invoke( + input.begin(), input.end(), output); +} + +bool HexStringToUInt64(StringPiece input, uint64_t* output) { + return IteratorRangeToNumber<HexIteratorRangeToUInt64Traits>::Invoke( + input.begin(), input.end(), output); +} + +template <typename Container> +static bool HexStringToByteContainer(StringPiece input, Container* output) { + DCHECK_EQ(output->size(), 0u); + size_t count = input.size(); + if (count == 0 || (count % 2) != 0) + return false; + for (uintptr_t i = 0; i < count / 2; ++i) { + uint8_t msb = 0; // most significant 4 bits + uint8_t lsb = 0; // least significant 4 bits + if (!CharToDigit<16>(input[i * 2], &msb) || + !CharToDigit<16>(input[i * 2 + 1], &lsb)) { + return false; + } + output->push_back((msb << 4) | lsb); + } + return true; +} + +bool HexStringToBytes(StringPiece input, std::vector<uint8_t>* output) { + return HexStringToByteContainer(input, output); +} + +bool HexStringToString(StringPiece input, std::string* output) { + return HexStringToByteContainer(input, output); +} + +bool HexStringToSpan(StringPiece input, base::span<uint8_t> output) { + size_t count = input.size(); + if (count == 0 || (count % 2) != 0) + return false; + + if (count / 2 != output.size()) + return false; + + for (uintptr_t i = 0; i < count / 2; ++i) { + uint8_t msb = 0; // most significant 4 bits + uint8_t lsb = 0; // least significant 4 bits + if (!CharToDigit<16>(input[i * 2], &msb) || + !CharToDigit<16>(input[i * 2 + 1], &lsb)) { + return false; + } + output[i] = (msb << 4) | lsb; + } + return true; +} + +} // namespace base diff --git a/security/sandbox/chromium/base/strings/string_number_conversions.h b/security/sandbox/chromium/base/strings/string_number_conversions.h new file mode 100644 index 0000000000..87df24e21c --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_number_conversions.h @@ -0,0 +1,157 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_NUMBER_CONVERSIONS_H_ +#define BASE_STRINGS_STRING_NUMBER_CONVERSIONS_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <string> +#include <vector> + +#include "base/base_export.h" +#include "base/containers/span.h" +#include "base/strings/string16.h" +#include "base/strings/string_piece.h" +#include "build/build_config.h" + +// ---------------------------------------------------------------------------- +// IMPORTANT MESSAGE FROM YOUR SPONSOR +// +// This file contains no "wstring" variants. New code should use string16. If +// you need to make old code work, use the UTF8 version and convert. Please do +// not add wstring variants. +// +// Please do not add "convenience" functions for converting strings to integers +// that return the value and ignore success/failure. That encourages people to +// write code that doesn't properly handle the error conditions. +// +// DO NOT use these functions in any UI unless it's NOT localized on purpose. +// Instead, use base::MessageFormatter for a complex message with numbers +// (integer, float, double) embedded or base::Format{Number,Double,Percent} to +// just format a single number/percent. Note that some languages use native +// digits instead of ASCII digits while others use a group separator or decimal +// point different from ',' and '.'. Using these functions in the UI would lead +// numbers to be formatted in a non-native way. +// ---------------------------------------------------------------------------- + +namespace base { + +// Number -> string conversions ------------------------------------------------ + +// Ignores locale! see warning above. +BASE_EXPORT std::string NumberToString(int value); +BASE_EXPORT string16 NumberToString16(int value); +BASE_EXPORT std::string NumberToString(unsigned int value); +BASE_EXPORT string16 NumberToString16(unsigned int value); +BASE_EXPORT std::string NumberToString(long value); +BASE_EXPORT string16 NumberToString16(long value); +BASE_EXPORT std::string NumberToString(unsigned long value); +BASE_EXPORT string16 NumberToString16(unsigned long value); +BASE_EXPORT std::string NumberToString(long long value); +BASE_EXPORT string16 NumberToString16(long long value); +BASE_EXPORT std::string NumberToString(unsigned long long value); +BASE_EXPORT string16 NumberToString16(unsigned long long value); +BASE_EXPORT std::string NumberToString(double value); +BASE_EXPORT string16 NumberToString16(double value); + +// String -> number conversions ------------------------------------------------ + +// Perform a best-effort conversion of the input string to a numeric type, +// setting |*output| to the result of the conversion. Returns true for +// "perfect" conversions; returns false in the following cases: +// - Overflow. |*output| will be set to the maximum value supported +// by the data type. +// - Underflow. |*output| will be set to the minimum value supported +// by the data type. +// - Trailing characters in the string after parsing the number. |*output| +// will be set to the value of the number that was parsed. +// - Leading whitespace in the string before parsing the number. |*output| will +// be set to the value of the number that was parsed. +// - No characters parseable as a number at the beginning of the string. +// |*output| will be set to 0. +// - Empty string. |*output| will be set to 0. +// WARNING: Will write to |output| even when returning false. +// Read the comments above carefully. +BASE_EXPORT bool StringToInt(StringPiece input, int* output); +BASE_EXPORT bool StringToInt(StringPiece16 input, int* output); + +BASE_EXPORT bool StringToUint(StringPiece input, unsigned* output); +BASE_EXPORT bool StringToUint(StringPiece16 input, unsigned* output); + +BASE_EXPORT bool StringToInt64(StringPiece input, int64_t* output); +BASE_EXPORT bool StringToInt64(StringPiece16 input, int64_t* output); + +BASE_EXPORT bool StringToUint64(StringPiece input, uint64_t* output); +BASE_EXPORT bool StringToUint64(StringPiece16 input, uint64_t* output); + +BASE_EXPORT bool StringToSizeT(StringPiece input, size_t* output); +BASE_EXPORT bool StringToSizeT(StringPiece16 input, size_t* output); + +// For floating-point conversions, only conversions of input strings in decimal +// form are defined to work. Behavior with strings representing floating-point +// numbers in hexadecimal, and strings representing non-finite values (such as +// NaN and inf) is undefined. Otherwise, these behave the same as the integral +// variants. This expects the input string to NOT be specific to the locale. +// If your input is locale specific, use ICU to read the number. +// WARNING: Will write to |output| even when returning false. +// Read the comments here and above StringToInt() carefully. +BASE_EXPORT bool StringToDouble(StringPiece input, double* output); +BASE_EXPORT bool StringToDouble(StringPiece16 input, double* output); + +// Hex encoding ---------------------------------------------------------------- + +// Returns a hex string representation of a binary buffer. The returned hex +// string will be in upper case. This function does not check if |size| is +// within reasonable limits since it's written with trusted data in mind. If +// you suspect that the data you want to format might be large, the absolute +// max size for |size| should be is +// std::numeric_limits<size_t>::max() / 2 +BASE_EXPORT std::string HexEncode(const void* bytes, size_t size); +BASE_EXPORT std::string HexEncode(base::span<const uint8_t> bytes); + +// Best effort conversion, see StringToInt above for restrictions. +// Will only successful parse hex values that will fit into |output|, i.e. +// -0x80000000 < |input| < 0x7FFFFFFF. +BASE_EXPORT bool HexStringToInt(StringPiece input, int* output); + +// Best effort conversion, see StringToInt above for restrictions. +// Will only successful parse hex values that will fit into |output|, i.e. +// 0x00000000 < |input| < 0xFFFFFFFF. +// The string is not required to start with 0x. +BASE_EXPORT bool HexStringToUInt(StringPiece input, uint32_t* output); + +// Best effort conversion, see StringToInt above for restrictions. +// Will only successful parse hex values that will fit into |output|, i.e. +// -0x8000000000000000 < |input| < 0x7FFFFFFFFFFFFFFF. +BASE_EXPORT bool HexStringToInt64(StringPiece input, int64_t* output); + +// Best effort conversion, see StringToInt above for restrictions. +// Will only successful parse hex values that will fit into |output|, i.e. +// 0x0000000000000000 < |input| < 0xFFFFFFFFFFFFFFFF. +// The string is not required to start with 0x. +BASE_EXPORT bool HexStringToUInt64(StringPiece input, uint64_t* output); + +// Similar to the previous functions, except that output is a vector of bytes. +// |*output| will contain as many bytes as were successfully parsed prior to the +// error. There is no overflow, but input.size() must be evenly divisible by 2. +// Leading 0x or +/- are not allowed. +BASE_EXPORT bool HexStringToBytes(StringPiece input, + std::vector<uint8_t>* output); + +// Same as HexStringToBytes, but for an std::string. +BASE_EXPORT bool HexStringToString(StringPiece input, std::string* output); + +// Decodes the hex string |input| into a presized |output|. The output buffer +// must be sized exactly to |input.size() / 2| or decoding will fail and no +// bytes will be written to |output|. Decoding an empty input is also +// considered a failure. When decoding fails due to encountering invalid input +// characters, |output| will have been filled with the decoded bytes up until +// the failure. +BASE_EXPORT bool HexStringToSpan(StringPiece input, base::span<uint8_t> output); + +} // namespace base + +#endif // BASE_STRINGS_STRING_NUMBER_CONVERSIONS_H_ diff --git a/security/sandbox/chromium/base/strings/string_piece.cc b/security/sandbox/chromium/base/strings/string_piece.cc new file mode 100644 index 0000000000..d743144a4e --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_piece.cc @@ -0,0 +1,426 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// Copied from strings/stringpiece.cc with modifications + +#include "base/strings/string_piece.h" + +#include <limits.h> + +#include <algorithm> +#include <ostream> + +#include "base/logging.h" +#include "base/strings/utf_string_conversions.h" + +namespace base { +namespace { + +// For each character in characters_wanted, sets the index corresponding +// to the ASCII code of that character to 1 in table. This is used by +// the find_.*_of methods below to tell whether or not a character is in +// the lookup table in constant time. +// The argument `table' must be an array that is large enough to hold all +// the possible values of an unsigned char. Thus it should be be declared +// as follows: +// bool table[UCHAR_MAX + 1] +inline void BuildLookupTable(const StringPiece& characters_wanted, + bool* table) { + const size_t length = characters_wanted.length(); + const char* const data = characters_wanted.data(); + for (size_t i = 0; i < length; ++i) { + table[static_cast<unsigned char>(data[i])] = true; + } +} + +} // namespace + +// MSVC doesn't like complex extern templates and DLLs. +#if !defined(COMPILER_MSVC) +template class BasicStringPiece<std::string>; +template class BasicStringPiece<string16>; +#endif + +std::ostream& operator<<(std::ostream& o, const StringPiece& piece) { + o.write(piece.data(), static_cast<std::streamsize>(piece.size())); + return o; +} + +std::ostream& operator<<(std::ostream& o, const StringPiece16& piece) { + return o << UTF16ToUTF8(piece); +} + +namespace internal { + +template<typename STR> +void AppendToStringT(const BasicStringPiece<STR>& self, STR* target) { + if (!self.empty()) + target->append(self.data(), self.size()); +} + +void AppendToString(const StringPiece& self, std::string* target) { + AppendToStringT(self, target); +} + +void AppendToString(const StringPiece16& self, string16* target) { + AppendToStringT(self, target); +} + +template<typename STR> +size_t copyT(const BasicStringPiece<STR>& self, + typename STR::value_type* buf, + size_t n, + size_t pos) { + size_t ret = std::min(self.size() - pos, n); + memcpy(buf, self.data() + pos, ret * sizeof(typename STR::value_type)); + return ret; +} + +size_t copy(const StringPiece& self, char* buf, size_t n, size_t pos) { + return copyT(self, buf, n, pos); +} + +size_t copy(const StringPiece16& self, char16* buf, size_t n, size_t pos) { + return copyT(self, buf, n, pos); +} + +template<typename STR> +size_t findT(const BasicStringPiece<STR>& self, + const BasicStringPiece<STR>& s, + size_t pos) { + if (pos > self.size()) + return BasicStringPiece<STR>::npos; + + typename BasicStringPiece<STR>::const_iterator result = + std::search(self.begin() + pos, self.end(), s.begin(), s.end()); + const size_t xpos = + static_cast<size_t>(result - self.begin()); + return xpos + s.size() <= self.size() ? xpos : BasicStringPiece<STR>::npos; +} + +size_t find(const StringPiece& self, const StringPiece& s, size_t pos) { + return findT(self, s, pos); +} + +size_t find(const StringPiece16& self, const StringPiece16& s, size_t pos) { + return findT(self, s, pos); +} + +template<typename STR> +size_t findT(const BasicStringPiece<STR>& self, + typename STR::value_type c, + size_t pos) { + if (pos >= self.size()) + return BasicStringPiece<STR>::npos; + + typename BasicStringPiece<STR>::const_iterator result = + std::find(self.begin() + pos, self.end(), c); + return result != self.end() ? + static_cast<size_t>(result - self.begin()) : BasicStringPiece<STR>::npos; +} + +size_t find(const StringPiece& self, char c, size_t pos) { + return findT(self, c, pos); +} + +size_t find(const StringPiece16& self, char16 c, size_t pos) { + return findT(self, c, pos); +} + +template<typename STR> +size_t rfindT(const BasicStringPiece<STR>& self, + const BasicStringPiece<STR>& s, + size_t pos) { + if (self.size() < s.size()) + return BasicStringPiece<STR>::npos; + + if (s.empty()) + return std::min(self.size(), pos); + + typename BasicStringPiece<STR>::const_iterator last = + self.begin() + std::min(self.size() - s.size(), pos) + s.size(); + typename BasicStringPiece<STR>::const_iterator result = + std::find_end(self.begin(), last, s.begin(), s.end()); + return result != last ? + static_cast<size_t>(result - self.begin()) : BasicStringPiece<STR>::npos; +} + +size_t rfind(const StringPiece& self, const StringPiece& s, size_t pos) { + return rfindT(self, s, pos); +} + +size_t rfind(const StringPiece16& self, const StringPiece16& s, size_t pos) { + return rfindT(self, s, pos); +} + +template<typename STR> +size_t rfindT(const BasicStringPiece<STR>& self, + typename STR::value_type c, + size_t pos) { + if (self.size() == 0) + return BasicStringPiece<STR>::npos; + + for (size_t i = std::min(pos, self.size() - 1); ; + --i) { + if (self.data()[i] == c) + return i; + if (i == 0) + break; + } + return BasicStringPiece<STR>::npos; +} + +size_t rfind(const StringPiece& self, char c, size_t pos) { + return rfindT(self, c, pos); +} + +size_t rfind(const StringPiece16& self, char16 c, size_t pos) { + return rfindT(self, c, pos); +} + +// 8-bit version using lookup table. +size_t find_first_of(const StringPiece& self, + const StringPiece& s, + size_t pos) { + if (self.size() == 0 || s.size() == 0) + return StringPiece::npos; + + // Avoid the cost of BuildLookupTable() for a single-character search. + if (s.size() == 1) + return find(self, s.data()[0], pos); + + bool lookup[UCHAR_MAX + 1] = { false }; + BuildLookupTable(s, lookup); + for (size_t i = pos; i < self.size(); ++i) { + if (lookup[static_cast<unsigned char>(self.data()[i])]) { + return i; + } + } + return StringPiece::npos; +} + +// 16-bit brute force version. +size_t find_first_of(const StringPiece16& self, + const StringPiece16& s, + size_t pos) { + // Use the faster std::find() if searching for a single character. + StringPiece16::const_iterator found = + s.size() == 1 ? std::find(self.begin() + pos, self.end(), s[0]) + : std::find_first_of(self.begin() + pos, self.end(), + s.begin(), s.end()); + if (found == self.end()) + return StringPiece16::npos; + return found - self.begin(); +} + +// 8-bit version using lookup table. +size_t find_first_not_of(const StringPiece& self, + const StringPiece& s, + size_t pos) { + if (self.size() == 0) + return StringPiece::npos; + + if (s.size() == 0) + return 0; + + // Avoid the cost of BuildLookupTable() for a single-character search. + if (s.size() == 1) + return find_first_not_of(self, s.data()[0], pos); + + bool lookup[UCHAR_MAX + 1] = { false }; + BuildLookupTable(s, lookup); + for (size_t i = pos; i < self.size(); ++i) { + if (!lookup[static_cast<unsigned char>(self.data()[i])]) { + return i; + } + } + return StringPiece::npos; +} + +// 16-bit brute-force version. +BASE_EXPORT size_t find_first_not_of(const StringPiece16& self, + const StringPiece16& s, + size_t pos) { + if (self.size() == 0) + return StringPiece16::npos; + + for (size_t self_i = pos; self_i < self.size(); ++self_i) { + bool found = false; + for (auto c : s) { + if (self[self_i] == c) { + found = true; + break; + } + } + if (!found) + return self_i; + } + return StringPiece16::npos; +} + +template<typename STR> +size_t find_first_not_ofT(const BasicStringPiece<STR>& self, + typename STR::value_type c, + size_t pos) { + if (self.size() == 0) + return BasicStringPiece<STR>::npos; + + for (; pos < self.size(); ++pos) { + if (self.data()[pos] != c) { + return pos; + } + } + return BasicStringPiece<STR>::npos; +} + +size_t find_first_not_of(const StringPiece& self, + char c, + size_t pos) { + return find_first_not_ofT(self, c, pos); +} + +size_t find_first_not_of(const StringPiece16& self, + char16 c, + size_t pos) { + return find_first_not_ofT(self, c, pos); +} + +// 8-bit version using lookup table. +size_t find_last_of(const StringPiece& self, const StringPiece& s, size_t pos) { + if (self.size() == 0 || s.size() == 0) + return StringPiece::npos; + + // Avoid the cost of BuildLookupTable() for a single-character search. + if (s.size() == 1) + return rfind(self, s.data()[0], pos); + + bool lookup[UCHAR_MAX + 1] = { false }; + BuildLookupTable(s, lookup); + for (size_t i = std::min(pos, self.size() - 1); ; --i) { + if (lookup[static_cast<unsigned char>(self.data()[i])]) + return i; + if (i == 0) + break; + } + return StringPiece::npos; +} + +// 16-bit brute-force version. +size_t find_last_of(const StringPiece16& self, + const StringPiece16& s, + size_t pos) { + if (self.size() == 0) + return StringPiece16::npos; + + for (size_t self_i = std::min(pos, self.size() - 1); ; + --self_i) { + for (auto c : s) { + if (self.data()[self_i] == c) + return self_i; + } + if (self_i == 0) + break; + } + return StringPiece16::npos; +} + +// 8-bit version using lookup table. +size_t find_last_not_of(const StringPiece& self, + const StringPiece& s, + size_t pos) { + if (self.size() == 0) + return StringPiece::npos; + + size_t i = std::min(pos, self.size() - 1); + if (s.size() == 0) + return i; + + // Avoid the cost of BuildLookupTable() for a single-character search. + if (s.size() == 1) + return find_last_not_of(self, s.data()[0], pos); + + bool lookup[UCHAR_MAX + 1] = { false }; + BuildLookupTable(s, lookup); + for (; ; --i) { + if (!lookup[static_cast<unsigned char>(self.data()[i])]) + return i; + if (i == 0) + break; + } + return StringPiece::npos; +} + +// 16-bit brute-force version. +size_t find_last_not_of(const StringPiece16& self, + const StringPiece16& s, + size_t pos) { + if (self.size() == 0) + return StringPiece::npos; + + for (size_t self_i = std::min(pos, self.size() - 1); ; --self_i) { + bool found = false; + for (auto c : s) { + if (self.data()[self_i] == c) { + found = true; + break; + } + } + if (!found) + return self_i; + if (self_i == 0) + break; + } + return StringPiece16::npos; +} + +template<typename STR> +size_t find_last_not_ofT(const BasicStringPiece<STR>& self, + typename STR::value_type c, + size_t pos) { + if (self.size() == 0) + return BasicStringPiece<STR>::npos; + + for (size_t i = std::min(pos, self.size() - 1); ; --i) { + if (self.data()[i] != c) + return i; + if (i == 0) + break; + } + return BasicStringPiece<STR>::npos; +} + +size_t find_last_not_of(const StringPiece& self, + char c, + size_t pos) { + return find_last_not_ofT(self, c, pos); +} + +size_t find_last_not_of(const StringPiece16& self, + char16 c, + size_t pos) { + return find_last_not_ofT(self, c, pos); +} + +template<typename STR> +BasicStringPiece<STR> substrT(const BasicStringPiece<STR>& self, + size_t pos, + size_t n) { + if (pos > self.size()) pos = self.size(); + if (n > self.size() - pos) n = self.size() - pos; + return BasicStringPiece<STR>(self.data() + pos, n); +} + +StringPiece substr(const StringPiece& self, + size_t pos, + size_t n) { + return substrT(self, pos, n); +} + +StringPiece16 substr(const StringPiece16& self, + size_t pos, + size_t n) { + return substrT(self, pos, n); +} + +} // namespace internal +} // namespace base diff --git a/security/sandbox/chromium/base/strings/string_piece.h b/security/sandbox/chromium/base/strings/string_piece.h new file mode 100644 index 0000000000..536bf3f023 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_piece.h @@ -0,0 +1,513 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// Copied from strings/stringpiece.h with modifications +// +// A string-like object that points to a sized piece of memory. +// +// You can use StringPiece as a function or method parameter. A StringPiece +// parameter can receive a double-quoted string literal argument, a "const +// char*" argument, a string argument, or a StringPiece argument with no data +// copying. Systematic use of StringPiece for arguments reduces data +// copies and strlen() calls. +// +// Prefer passing StringPieces by value: +// void MyFunction(StringPiece arg); +// If circumstances require, you may also pass by const reference: +// void MyFunction(const StringPiece& arg); // not preferred +// Both of these have the same lifetime semantics. Passing by value +// generates slightly smaller code. For more discussion, Googlers can see +// the thread go/stringpiecebyvalue on c-users. + +#ifndef BASE_STRINGS_STRING_PIECE_H_ +#define BASE_STRINGS_STRING_PIECE_H_ + +#include <stddef.h> + +#include <iosfwd> +#include <string> +#include <type_traits> + +#include "base/base_export.h" +#include "base/logging.h" +#include "base/strings/char_traits.h" +#include "base/strings/string16.h" +#include "base/strings/string_piece_forward.h" + +namespace base { + +// internal -------------------------------------------------------------------- + +// Many of the StringPiece functions use different implementations for the +// 8-bit and 16-bit versions, and we don't want lots of template expansions in +// this (very common) header that will slow down compilation. +// +// So here we define overloaded functions called by the StringPiece template. +// For those that share an implementation, the two versions will expand to a +// template internal to the .cc file. +namespace internal { + +BASE_EXPORT void AppendToString(const StringPiece& self, std::string* target); +BASE_EXPORT void AppendToString(const StringPiece16& self, string16* target); + +BASE_EXPORT size_t copy(const StringPiece& self, + char* buf, + size_t n, + size_t pos); +BASE_EXPORT size_t copy(const StringPiece16& self, + char16* buf, + size_t n, + size_t pos); + +BASE_EXPORT size_t find(const StringPiece& self, + const StringPiece& s, + size_t pos); +BASE_EXPORT size_t find(const StringPiece16& self, + const StringPiece16& s, + size_t pos); +BASE_EXPORT size_t find(const StringPiece& self, + char c, + size_t pos); +BASE_EXPORT size_t find(const StringPiece16& self, + char16 c, + size_t pos); + +BASE_EXPORT size_t rfind(const StringPiece& self, + const StringPiece& s, + size_t pos); +BASE_EXPORT size_t rfind(const StringPiece16& self, + const StringPiece16& s, + size_t pos); +BASE_EXPORT size_t rfind(const StringPiece& self, + char c, + size_t pos); +BASE_EXPORT size_t rfind(const StringPiece16& self, + char16 c, + size_t pos); + +BASE_EXPORT size_t find_first_of(const StringPiece& self, + const StringPiece& s, + size_t pos); +BASE_EXPORT size_t find_first_of(const StringPiece16& self, + const StringPiece16& s, + size_t pos); + +BASE_EXPORT size_t find_first_not_of(const StringPiece& self, + const StringPiece& s, + size_t pos); +BASE_EXPORT size_t find_first_not_of(const StringPiece16& self, + const StringPiece16& s, + size_t pos); +BASE_EXPORT size_t find_first_not_of(const StringPiece& self, + char c, + size_t pos); +BASE_EXPORT size_t find_first_not_of(const StringPiece16& self, + char16 c, + size_t pos); + +BASE_EXPORT size_t find_last_of(const StringPiece& self, + const StringPiece& s, + size_t pos); +BASE_EXPORT size_t find_last_of(const StringPiece16& self, + const StringPiece16& s, + size_t pos); +BASE_EXPORT size_t find_last_of(const StringPiece& self, + char c, + size_t pos); +BASE_EXPORT size_t find_last_of(const StringPiece16& self, + char16 c, + size_t pos); + +BASE_EXPORT size_t find_last_not_of(const StringPiece& self, + const StringPiece& s, + size_t pos); +BASE_EXPORT size_t find_last_not_of(const StringPiece16& self, + const StringPiece16& s, + size_t pos); +BASE_EXPORT size_t find_last_not_of(const StringPiece16& self, + char16 c, + size_t pos); +BASE_EXPORT size_t find_last_not_of(const StringPiece& self, + char c, + size_t pos); + +BASE_EXPORT StringPiece substr(const StringPiece& self, + size_t pos, + size_t n); +BASE_EXPORT StringPiece16 substr(const StringPiece16& self, + size_t pos, + size_t n); + +} // namespace internal + +// BasicStringPiece ------------------------------------------------------------ + +// Defines the types, methods, operators, and data members common to both +// StringPiece and StringPiece16. +// +// This is templatized by string class type rather than character type, so +// BasicStringPiece<std::string> or BasicStringPiece<base::string16>. +template <typename STRING_TYPE> class BasicStringPiece { + public: + // Standard STL container boilerplate. + typedef size_t size_type; + typedef typename STRING_TYPE::value_type value_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef const value_type& const_reference; + typedef ptrdiff_t difference_type; + typedef const value_type* const_iterator; + typedef std::reverse_iterator<const_iterator> const_reverse_iterator; + + static const size_type npos; + + public: + // We provide non-explicit singleton constructors so users can pass + // in a "const char*" or a "string" wherever a "StringPiece" is + // expected (likewise for char16, string16, StringPiece16). + constexpr BasicStringPiece() : ptr_(NULL), length_(0) {} + // TODO(dcheng): Construction from nullptr is not allowed for + // std::basic_string_view, so remove the special handling for it. + // Note: This doesn't just use STRING_TYPE::traits_type::length(), since that + // isn't constexpr until C++17. + constexpr BasicStringPiece(const value_type* str) + : ptr_(str), length_(!str ? 0 : CharTraits<value_type>::length(str)) {} + BasicStringPiece(const STRING_TYPE& str) + : ptr_(str.data()), length_(str.size()) {} + constexpr BasicStringPiece(const value_type* offset, size_type len) + : ptr_(offset), length_(len) {} + BasicStringPiece(const typename STRING_TYPE::const_iterator& begin, + const typename STRING_TYPE::const_iterator& end) { + DCHECK(begin <= end) << "StringPiece iterators swapped or invalid."; + length_ = static_cast<size_t>(std::distance(begin, end)); + + // The length test before assignment is to avoid dereferencing an iterator + // that may point to the end() of a string. + ptr_ = length_ > 0 ? &*begin : nullptr; + } + + // data() may return a pointer to a buffer with embedded NULs, and the + // returned buffer may or may not be null terminated. Therefore it is + // typically a mistake to pass data() to a routine that expects a NUL + // terminated string. + constexpr const value_type* data() const { return ptr_; } + constexpr size_type size() const noexcept { return length_; } + constexpr size_type length() const noexcept { return length_; } + bool empty() const { return length_ == 0; } + + constexpr value_type operator[](size_type i) const { + CHECK(i < length_); + return ptr_[i]; + } + + value_type front() const { + CHECK_NE(0UL, length_); + return ptr_[0]; + } + + value_type back() const { + CHECK_NE(0UL, length_); + return ptr_[length_ - 1]; + } + + constexpr void remove_prefix(size_type n) { + CHECK(n <= length_); + ptr_ += n; + length_ -= n; + } + + constexpr void remove_suffix(size_type n) { + CHECK(n <= length_); + length_ -= n; + } + + constexpr int compare(BasicStringPiece x) const noexcept { + int r = CharTraits<value_type>::compare( + ptr_, x.ptr_, (length_ < x.length_ ? length_ : x.length_)); + if (r == 0) { + if (length_ < x.length_) r = -1; + else if (length_ > x.length_) r = +1; + } + return r; + } + + // This is the style of conversion preferred by std::string_view in C++17. + explicit operator STRING_TYPE() const { return as_string(); } + + STRING_TYPE as_string() const { + // std::string doesn't like to take a NULL pointer even with a 0 size. + return empty() ? STRING_TYPE() : STRING_TYPE(data(), size()); + } + + const_iterator begin() const { return ptr_; } + const_iterator end() const { return ptr_ + length_; } + const_reverse_iterator rbegin() const { + return const_reverse_iterator(ptr_ + length_); + } + const_reverse_iterator rend() const { + return const_reverse_iterator(ptr_); + } + + size_type max_size() const { return length_; } + size_type capacity() const { return length_; } + + void AppendToString(STRING_TYPE* target) const { + internal::AppendToString(*this, target); + } + + size_type copy(value_type* buf, size_type n, size_type pos = 0) const { + return internal::copy(*this, buf, n, pos); + } + + // Does "this" start with "x" + constexpr bool starts_with(BasicStringPiece x) const noexcept { + return ( + (this->length_ >= x.length_) && + (CharTraits<value_type>::compare(this->ptr_, x.ptr_, x.length_) == 0)); + } + + // Does "this" end with "x" + constexpr bool ends_with(BasicStringPiece x) const noexcept { + return ((this->length_ >= x.length_) && + (CharTraits<value_type>::compare( + this->ptr_ + (this->length_ - x.length_), x.ptr_, x.length_) == + 0)); + } + + // find: Search for a character or substring at a given offset. + size_type find(const BasicStringPiece<STRING_TYPE>& s, + size_type pos = 0) const { + return internal::find(*this, s, pos); + } + size_type find(value_type c, size_type pos = 0) const { + return internal::find(*this, c, pos); + } + + // rfind: Reverse find. + size_type rfind(const BasicStringPiece& s, + size_type pos = BasicStringPiece::npos) const { + return internal::rfind(*this, s, pos); + } + size_type rfind(value_type c, size_type pos = BasicStringPiece::npos) const { + return internal::rfind(*this, c, pos); + } + + // find_first_of: Find the first occurence of one of a set of characters. + size_type find_first_of(const BasicStringPiece& s, + size_type pos = 0) const { + return internal::find_first_of(*this, s, pos); + } + size_type find_first_of(value_type c, size_type pos = 0) const { + return find(c, pos); + } + + // find_first_not_of: Find the first occurence not of a set of characters. + size_type find_first_not_of(const BasicStringPiece& s, + size_type pos = 0) const { + return internal::find_first_not_of(*this, s, pos); + } + size_type find_first_not_of(value_type c, size_type pos = 0) const { + return internal::find_first_not_of(*this, c, pos); + } + + // find_last_of: Find the last occurence of one of a set of characters. + size_type find_last_of(const BasicStringPiece& s, + size_type pos = BasicStringPiece::npos) const { + return internal::find_last_of(*this, s, pos); + } + size_type find_last_of(value_type c, + size_type pos = BasicStringPiece::npos) const { + return rfind(c, pos); + } + + // find_last_not_of: Find the last occurence not of a set of characters. + size_type find_last_not_of(const BasicStringPiece& s, + size_type pos = BasicStringPiece::npos) const { + return internal::find_last_not_of(*this, s, pos); + } + size_type find_last_not_of(value_type c, + size_type pos = BasicStringPiece::npos) const { + return internal::find_last_not_of(*this, c, pos); + } + + // substr. + BasicStringPiece substr(size_type pos, + size_type n = BasicStringPiece::npos) const { + return internal::substr(*this, pos, n); + } + + protected: + const value_type* ptr_; + size_type length_; +}; + +template <typename STRING_TYPE> +const typename BasicStringPiece<STRING_TYPE>::size_type +BasicStringPiece<STRING_TYPE>::npos = + typename BasicStringPiece<STRING_TYPE>::size_type(-1); + +// MSVC doesn't like complex extern templates and DLLs. +#if !defined(COMPILER_MSVC) +extern template class BASE_EXPORT BasicStringPiece<std::string>; +extern template class BASE_EXPORT BasicStringPiece<string16>; +#endif + +// Comparison operators -------------------------------------------------------- +// operator == +template <typename StringT> +constexpr bool operator==(BasicStringPiece<StringT> lhs, + BasicStringPiece<StringT> rhs) noexcept { + return lhs.size() == rhs.size() && lhs.compare(rhs) == 0; +} + +// Here and below we make use of std::common_type_t to emulate an identity type +// transformation. This creates a non-deduced context, so that we can compare +// StringPieces with types that implicitly convert to StringPieces. See +// https://wg21.link/n3766 for details. +// Furthermore, we require dummy template parameters for these overloads to work +// around a name mangling issue on Windows. +template <typename StringT, int = 1> +constexpr bool operator==( + BasicStringPiece<StringT> lhs, + std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept { + return lhs.size() == rhs.size() && lhs.compare(rhs) == 0; +} + +template <typename StringT, int = 2> +constexpr bool operator==(std::common_type_t<BasicStringPiece<StringT>> lhs, + BasicStringPiece<StringT> rhs) noexcept { + return lhs.size() == rhs.size() && lhs.compare(rhs) == 0; +} + +// operator != +template <typename StringT> +constexpr bool operator!=(BasicStringPiece<StringT> lhs, + BasicStringPiece<StringT> rhs) noexcept { + return !(lhs == rhs); +} + +template <typename StringT, int = 1> +constexpr bool operator!=( + BasicStringPiece<StringT> lhs, + std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept { + return !(lhs == rhs); +} + +template <typename StringT, int = 2> +constexpr bool operator!=(std::common_type_t<BasicStringPiece<StringT>> lhs, + BasicStringPiece<StringT> rhs) noexcept { + return !(lhs == rhs); +} + +// operator < +template <typename StringT> +constexpr bool operator<(BasicStringPiece<StringT> lhs, + BasicStringPiece<StringT> rhs) noexcept { + return lhs.compare(rhs) < 0; +} + +template <typename StringT, int = 1> +constexpr bool operator<( + BasicStringPiece<StringT> lhs, + std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept { + return lhs.compare(rhs) < 0; +} + +template <typename StringT, int = 2> +constexpr bool operator<(std::common_type_t<BasicStringPiece<StringT>> lhs, + BasicStringPiece<StringT> rhs) noexcept { + return lhs.compare(rhs) < 0; +} + +// operator > +template <typename StringT> +constexpr bool operator>(BasicStringPiece<StringT> lhs, + BasicStringPiece<StringT> rhs) noexcept { + return rhs < lhs; +} + +template <typename StringT, int = 1> +constexpr bool operator>( + BasicStringPiece<StringT> lhs, + std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept { + return rhs < lhs; +} + +template <typename StringT, int = 2> +constexpr bool operator>(std::common_type_t<BasicStringPiece<StringT>> lhs, + BasicStringPiece<StringT> rhs) noexcept { + return rhs < lhs; +} + +// operator <= +template <typename StringT> +constexpr bool operator<=(BasicStringPiece<StringT> lhs, + BasicStringPiece<StringT> rhs) noexcept { + return !(rhs < lhs); +} + +template <typename StringT, int = 1> +constexpr bool operator<=( + BasicStringPiece<StringT> lhs, + std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept { + return !(rhs < lhs); +} + +template <typename StringT, int = 2> +constexpr bool operator<=(std::common_type_t<BasicStringPiece<StringT>> lhs, + BasicStringPiece<StringT> rhs) noexcept { + return !(rhs < lhs); +} + +// operator >= +template <typename StringT> +constexpr bool operator>=(BasicStringPiece<StringT> lhs, + BasicStringPiece<StringT> rhs) noexcept { + return !(lhs < rhs); +} + +template <typename StringT, int = 1> +constexpr bool operator>=( + BasicStringPiece<StringT> lhs, + std::common_type_t<BasicStringPiece<StringT>> rhs) noexcept { + return !(lhs < rhs); +} + +template <typename StringT, int = 2> +constexpr bool operator>=(std::common_type_t<BasicStringPiece<StringT>> lhs, + BasicStringPiece<StringT> rhs) noexcept { + return !(lhs < rhs); +} + +BASE_EXPORT std::ostream& operator<<(std::ostream& o, + const StringPiece& piece); + +BASE_EXPORT std::ostream& operator<<(std::ostream& o, + const StringPiece16& piece); + +// Hashing --------------------------------------------------------------------- + +// We provide appropriate hash functions so StringPiece and StringPiece16 can +// be used as keys in hash sets and maps. + +// This hash function is copied from base/strings/string16.h. We don't use the +// ones already defined for string and string16 directly because it would +// require the string constructors to be called, which we don't want. + +template <typename StringPieceType> +struct StringPieceHashImpl { + std::size_t operator()(StringPieceType sp) const { + std::size_t result = 0; + for (auto c : sp) + result = (result * 131) + c; + return result; + } +}; + +using StringPieceHash = StringPieceHashImpl<StringPiece>; +using StringPiece16Hash = StringPieceHashImpl<StringPiece16>; +using WStringPieceHash = StringPieceHashImpl<WStringPiece>; + +} // namespace base + +#endif // BASE_STRINGS_STRING_PIECE_H_ diff --git a/security/sandbox/chromium/base/strings/string_piece_forward.h b/security/sandbox/chromium/base/strings/string_piece_forward.h new file mode 100644 index 0000000000..b50b9806c9 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_piece_forward.h @@ -0,0 +1,24 @@ +// Copyright 2017 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Forward declaration of StringPiece types from base/strings/string_piece.h + +#ifndef BASE_STRINGS_STRING_PIECE_FORWARD_H_ +#define BASE_STRINGS_STRING_PIECE_FORWARD_H_ + +#include <string> + +#include "base/strings/string16.h" + +namespace base { + +template <typename STRING_TYPE> +class BasicStringPiece; +typedef BasicStringPiece<std::string> StringPiece; +typedef BasicStringPiece<string16> StringPiece16; +typedef BasicStringPiece<std::wstring> WStringPiece; + +} // namespace base + +#endif // BASE_STRINGS_STRING_PIECE_FORWARD_H_ diff --git a/security/sandbox/chromium/base/strings/string_split.cc b/security/sandbox/chromium/base/strings/string_split.cc new file mode 100644 index 0000000000..42bc5e5b60 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_split.cc @@ -0,0 +1,254 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/string_split.h" + +#include <stddef.h> + +#include "base/logging.h" +#include "base/strings/string_util.h" +#include "base/third_party/icu/icu_utf.h" + +namespace base { + +namespace { + +// Returns either the ASCII or UTF-16 whitespace. +template<typename Str> BasicStringPiece<Str> WhitespaceForType(); +#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) +template <> +WStringPiece WhitespaceForType<std::wstring>() { + return kWhitespaceWide; +} +#endif + +template<> StringPiece16 WhitespaceForType<string16>() { + return kWhitespaceUTF16; +} +template<> StringPiece WhitespaceForType<std::string>() { + return kWhitespaceASCII; +} + +// General string splitter template. Can take 8- or 16-bit input, can produce +// the corresponding string or StringPiece output. +template <typename OutputStringType, typename Str> +static std::vector<OutputStringType> SplitStringT( + BasicStringPiece<Str> str, + BasicStringPiece<Str> delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + std::vector<OutputStringType> result; + if (str.empty()) + return result; + + size_t start = 0; + while (start != Str::npos) { + size_t end = str.find_first_of(delimiter, start); + + BasicStringPiece<Str> piece; + if (end == Str::npos) { + piece = str.substr(start); + start = Str::npos; + } else { + piece = str.substr(start, end - start); + start = end + 1; + } + + if (whitespace == TRIM_WHITESPACE) + piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL); + + if (result_type == SPLIT_WANT_ALL || !piece.empty()) + result.emplace_back(piece); + } + return result; +} + +bool AppendStringKeyValue(StringPiece input, + char delimiter, + StringPairs* result) { + // Always append a new item regardless of success (it might be empty). The + // below code will copy the strings directly into the result pair. + result->resize(result->size() + 1); + auto& result_pair = result->back(); + + // Find the delimiter. + size_t end_key_pos = input.find_first_of(delimiter); + if (end_key_pos == std::string::npos) { + DVLOG(1) << "cannot find delimiter in: " << input; + return false; // No delimiter. + } + result_pair.first = std::string(input.substr(0, end_key_pos)); + + // Find the value string. + StringPiece remains = input.substr(end_key_pos, input.size() - end_key_pos); + size_t begin_value_pos = remains.find_first_not_of(delimiter); + if (begin_value_pos == StringPiece::npos) { + DVLOG(1) << "cannot parse value from input: " << input; + return false; // No value. + } + + result_pair.second = std::string( + remains.substr(begin_value_pos, remains.size() - begin_value_pos)); + + return true; +} + +template <typename OutputStringType, typename Str> +std::vector<OutputStringType> SplitStringUsingSubstrT( + BasicStringPiece<Str> input, + BasicStringPiece<Str> delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + using Piece = BasicStringPiece<Str>; + using size_type = typename Piece::size_type; + + std::vector<OutputStringType> result; + for (size_type begin_index = 0, end_index = 0; end_index != Piece::npos; + begin_index = end_index + delimiter.size()) { + end_index = input.find(delimiter, begin_index); + Piece term = end_index == Piece::npos + ? input.substr(begin_index) + : input.substr(begin_index, end_index - begin_index); + + if (whitespace == TRIM_WHITESPACE) + term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL); + + if (result_type == SPLIT_WANT_ALL || !term.empty()) + result.emplace_back(term); + } + + return result; +} + +} // namespace + +std::vector<std::string> SplitString(StringPiece input, + StringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) { + return SplitStringT<std::string>(input, separators, whitespace, result_type); +} + +std::vector<string16> SplitString(StringPiece16 input, + StringPiece16 separators, + WhitespaceHandling whitespace, + SplitResult result_type) { + return SplitStringT<string16>(input, separators, whitespace, result_type); +} + +std::vector<StringPiece> SplitStringPiece(StringPiece input, + StringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) { + return SplitStringT<StringPiece>(input, separators, whitespace, result_type); +} + +std::vector<StringPiece16> SplitStringPiece(StringPiece16 input, + StringPiece16 separators, + WhitespaceHandling whitespace, + SplitResult result_type) { + return SplitStringT<StringPiece16>(input, separators, whitespace, + result_type); +} + +bool SplitStringIntoKeyValuePairs(StringPiece input, + char key_value_delimiter, + char key_value_pair_delimiter, + StringPairs* key_value_pairs) { + return SplitStringIntoKeyValuePairsUsingSubstr( + input, key_value_delimiter, StringPiece(&key_value_pair_delimiter, 1), + key_value_pairs); +} + +bool SplitStringIntoKeyValuePairsUsingSubstr( + StringPiece input, + char key_value_delimiter, + StringPiece key_value_pair_delimiter, + StringPairs* key_value_pairs) { + key_value_pairs->clear(); + + std::vector<StringPiece> pairs = SplitStringPieceUsingSubstr( + input, key_value_pair_delimiter, TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY); + key_value_pairs->reserve(pairs.size()); + + bool success = true; + for (const StringPiece& pair : pairs) { + if (!AppendStringKeyValue(pair, key_value_delimiter, key_value_pairs)) { + // Don't return here, to allow for pairs without associated + // value or key; just record that the split failed. + success = false; + } + } + return success; +} + +std::vector<string16> SplitStringUsingSubstr(StringPiece16 input, + StringPiece16 delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + return SplitStringUsingSubstrT<string16>(input, delimiter, whitespace, + result_type); +} + +std::vector<std::string> SplitStringUsingSubstr(StringPiece input, + StringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + return SplitStringUsingSubstrT<std::string>(input, delimiter, whitespace, + result_type); +} + +std::vector<StringPiece16> SplitStringPieceUsingSubstr( + StringPiece16 input, + StringPiece16 delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + std::vector<StringPiece16> result; + return SplitStringUsingSubstrT<StringPiece16>(input, delimiter, whitespace, + result_type); +} + +std::vector<StringPiece> SplitStringPieceUsingSubstr( + StringPiece input, + StringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + return SplitStringUsingSubstrT<StringPiece>(input, delimiter, whitespace, + result_type); +} + +#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) +std::vector<std::wstring> SplitString(WStringPiece input, + WStringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) { + return SplitStringT<std::wstring>(input, separators, whitespace, result_type); +} + +std::vector<WStringPiece> SplitStringPiece(WStringPiece input, + WStringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) { + return SplitStringT<WStringPiece>(input, separators, whitespace, result_type); +} + +std::vector<std::wstring> SplitStringUsingSubstr(WStringPiece input, + WStringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + return SplitStringUsingSubstrT<std::wstring>(input, delimiter, whitespace, + result_type); +} + +std::vector<WStringPiece> SplitStringPieceUsingSubstr( + WStringPiece input, + WStringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) { + return SplitStringUsingSubstrT<WStringPiece>(input, delimiter, whitespace, + result_type); +} +#endif + +} // namespace base diff --git a/security/sandbox/chromium/base/strings/string_split.h b/security/sandbox/chromium/base/strings/string_split.h new file mode 100644 index 0000000000..efa8b199fe --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_split.h @@ -0,0 +1,169 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_SPLIT_H_ +#define BASE_STRINGS_STRING_SPLIT_H_ + +#include <string> +#include <utility> +#include <vector> + +#include "base/base_export.h" +#include "base/strings/string16.h" +#include "base/strings/string_piece.h" +#include "build/build_config.h" + +namespace base { + +enum WhitespaceHandling { + KEEP_WHITESPACE, + TRIM_WHITESPACE, +}; + +enum SplitResult { + // Strictly return all results. + // + // If the input is ",," and the separator is ',' this will return a + // vector of three empty strings. + SPLIT_WANT_ALL, + + // Only nonempty results will be added to the results. Multiple separators + // will be coalesced. Separators at the beginning and end of the input will + // be ignored. With TRIM_WHITESPACE, whitespace-only results will be dropped. + // + // If the input is ",," and the separator is ',', this will return an empty + // vector. + SPLIT_WANT_NONEMPTY, +}; + +// Split the given string on ANY of the given separators, returning copies of +// the result. +// +// Note this is inverse of JoinString() defined in string_util.h. +// +// To split on either commas or semicolons, keeping all whitespace: +// +// std::vector<std::string> tokens = base::SplitString( +// input, ", WARN_UNUSED_RESULT;", base::KEEP_WHITESPACE, +// base::SPLIT_WANT_ALL) WARN_UNUSED_RESULT; +BASE_EXPORT std::vector<std::string> SplitString(StringPiece input, + StringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) + WARN_UNUSED_RESULT; +BASE_EXPORT std::vector<string16> SplitString(StringPiece16 input, + StringPiece16 separators, + WhitespaceHandling whitespace, + SplitResult result_type) + WARN_UNUSED_RESULT; + +// Like SplitString above except it returns a vector of StringPieces which +// reference the original buffer without copying. Although you have to be +// careful to keep the original string unmodified, this provides an efficient +// way to iterate through tokens in a string. +// +// Note this is inverse of JoinString() defined in string_util.h. +// +// To iterate through all whitespace-separated tokens in an input string: +// +// for (const auto& cur : +// base::SplitStringPiece(input, base::kWhitespaceASCII, +// base::KEEP_WHITESPACE, +// base::SPLIT_WANT_NONEMPTY)) { +// ... +BASE_EXPORT std::vector<StringPiece> SplitStringPiece( + StringPiece input, + StringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; +BASE_EXPORT std::vector<StringPiece16> SplitStringPiece( + StringPiece16 input, + StringPiece16 separators, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; + +using StringPairs = std::vector<std::pair<std::string, std::string>>; + +// Splits |line| into key value pairs according to the given delimiters and +// removes whitespace leading each key and trailing each value. Returns true +// only if each pair has a non-empty key and value. |key_value_pairs| will +// include ("","") pairs for entries without |key_value_delimiter|. +BASE_EXPORT bool SplitStringIntoKeyValuePairs(StringPiece input, + char key_value_delimiter, + char key_value_pair_delimiter, + StringPairs* key_value_pairs); + +// Similar to SplitStringIntoKeyValuePairs, but use a substring +// |key_value_pair_delimiter| instead of a single char. +BASE_EXPORT bool SplitStringIntoKeyValuePairsUsingSubstr( + StringPiece input, + char key_value_delimiter, + StringPiece key_value_pair_delimiter, + StringPairs* key_value_pairs); + +// Similar to SplitString, but use a substring delimiter instead of a list of +// characters that are all possible delimiters. +BASE_EXPORT std::vector<string16> SplitStringUsingSubstr( + StringPiece16 input, + StringPiece16 delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; +BASE_EXPORT std::vector<std::string> SplitStringUsingSubstr( + StringPiece input, + StringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; + +// Like SplitStringUsingSubstr above except it returns a vector of StringPieces +// which reference the original buffer without copying. Although you have to be +// careful to keep the original string unmodified, this provides an efficient +// way to iterate through tokens in a string. +// +// To iterate through all newline-separated tokens in an input string: +// +// for (const auto& cur : +// base::SplitStringUsingSubstr(input, "\r\n", +// base::KEEP_WHITESPACE, +// base::SPLIT_WANT_NONEMPTY)) { +// ... +BASE_EXPORT std::vector<StringPiece16> SplitStringPieceUsingSubstr( + StringPiece16 input, + StringPiece16 delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; +BASE_EXPORT std::vector<StringPiece> SplitStringPieceUsingSubstr( + StringPiece input, + StringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; + +#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) +BASE_EXPORT std::vector<std::wstring> SplitString(WStringPiece input, + WStringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) + WARN_UNUSED_RESULT; + +BASE_EXPORT std::vector<WStringPiece> SplitStringPiece( + WStringPiece input, + WStringPiece separators, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; + +BASE_EXPORT std::vector<std::wstring> SplitStringUsingSubstr( + WStringPiece input, + WStringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; + +BASE_EXPORT std::vector<WStringPiece> SplitStringPieceUsingSubstr( + WStringPiece input, + WStringPiece delimiter, + WhitespaceHandling whitespace, + SplitResult result_type) WARN_UNUSED_RESULT; +#endif + +} // namespace base + +#endif // BASE_STRINGS_STRING_SPLIT_H_ diff --git a/security/sandbox/chromium/base/strings/string_util.cc b/security/sandbox/chromium/base/strings/string_util.cc new file mode 100644 index 0000000000..7e140fae48 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_util.cc @@ -0,0 +1,1157 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/string_util.h" + +#include <ctype.h> +#include <errno.h> +#include <math.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <wchar.h> +#include <wctype.h> + +#include <algorithm> +#include <limits> +#include <vector> + +#include "base/logging.h" +#include "base/no_destructor.h" +#include "base/stl_util.h" +#include "base/strings/utf_string_conversion_utils.h" +#include "base/strings/utf_string_conversions.h" +#include "base/third_party/icu/icu_utf.h" +#include "build/build_config.h" + +namespace base { + +namespace { + +// Used by ReplaceStringPlaceholders to track the position in the string of +// replaced parameters. +struct ReplacementOffset { + ReplacementOffset(uintptr_t parameter, size_t offset) + : parameter(parameter), + offset(offset) {} + + // Index of the parameter. + uintptr_t parameter; + + // Starting position in the string. + size_t offset; +}; + +static bool CompareParameter(const ReplacementOffset& elem1, + const ReplacementOffset& elem2) { + return elem1.parameter < elem2.parameter; +} + +// Overloaded function to append one string onto the end of another. Having a +// separate overload for |source| as both string and StringPiece allows for more +// efficient usage from functions templated to work with either type (avoiding a +// redundant call to the BasicStringPiece constructor in both cases). +template <typename string_type> +inline void AppendToString(string_type* target, const string_type& source) { + target->append(source); +} + +template <typename string_type> +inline void AppendToString(string_type* target, + const BasicStringPiece<string_type>& source) { + source.AppendToString(target); +} + +// Assuming that a pointer is the size of a "machine word", then +// uintptr_t is an integer type that is also a machine word. +using MachineWord = uintptr_t; + +inline bool IsMachineWordAligned(const void* pointer) { + return !(reinterpret_cast<MachineWord>(pointer) & (sizeof(MachineWord) - 1)); +} + +template <typename CharacterType> +struct NonASCIIMask; +template <> +struct NonASCIIMask<char> { + static constexpr MachineWord value() { + return static_cast<MachineWord>(0x8080808080808080ULL); + } +}; +template <> +struct NonASCIIMask<char16> { + static constexpr MachineWord value() { + return static_cast<MachineWord>(0xFF80FF80FF80FF80ULL); + } +}; +#if defined(WCHAR_T_IS_UTF32) +template <> +struct NonASCIIMask<wchar_t> { + static constexpr MachineWord value() { + return static_cast<MachineWord>(0xFFFFFF80FFFFFF80ULL); + } +}; +#endif // WCHAR_T_IS_UTF32 + +} // namespace + +bool IsWprintfFormatPortable(const wchar_t* format) { + for (const wchar_t* position = format; *position != '\0'; ++position) { + if (*position == '%') { + bool in_specification = true; + bool modifier_l = false; + while (in_specification) { + // Eat up characters until reaching a known specifier. + if (*++position == '\0') { + // The format string ended in the middle of a specification. Call + // it portable because no unportable specifications were found. The + // string is equally broken on all platforms. + return true; + } + + if (*position == 'l') { + // 'l' is the only thing that can save the 's' and 'c' specifiers. + modifier_l = true; + } else if (((*position == 's' || *position == 'c') && !modifier_l) || + *position == 'S' || *position == 'C' || *position == 'F' || + *position == 'D' || *position == 'O' || *position == 'U') { + // Not portable. + return false; + } + + if (wcschr(L"diouxXeEfgGaAcspn%", *position)) { + // Portable, keep scanning the rest of the format string. + in_specification = false; + } + } + } + } + + return true; +} + +namespace { + +template<typename StringType> +StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) { + StringType ret; + ret.reserve(str.size()); + for (size_t i = 0; i < str.size(); i++) + ret.push_back(ToLowerASCII(str[i])); + return ret; +} + +template<typename StringType> +StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) { + StringType ret; + ret.reserve(str.size()); + for (size_t i = 0; i < str.size(); i++) + ret.push_back(ToUpperASCII(str[i])); + return ret; +} + +} // namespace + +std::string ToLowerASCII(StringPiece str) { + return ToLowerASCIIImpl<std::string>(str); +} + +string16 ToLowerASCII(StringPiece16 str) { + return ToLowerASCIIImpl<string16>(str); +} + +std::string ToUpperASCII(StringPiece str) { + return ToUpperASCIIImpl<std::string>(str); +} + +string16 ToUpperASCII(StringPiece16 str) { + return ToUpperASCIIImpl<string16>(str); +} + +template<class StringType> +int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a, + BasicStringPiece<StringType> b) { + // Find the first characters that aren't equal and compare them. If the end + // of one of the strings is found before a nonequal character, the lengths + // of the strings are compared. + size_t i = 0; + while (i < a.length() && i < b.length()) { + typename StringType::value_type lower_a = ToLowerASCII(a[i]); + typename StringType::value_type lower_b = ToLowerASCII(b[i]); + if (lower_a < lower_b) + return -1; + if (lower_a > lower_b) + return 1; + i++; + } + + // End of one string hit before finding a different character. Expect the + // common case to be "strings equal" at this point so check that first. + if (a.length() == b.length()) + return 0; + + if (a.length() < b.length()) + return -1; + return 1; +} + +int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b) { + return CompareCaseInsensitiveASCIIT<std::string>(a, b); +} + +int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) { + return CompareCaseInsensitiveASCIIT<string16>(a, b); +} + +bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) { + if (a.length() != b.length()) + return false; + return CompareCaseInsensitiveASCIIT<std::string>(a, b) == 0; +} + +bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) { + if (a.length() != b.length()) + return false; + return CompareCaseInsensitiveASCIIT<string16>(a, b) == 0; +} + +const std::string& EmptyString() { + static const base::NoDestructor<std::string> s; + return *s; +} + +const string16& EmptyString16() { + static const base::NoDestructor<string16> s16; + return *s16; +} + +template <class StringType> +bool ReplaceCharsT(const StringType& input, + BasicStringPiece<StringType> find_any_of_these, + BasicStringPiece<StringType> replace_with, + StringType* output); + +bool ReplaceChars(const string16& input, + StringPiece16 replace_chars, + StringPiece16 replace_with, + string16* output) { + return ReplaceCharsT(input, replace_chars, replace_with, output); +} + +bool ReplaceChars(const std::string& input, + StringPiece replace_chars, + StringPiece replace_with, + std::string* output) { + return ReplaceCharsT(input, replace_chars, replace_with, output); +} + +bool RemoveChars(const string16& input, + StringPiece16 remove_chars, + string16* output) { + return ReplaceCharsT(input, remove_chars, StringPiece16(), output); +} + +bool RemoveChars(const std::string& input, + StringPiece remove_chars, + std::string* output) { + return ReplaceCharsT(input, remove_chars, StringPiece(), output); +} + +template <typename Str> +TrimPositions TrimStringT(BasicStringPiece<Str> input, + BasicStringPiece<Str> trim_chars, + TrimPositions positions, + Str* output) { + // Find the edges of leading/trailing whitespace as desired. Need to use + // a StringPiece version of input to be able to call find* on it with the + // StringPiece version of trim_chars (normally the trim_chars will be a + // constant so avoid making a copy). + const size_t last_char = input.length() - 1; + const size_t first_good_char = + (positions & TRIM_LEADING) ? input.find_first_not_of(trim_chars) : 0; + const size_t last_good_char = (positions & TRIM_TRAILING) + ? input.find_last_not_of(trim_chars) + : last_char; + + // When the string was all trimmed, report that we stripped off characters + // from whichever position the caller was interested in. For empty input, we + // stripped no characters, but we still need to clear |output|. + if (input.empty() || first_good_char == Str::npos || + last_good_char == Str::npos) { + bool input_was_empty = input.empty(); // in case output == &input + output->clear(); + return input_was_empty ? TRIM_NONE : positions; + } + + // Trim. + output->assign(input.data() + first_good_char, + last_good_char - first_good_char + 1); + + // Return where we trimmed from. + return static_cast<TrimPositions>( + (first_good_char == 0 ? TRIM_NONE : TRIM_LEADING) | + (last_good_char == last_char ? TRIM_NONE : TRIM_TRAILING)); +} + +bool TrimString(StringPiece16 input, + StringPiece16 trim_chars, + string16* output) { + return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; +} + +bool TrimString(StringPiece input, + StringPiece trim_chars, + std::string* output) { + return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; +} + +template<typename Str> +BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input, + BasicStringPiece<Str> trim_chars, + TrimPositions positions) { + size_t begin = (positions & TRIM_LEADING) ? + input.find_first_not_of(trim_chars) : 0; + size_t end = (positions & TRIM_TRAILING) ? + input.find_last_not_of(trim_chars) + 1 : input.size(); + return input.substr(begin, end - begin); +} + +StringPiece16 TrimString(StringPiece16 input, + StringPiece16 trim_chars, + TrimPositions positions) { + return TrimStringPieceT(input, trim_chars, positions); +} + +StringPiece TrimString(StringPiece input, + StringPiece trim_chars, + TrimPositions positions) { + return TrimStringPieceT(input, trim_chars, positions); +} + +void TruncateUTF8ToByteSize(const std::string& input, + const size_t byte_size, + std::string* output) { + DCHECK(output); + if (byte_size > input.length()) { + *output = input; + return; + } + DCHECK_LE(byte_size, + static_cast<uint32_t>(std::numeric_limits<int32_t>::max())); + // Note: This cast is necessary because CBU8_NEXT uses int32_ts. + int32_t truncation_length = static_cast<int32_t>(byte_size); + int32_t char_index = truncation_length - 1; + const char* data = input.data(); + + // Using CBU8, we will move backwards from the truncation point + // to the beginning of the string looking for a valid UTF8 + // character. Once a full UTF8 character is found, we will + // truncate the string to the end of that character. + while (char_index >= 0) { + int32_t prev = char_index; + base_icu::UChar32 code_point = 0; + CBU8_NEXT(data, char_index, truncation_length, code_point); + if (!IsValidCharacter(code_point) || + !IsValidCodepoint(code_point)) { + char_index = prev - 1; + } else { + break; + } + } + + if (char_index >= 0 ) + *output = input.substr(0, char_index); + else + output->clear(); +} + +TrimPositions TrimWhitespace(StringPiece16 input, + TrimPositions positions, + string16* output) { + return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output); +} + +StringPiece16 TrimWhitespace(StringPiece16 input, + TrimPositions positions) { + return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions); +} + +TrimPositions TrimWhitespaceASCII(StringPiece input, + TrimPositions positions, + std::string* output) { + return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output); +} + +StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) { + return TrimStringPieceT(input, StringPiece(kWhitespaceASCII), positions); +} + +template<typename STR> +STR CollapseWhitespaceT(const STR& text, + bool trim_sequences_with_line_breaks) { + STR result; + result.resize(text.size()); + + // Set flags to pretend we're already in a trimmed whitespace sequence, so we + // will trim any leading whitespace. + bool in_whitespace = true; + bool already_trimmed = true; + + int chars_written = 0; + for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) { + if (IsUnicodeWhitespace(*i)) { + if (!in_whitespace) { + // Reduce all whitespace sequences to a single space. + in_whitespace = true; + result[chars_written++] = L' '; + } + if (trim_sequences_with_line_breaks && !already_trimmed && + ((*i == '\n') || (*i == '\r'))) { + // Whitespace sequences containing CR or LF are eliminated entirely. + already_trimmed = true; + --chars_written; + } + } else { + // Non-whitespace chracters are copied straight across. + in_whitespace = false; + already_trimmed = false; + result[chars_written++] = *i; + } + } + + if (in_whitespace && !already_trimmed) { + // Any trailing whitespace is eliminated. + --chars_written; + } + + result.resize(chars_written); + return result; +} + +string16 CollapseWhitespace(const string16& text, + bool trim_sequences_with_line_breaks) { + return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); +} + +std::string CollapseWhitespaceASCII(const std::string& text, + bool trim_sequences_with_line_breaks) { + return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); +} + +bool ContainsOnlyChars(StringPiece input, StringPiece characters) { + return input.find_first_not_of(characters) == StringPiece::npos; +} + +bool ContainsOnlyChars(StringPiece16 input, StringPiece16 characters) { + return input.find_first_not_of(characters) == StringPiece16::npos; +} + +template <class Char> +inline bool DoIsStringASCII(const Char* characters, size_t length) { + if (!length) + return true; + constexpr MachineWord non_ascii_bit_mask = NonASCIIMask<Char>::value(); + MachineWord all_char_bits = 0; + const Char* end = characters + length; + + // Prologue: align the input. + while (!IsMachineWordAligned(characters) && characters < end) + all_char_bits |= *characters++; + if (all_char_bits & non_ascii_bit_mask) + return false; + + // Compare the values of CPU word size. + constexpr size_t chars_per_word = sizeof(MachineWord) / sizeof(Char); + constexpr int batch_count = 16; + while (characters <= end - batch_count * chars_per_word) { + all_char_bits = 0; + for (int i = 0; i < batch_count; ++i) { + all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters)); + characters += chars_per_word; + } + if (all_char_bits & non_ascii_bit_mask) + return false; + } + + // Process the remaining words. + all_char_bits = 0; + while (characters <= end - chars_per_word) { + all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters)); + characters += chars_per_word; + } + + // Process the remaining bytes. + while (characters < end) + all_char_bits |= *characters++; + + return !(all_char_bits & non_ascii_bit_mask); +} + +bool IsStringASCII(StringPiece str) { + return DoIsStringASCII(str.data(), str.length()); +} + +bool IsStringASCII(StringPiece16 str) { + return DoIsStringASCII(str.data(), str.length()); +} + +#if defined(WCHAR_T_IS_UTF32) +bool IsStringASCII(WStringPiece str) { + return DoIsStringASCII(str.data(), str.length()); +} +#endif + +template <bool (*Validator)(uint32_t)> +inline static bool DoIsStringUTF8(StringPiece str) { + const char* src = str.data(); + int32_t src_len = static_cast<int32_t>(str.length()); + int32_t char_index = 0; + + while (char_index < src_len) { + int32_t code_point; + CBU8_NEXT(src, char_index, src_len, code_point); + if (!Validator(code_point)) + return false; + } + return true; +} + +bool IsStringUTF8(StringPiece str) { + return DoIsStringUTF8<IsValidCharacter>(str); +} + +bool IsStringUTF8AllowingNoncharacters(StringPiece str) { + return DoIsStringUTF8<IsValidCodepoint>(str); +} + +// Implementation note: Normally this function will be called with a hardcoded +// constant for the lowercase_ascii parameter. Constructing a StringPiece from +// a C constant requires running strlen, so the result will be two passes +// through the buffers, one to file the length of lowercase_ascii, and one to +// compare each letter. +// +// This function could have taken a const char* to avoid this and only do one +// pass through the string. But the strlen is faster than the case-insensitive +// compares and lets us early-exit in the case that the strings are different +// lengths (will often be the case for non-matches). So whether one approach or +// the other will be faster depends on the case. +// +// The hardcoded strings are typically very short so it doesn't matter, and the +// string piece gives additional flexibility for the caller (doesn't have to be +// null terminated) so we choose the StringPiece route. +template<typename Str> +static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str, + StringPiece lowercase_ascii) { + if (str.size() != lowercase_ascii.size()) + return false; + for (size_t i = 0; i < str.size(); i++) { + if (ToLowerASCII(str[i]) != lowercase_ascii[i]) + return false; + } + return true; +} + +bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) { + return DoLowerCaseEqualsASCII<std::string>(str, lowercase_ascii); +} + +bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) { + return DoLowerCaseEqualsASCII<string16>(str, lowercase_ascii); +} + +bool EqualsASCII(StringPiece16 str, StringPiece ascii) { + if (str.length() != ascii.length()) + return false; + return std::equal(ascii.begin(), ascii.end(), str.begin()); +} + +template<typename Str> +bool StartsWithT(BasicStringPiece<Str> str, + BasicStringPiece<Str> search_for, + CompareCase case_sensitivity) { + if (search_for.size() > str.size()) + return false; + + BasicStringPiece<Str> source = str.substr(0, search_for.size()); + + switch (case_sensitivity) { + case CompareCase::SENSITIVE: + return source == search_for; + + case CompareCase::INSENSITIVE_ASCII: + return std::equal( + search_for.begin(), search_for.end(), + source.begin(), + CaseInsensitiveCompareASCII<typename Str::value_type>()); + + default: + NOTREACHED(); + return false; + } +} + +bool StartsWith(StringPiece str, + StringPiece search_for, + CompareCase case_sensitivity) { + return StartsWithT<std::string>(str, search_for, case_sensitivity); +} + +bool StartsWith(StringPiece16 str, + StringPiece16 search_for, + CompareCase case_sensitivity) { + return StartsWithT<string16>(str, search_for, case_sensitivity); +} + +template <typename Str> +bool EndsWithT(BasicStringPiece<Str> str, + BasicStringPiece<Str> search_for, + CompareCase case_sensitivity) { + if (search_for.size() > str.size()) + return false; + + BasicStringPiece<Str> source = str.substr(str.size() - search_for.size(), + search_for.size()); + + switch (case_sensitivity) { + case CompareCase::SENSITIVE: + return source == search_for; + + case CompareCase::INSENSITIVE_ASCII: + return std::equal( + source.begin(), source.end(), + search_for.begin(), + CaseInsensitiveCompareASCII<typename Str::value_type>()); + + default: + NOTREACHED(); + return false; + } +} + +bool EndsWith(StringPiece str, + StringPiece search_for, + CompareCase case_sensitivity) { + return EndsWithT<std::string>(str, search_for, case_sensitivity); +} + +bool EndsWith(StringPiece16 str, + StringPiece16 search_for, + CompareCase case_sensitivity) { + return EndsWithT<string16>(str, search_for, case_sensitivity); +} + +char HexDigitToInt(wchar_t c) { + DCHECK(IsHexDigit(c)); + if (c >= '0' && c <= '9') + return static_cast<char>(c - '0'); + if (c >= 'A' && c <= 'F') + return static_cast<char>(c - 'A' + 10); + if (c >= 'a' && c <= 'f') + return static_cast<char>(c - 'a' + 10); + return 0; +} + +bool IsUnicodeWhitespace(wchar_t c) { + // kWhitespaceWide is a NULL-terminated string + for (const wchar_t* cur = kWhitespaceWide; *cur; ++cur) { + if (*cur == c) + return true; + } + return false; +} + +static const char* const kByteStringsUnlocalized[] = { + " B", + " kB", + " MB", + " GB", + " TB", + " PB" +}; + +string16 FormatBytesUnlocalized(int64_t bytes) { + double unit_amount = static_cast<double>(bytes); + size_t dimension = 0; + const int kKilo = 1024; + while (unit_amount >= kKilo && + dimension < base::size(kByteStringsUnlocalized) - 1) { + unit_amount /= kKilo; + dimension++; + } + + char buf[64]; + if (bytes != 0 && dimension > 0 && unit_amount < 100) { + base::snprintf(buf, base::size(buf), "%.1lf%s", unit_amount, + kByteStringsUnlocalized[dimension]); + } else { + base::snprintf(buf, base::size(buf), "%.0lf%s", unit_amount, + kByteStringsUnlocalized[dimension]); + } + + return ASCIIToUTF16(buf); +} + +// A Matcher for DoReplaceMatchesAfterOffset() that matches substrings. +template <class StringType> +struct SubstringMatcher { + BasicStringPiece<StringType> find_this; + + size_t Find(const StringType& input, size_t pos) { + return input.find(find_this.data(), pos, find_this.length()); + } + size_t MatchSize() { return find_this.length(); } +}; + +// A Matcher for DoReplaceMatchesAfterOffset() that matches single characters. +template <class StringType> +struct CharacterMatcher { + BasicStringPiece<StringType> find_any_of_these; + + size_t Find(const StringType& input, size_t pos) { + return input.find_first_of(find_any_of_these.data(), pos, + find_any_of_these.length()); + } + constexpr size_t MatchSize() { return 1; } +}; + +enum class ReplaceType { REPLACE_ALL, REPLACE_FIRST }; + +// Runs in O(n) time in the length of |str|, and transforms the string without +// reallocating when possible. Returns |true| if any matches were found. +// +// This is parameterized on a |Matcher| traits type, so that it can be the +// implementation for both ReplaceChars() and ReplaceSubstringsAfterOffset(). +template <class StringType, class Matcher> +bool DoReplaceMatchesAfterOffset(StringType* str, + size_t initial_offset, + Matcher matcher, + BasicStringPiece<StringType> replace_with, + ReplaceType replace_type) { + using CharTraits = typename StringType::traits_type; + + const size_t find_length = matcher.MatchSize(); + if (!find_length) + return false; + + // If the find string doesn't appear, there's nothing to do. + size_t first_match = matcher.Find(*str, initial_offset); + if (first_match == StringType::npos) + return false; + + // If we're only replacing one instance, there's no need to do anything + // complicated. + const size_t replace_length = replace_with.length(); + if (replace_type == ReplaceType::REPLACE_FIRST) { + str->replace(first_match, find_length, replace_with.data(), replace_length); + return true; + } + + // If the find and replace strings are the same length, we can simply use + // replace() on each instance, and finish the entire operation in O(n) time. + if (find_length == replace_length) { + auto* buffer = &((*str)[0]); + for (size_t offset = first_match; offset != StringType::npos; + offset = matcher.Find(*str, offset + replace_length)) { + CharTraits::copy(buffer + offset, replace_with.data(), replace_length); + } + return true; + } + + // Since the find and replace strings aren't the same length, a loop like the + // one above would be O(n^2) in the worst case, as replace() will shift the + // entire remaining string each time. We need to be more clever to keep things + // O(n). + // + // When the string is being shortened, it's possible to just shift the matches + // down in one pass while finding, and truncate the length at the end of the + // search. + // + // If the string is being lengthened, more work is required. The strategy used + // here is to make two find() passes through the string. The first pass counts + // the number of matches to determine the new size. The second pass will + // either construct the new string into a new buffer (if the existing buffer + // lacked capacity), or else -- if there is room -- create a region of scratch + // space after |first_match| by shifting the tail of the string to a higher + // index, and doing in-place moves from the tail to lower indices thereafter. + size_t str_length = str->length(); + size_t expansion = 0; + if (replace_length > find_length) { + // This operation lengthens the string; determine the new length by counting + // matches. + const size_t expansion_per_match = (replace_length - find_length); + size_t num_matches = 0; + for (size_t match = first_match; match != StringType::npos; + match = matcher.Find(*str, match + find_length)) { + expansion += expansion_per_match; + ++num_matches; + } + const size_t final_length = str_length + expansion; + + if (str->capacity() < final_length) { + // If we'd have to allocate a new buffer to grow the string, build the + // result directly into the new allocation via append(). + StringType src(str->get_allocator()); + str->swap(src); + str->reserve(final_length); + + size_t pos = 0; + for (size_t match = first_match;; match = matcher.Find(src, pos)) { + str->append(src, pos, match - pos); + str->append(replace_with.data(), replace_length); + pos = match + find_length; + + // A mid-loop test/break enables skipping the final Find() call; the + // number of matches is known, so don't search past the last one. + if (!--num_matches) + break; + } + + // Handle substring after the final match. + str->append(src, pos, str_length - pos); + return true; + } + + // Prepare for the copy/move loop below -- expand the string to its final + // size by shifting the data after the first match to the end of the resized + // string. + size_t shift_src = first_match + find_length; + size_t shift_dst = shift_src + expansion; + + // Big |expansion| factors (relative to |str_length|) require padding up to + // |shift_dst|. + if (shift_dst > str_length) + str->resize(shift_dst); + + str->replace(shift_dst, str_length - shift_src, *str, shift_src, + str_length - shift_src); + str_length = final_length; + } + + // We can alternate replacement and move operations. This won't overwrite the + // unsearched region of the string so long as |write_offset| <= |read_offset|; + // that condition is always satisfied because: + // + // (a) If the string is being shortened, |expansion| is zero and + // |write_offset| grows slower than |read_offset|. + // + // (b) If the string is being lengthened, |write_offset| grows faster than + // |read_offset|, but |expansion| is big enough so that |write_offset| + // will only catch up to |read_offset| at the point of the last match. + auto* buffer = &((*str)[0]); + size_t write_offset = first_match; + size_t read_offset = first_match + expansion; + do { + if (replace_length) { + CharTraits::copy(buffer + write_offset, replace_with.data(), + replace_length); + write_offset += replace_length; + } + read_offset += find_length; + + // min() clamps StringType::npos (the largest unsigned value) to str_length. + size_t match = std::min(matcher.Find(*str, read_offset), str_length); + + size_t length = match - read_offset; + if (length) { + CharTraits::move(buffer + write_offset, buffer + read_offset, length); + write_offset += length; + read_offset += length; + } + } while (read_offset < str_length); + + // If we're shortening the string, truncate it now. + str->resize(write_offset); + return true; +} + +template <class StringType> +bool ReplaceCharsT(const StringType& input, + BasicStringPiece<StringType> find_any_of_these, + BasicStringPiece<StringType> replace_with, + StringType* output) { + // Commonly, this is called with output and input being the same string; in + // that case, this assignment is inexpensive. + *output = input; + + return DoReplaceMatchesAfterOffset( + output, 0, CharacterMatcher<StringType>{find_any_of_these}, replace_with, + ReplaceType::REPLACE_ALL); +} + +void ReplaceFirstSubstringAfterOffset(string16* str, + size_t start_offset, + StringPiece16 find_this, + StringPiece16 replace_with) { + DoReplaceMatchesAfterOffset(str, start_offset, + SubstringMatcher<string16>{find_this}, + replace_with, ReplaceType::REPLACE_FIRST); +} + +void ReplaceFirstSubstringAfterOffset(std::string* str, + size_t start_offset, + StringPiece find_this, + StringPiece replace_with) { + DoReplaceMatchesAfterOffset(str, start_offset, + SubstringMatcher<std::string>{find_this}, + replace_with, ReplaceType::REPLACE_FIRST); +} + +void ReplaceSubstringsAfterOffset(string16* str, + size_t start_offset, + StringPiece16 find_this, + StringPiece16 replace_with) { + DoReplaceMatchesAfterOffset(str, start_offset, + SubstringMatcher<string16>{find_this}, + replace_with, ReplaceType::REPLACE_ALL); +} + +void ReplaceSubstringsAfterOffset(std::string* str, + size_t start_offset, + StringPiece find_this, + StringPiece replace_with) { + DoReplaceMatchesAfterOffset(str, start_offset, + SubstringMatcher<std::string>{find_this}, + replace_with, ReplaceType::REPLACE_ALL); +} + +template <class string_type> +inline typename string_type::value_type* WriteIntoT(string_type* str, + size_t length_with_null) { + DCHECK_GE(length_with_null, 1u); + str->reserve(length_with_null); + str->resize(length_with_null - 1); + return &((*str)[0]); +} + +char* WriteInto(std::string* str, size_t length_with_null) { + return WriteIntoT(str, length_with_null); +} + +char16* WriteInto(string16* str, size_t length_with_null) { + return WriteIntoT(str, length_with_null); +} + +#if defined(_MSC_VER) && !defined(__clang__) +// Work around VC++ code-gen bug. https://crbug.com/804884 +#pragma optimize("", off) +#endif + +// Generic version for all JoinString overloads. |list_type| must be a sequence +// (std::vector or std::initializer_list) of strings/StringPieces (std::string, +// string16, StringPiece or StringPiece16). |string_type| is either std::string +// or string16. +template <typename list_type, typename string_type> +static string_type JoinStringT(const list_type& parts, + BasicStringPiece<string_type> sep) { + if (parts.size() == 0) + return string_type(); + + // Pre-allocate the eventual size of the string. Start with the size of all of + // the separators (note that this *assumes* parts.size() > 0). + size_t total_size = (parts.size() - 1) * sep.size(); + for (const auto& part : parts) + total_size += part.size(); + string_type result; + result.reserve(total_size); + + auto iter = parts.begin(); + DCHECK(iter != parts.end()); + AppendToString(&result, *iter); + ++iter; + + for (; iter != parts.end(); ++iter) { + sep.AppendToString(&result); + // Using the overloaded AppendToString allows this template function to work + // on both strings and StringPieces without creating an intermediate + // StringPiece object. + AppendToString(&result, *iter); + } + + // Sanity-check that we pre-allocated correctly. + DCHECK_EQ(total_size, result.size()); + + return result; +} + +std::string JoinString(const std::vector<std::string>& parts, + StringPiece separator) { + return JoinStringT(parts, separator); +} + +string16 JoinString(const std::vector<string16>& parts, + StringPiece16 separator) { + return JoinStringT(parts, separator); +} + +#if defined(_MSC_VER) && !defined(__clang__) +// Work around VC++ code-gen bug. https://crbug.com/804884 +#pragma optimize("", on) +#endif + +std::string JoinString(const std::vector<StringPiece>& parts, + StringPiece separator) { + return JoinStringT(parts, separator); +} + +string16 JoinString(const std::vector<StringPiece16>& parts, + StringPiece16 separator) { + return JoinStringT(parts, separator); +} + +std::string JoinString(std::initializer_list<StringPiece> parts, + StringPiece separator) { + return JoinStringT(parts, separator); +} + +string16 JoinString(std::initializer_list<StringPiece16> parts, + StringPiece16 separator) { + return JoinStringT(parts, separator); +} + +template<class FormatStringType, class OutStringType> +OutStringType DoReplaceStringPlaceholders( + const FormatStringType& format_string, + const std::vector<OutStringType>& subst, + std::vector<size_t>* offsets) { + size_t substitutions = subst.size(); + DCHECK_LT(substitutions, 10U); + + size_t sub_length = 0; + for (const auto& cur : subst) + sub_length += cur.length(); + + OutStringType formatted; + formatted.reserve(format_string.length() + sub_length); + + std::vector<ReplacementOffset> r_offsets; + for (auto i = format_string.begin(); i != format_string.end(); ++i) { + if ('$' == *i) { + if (i + 1 != format_string.end()) { + ++i; + if ('$' == *i) { + while (i != format_string.end() && '$' == *i) { + formatted.push_back('$'); + ++i; + } + --i; + } else { + if (*i < '1' || *i > '9') { + DLOG(ERROR) << "Invalid placeholder: $" << *i; + continue; + } + uintptr_t index = *i - '1'; + if (offsets) { + ReplacementOffset r_offset(index, + static_cast<int>(formatted.size())); + r_offsets.insert( + std::upper_bound(r_offsets.begin(), r_offsets.end(), r_offset, + &CompareParameter), + r_offset); + } + if (index < substitutions) + formatted.append(subst.at(index)); + } + } + } else { + formatted.push_back(*i); + } + } + if (offsets) { + for (const auto& cur : r_offsets) + offsets->push_back(cur.offset); + } + return formatted; +} + +string16 ReplaceStringPlaceholders(const string16& format_string, + const std::vector<string16>& subst, + std::vector<size_t>* offsets) { + return DoReplaceStringPlaceholders(format_string, subst, offsets); +} + +std::string ReplaceStringPlaceholders(StringPiece format_string, + const std::vector<std::string>& subst, + std::vector<size_t>* offsets) { + return DoReplaceStringPlaceholders(format_string, subst, offsets); +} + +string16 ReplaceStringPlaceholders(const string16& format_string, + const string16& a, + size_t* offset) { + std::vector<size_t> offsets; + std::vector<string16> subst; + subst.push_back(a); + string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); + + DCHECK_EQ(1U, offsets.size()); + if (offset) + *offset = offsets[0]; + return result; +} + +#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) + +TrimPositions TrimWhitespace(WStringPiece input, + TrimPositions positions, + std::wstring* output) { + return TrimStringT(input, WStringPiece(kWhitespaceWide), positions, output); +} + +WStringPiece TrimWhitespace(WStringPiece input, TrimPositions positions) { + return TrimStringPieceT(input, WStringPiece(kWhitespaceWide), positions); +} + +bool TrimString(WStringPiece input, + WStringPiece trim_chars, + std::wstring* output) { + return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; +} + +WStringPiece TrimString(WStringPiece input, + WStringPiece trim_chars, + TrimPositions positions) { + return TrimStringPieceT(input, trim_chars, positions); +} + +wchar_t* WriteInto(std::wstring* str, size_t length_with_null) { + return WriteIntoT(str, length_with_null); +} + +#endif + +// The following code is compatible with the OpenBSD lcpy interface. See: +// http://www.gratisoft.us/todd/papers/strlcpy.html +// ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c + +namespace { + +template <typename CHAR> +size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { + for (size_t i = 0; i < dst_size; ++i) { + if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. + return i; + } + + // We were left off at dst_size. We over copied 1 byte. Null terminate. + if (dst_size != 0) + dst[dst_size - 1] = 0; + + // Count the rest of the |src|, and return it's length in characters. + while (src[dst_size]) ++dst_size; + return dst_size; +} + +} // namespace + +size_t strlcpy(char* dst, const char* src, size_t dst_size) { + return lcpyT<char>(dst, src, dst_size); +} +size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { + return lcpyT<wchar_t>(dst, src, dst_size); +} + +} // namespace base diff --git a/security/sandbox/chromium/base/strings/string_util.h b/security/sandbox/chromium/base/strings/string_util.h new file mode 100644 index 0000000000..f9f5e10ade --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_util.h @@ -0,0 +1,568 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// This file defines utility functions for working with strings. + +#ifndef BASE_STRINGS_STRING_UTIL_H_ +#define BASE_STRINGS_STRING_UTIL_H_ + +#include <ctype.h> +#include <stdarg.h> // va_list +#include <stddef.h> +#include <stdint.h> + +#include <initializer_list> +#include <string> +#include <vector> + +#include "base/base_export.h" +#include "base/compiler_specific.h" +#include "base/stl_util.h" +#include "base/strings/string16.h" +#include "base/strings/string_piece.h" // For implicit conversions. +#include "build/build_config.h" + +namespace base { + +// C standard-library functions that aren't cross-platform are provided as +// "base::...", and their prototypes are listed below. These functions are +// then implemented as inline calls to the platform-specific equivalents in the +// platform-specific headers. + +// Wrapper for vsnprintf that always null-terminates and always returns the +// number of characters that would be in an untruncated formatted +// string, even when truncation occurs. +int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) + PRINTF_FORMAT(3, 0); + +// Some of these implementations need to be inlined. + +// We separate the declaration from the implementation of this inline +// function just so the PRINTF_FORMAT works. +inline int snprintf(char* buffer, size_t size, const char* format, ...) + PRINTF_FORMAT(3, 4); +inline int snprintf(char* buffer, size_t size, const char* format, ...) { + va_list arguments; + va_start(arguments, format); + int result = vsnprintf(buffer, size, format, arguments); + va_end(arguments); + return result; +} + +// BSD-style safe and consistent string copy functions. +// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. +// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as +// long as |dst_size| is not 0. Returns the length of |src| in characters. +// If the return value is >= dst_size, then the output was truncated. +// NOTE: All sizes are in number of characters, NOT in bytes. +BASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size); +BASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); + +// Scan a wprintf format string to determine whether it's portable across a +// variety of systems. This function only checks that the conversion +// specifiers used by the format string are supported and have the same meaning +// on a variety of systems. It doesn't check for other errors that might occur +// within a format string. +// +// Nonportable conversion specifiers for wprintf are: +// - 's' and 'c' without an 'l' length modifier. %s and %c operate on char +// data on all systems except Windows, which treat them as wchar_t data. +// Use %ls and %lc for wchar_t data instead. +// - 'S' and 'C', which operate on wchar_t data on all systems except Windows, +// which treat them as char data. Use %ls and %lc for wchar_t data +// instead. +// - 'F', which is not identified by Windows wprintf documentation. +// - 'D', 'O', and 'U', which are deprecated and not available on all systems. +// Use %ld, %lo, and %lu instead. +// +// Note that there is no portable conversion specifier for char data when +// working with wprintf. +// +// This function is intended to be called from base::vswprintf. +BASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format); + +// ASCII-specific tolower. The standard library's tolower is locale sensitive, +// so we don't want to use it here. +inline char ToLowerASCII(char c) { + return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; +} +inline char16 ToLowerASCII(char16 c) { + return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; +} + +// ASCII-specific toupper. The standard library's toupper is locale sensitive, +// so we don't want to use it here. +inline char ToUpperASCII(char c) { + return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; +} +inline char16 ToUpperASCII(char16 c) { + return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; +} + +// Converts the given string to it's ASCII-lowercase equivalent. +BASE_EXPORT std::string ToLowerASCII(StringPiece str); +BASE_EXPORT string16 ToLowerASCII(StringPiece16 str); + +// Converts the given string to it's ASCII-uppercase equivalent. +BASE_EXPORT std::string ToUpperASCII(StringPiece str); +BASE_EXPORT string16 ToUpperASCII(StringPiece16 str); + +// Functor for case-insensitive ASCII comparisons for STL algorithms like +// std::search. +// +// Note that a full Unicode version of this functor is not possible to write +// because case mappings might change the number of characters, depend on +// context (combining accents), and require handling UTF-16. If you need +// proper Unicode support, use base::i18n::ToLower/FoldCase and then just +// use a normal operator== on the result. +template<typename Char> struct CaseInsensitiveCompareASCII { + public: + bool operator()(Char x, Char y) const { + return ToLowerASCII(x) == ToLowerASCII(y); + } +}; + +// Like strcasecmp for case-insensitive ASCII characters only. Returns: +// -1 (a < b) +// 0 (a == b) +// 1 (a > b) +// (unlike strcasecmp which can return values greater or less than 1/-1). For +// full Unicode support, use base::i18n::ToLower or base::i18h::FoldCase +// and then just call the normal string operators on the result. +BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b); +BASE_EXPORT int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b); + +// Equality for ASCII case-insensitive comparisons. For full Unicode support, +// use base::i18n::ToLower or base::i18h::FoldCase and then compare with either +// == or !=. +BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b); +BASE_EXPORT bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b); + +// These threadsafe functions return references to globally unique empty +// strings. +// +// It is likely faster to construct a new empty string object (just a few +// instructions to set the length to 0) than to get the empty string instance +// returned by these functions (which requires threadsafe static access). +// +// Therefore, DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT +// CONSTRUCTORS. There is only one case where you should use these: functions +// which need to return a string by reference (e.g. as a class member +// accessor), and don't have an empty string to use (e.g. in an error case). +// These should not be used as initializers, function arguments, or return +// values for functions which return by value or outparam. +BASE_EXPORT const std::string& EmptyString(); +BASE_EXPORT const string16& EmptyString16(); + +// Contains the set of characters representing whitespace in the corresponding +// encoding. Null-terminated. The ASCII versions are the whitespaces as defined +// by HTML5, and don't include control characters. +BASE_EXPORT extern const wchar_t kWhitespaceWide[]; // Includes Unicode. +BASE_EXPORT extern const char16 kWhitespaceUTF16[]; // Includes Unicode. +BASE_EXPORT extern const char16 kWhitespaceNoCrLfUTF16[]; // Unicode w/o CR/LF. +BASE_EXPORT extern const char kWhitespaceASCII[]; +BASE_EXPORT extern const char16 kWhitespaceASCIIAs16[]; // No unicode. + +// Null-terminated string representing the UTF-8 byte order mark. +BASE_EXPORT extern const char kUtf8ByteOrderMark[]; + +// Removes characters in |remove_chars| from anywhere in |input|. Returns true +// if any characters were removed. |remove_chars| must be null-terminated. +// NOTE: Safe to use the same variable for both |input| and |output|. +BASE_EXPORT bool RemoveChars(const string16& input, + StringPiece16 remove_chars, + string16* output); +BASE_EXPORT bool RemoveChars(const std::string& input, + StringPiece remove_chars, + std::string* output); + +// Replaces characters in |replace_chars| from anywhere in |input| with +// |replace_with|. Each character in |replace_chars| will be replaced with +// the |replace_with| string. Returns true if any characters were replaced. +// |replace_chars| must be null-terminated. +// NOTE: Safe to use the same variable for both |input| and |output|. +BASE_EXPORT bool ReplaceChars(const string16& input, + StringPiece16 replace_chars, + StringPiece16 replace_with, + string16* output); +BASE_EXPORT bool ReplaceChars(const std::string& input, + StringPiece replace_chars, + StringPiece replace_with, + std::string* output); + +enum TrimPositions { + TRIM_NONE = 0, + TRIM_LEADING = 1 << 0, + TRIM_TRAILING = 1 << 1, + TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, +}; + +// Removes characters in |trim_chars| from the beginning and end of |input|. +// The 8-bit version only works on 8-bit characters, not UTF-8. Returns true if +// any characters were removed. +// +// It is safe to use the same variable for both |input| and |output| (this is +// the normal usage to trim in-place). +BASE_EXPORT bool TrimString(StringPiece16 input, + StringPiece16 trim_chars, + string16* output); +BASE_EXPORT bool TrimString(StringPiece input, + StringPiece trim_chars, + std::string* output); + +// StringPiece versions of the above. The returned pieces refer to the original +// buffer. +BASE_EXPORT StringPiece16 TrimString(StringPiece16 input, + StringPiece16 trim_chars, + TrimPositions positions); +BASE_EXPORT StringPiece TrimString(StringPiece input, + StringPiece trim_chars, + TrimPositions positions); + +// Truncates a string to the nearest UTF-8 character that will leave +// the string less than or equal to the specified byte size. +BASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input, + const size_t byte_size, + std::string* output); + +#if defined(WCHAR_T_IS_UTF16) +// Utility functions to access the underlying string buffer as a wide char +// pointer. +// +// Note: These functions violate strict aliasing when char16 and wchar_t are +// unrelated types. We thus pass -fno-strict-aliasing to the compiler on +// non-Windows platforms [1], and rely on it being off in Clang's CL mode [2]. +// +// [1] https://crrev.com/b9a0976622/build/config/compiler/BUILD.gn#244 +// [2] +// https://github.com/llvm/llvm-project/blob/1e28a66/clang/lib/Driver/ToolChains/Clang.cpp#L3949 +inline wchar_t* as_writable_wcstr(char16* str) { + return reinterpret_cast<wchar_t*>(str); +} + +inline wchar_t* as_writable_wcstr(string16& str) { + return reinterpret_cast<wchar_t*>(data(str)); +} + +inline const wchar_t* as_wcstr(const char16* str) { + return reinterpret_cast<const wchar_t*>(str); +} + +inline const wchar_t* as_wcstr(StringPiece16 str) { + return reinterpret_cast<const wchar_t*>(str.data()); +} + +// Utility functions to access the underlying string buffer as a char16 pointer. +inline char16* as_writable_u16cstr(wchar_t* str) { + return reinterpret_cast<char16*>(str); +} + +inline char16* as_writable_u16cstr(std::wstring& str) { + return reinterpret_cast<char16*>(data(str)); +} + +inline const char16* as_u16cstr(const wchar_t* str) { + return reinterpret_cast<const char16*>(str); +} + +inline const char16* as_u16cstr(WStringPiece str) { + return reinterpret_cast<const char16*>(str.data()); +} + +// Utility functions to convert between base::WStringPiece and +// base::StringPiece16. +inline WStringPiece AsWStringPiece(StringPiece16 str) { + return WStringPiece(as_wcstr(str.data()), str.size()); +} + +inline StringPiece16 AsStringPiece16(WStringPiece str) { + return StringPiece16(as_u16cstr(str.data()), str.size()); +} + +inline std::wstring AsWString(StringPiece16 str) { + return std::wstring(as_wcstr(str.data()), str.size()); +} + +inline string16 AsString16(WStringPiece str) { + return string16(as_u16cstr(str.data()), str.size()); +} +#endif // defined(WCHAR_T_IS_UTF16) + +// Trims any whitespace from either end of the input string. +// +// The StringPiece versions return a substring referencing the input buffer. +// The ASCII versions look only for ASCII whitespace. +// +// The std::string versions return where whitespace was found. +// NOTE: Safe to use the same variable for both input and output. +BASE_EXPORT TrimPositions TrimWhitespace(StringPiece16 input, + TrimPositions positions, + string16* output); +BASE_EXPORT StringPiece16 TrimWhitespace(StringPiece16 input, + TrimPositions positions); +BASE_EXPORT TrimPositions TrimWhitespaceASCII(StringPiece input, + TrimPositions positions, + std::string* output); +BASE_EXPORT StringPiece TrimWhitespaceASCII(StringPiece input, + TrimPositions positions); + +// Searches for CR or LF characters. Removes all contiguous whitespace +// strings that contain them. This is useful when trying to deal with text +// copied from terminals. +// Returns |text|, with the following three transformations: +// (1) Leading and trailing whitespace is trimmed. +// (2) If |trim_sequences_with_line_breaks| is true, any other whitespace +// sequences containing a CR or LF are trimmed. +// (3) All other whitespace sequences are converted to single spaces. +BASE_EXPORT string16 CollapseWhitespace( + const string16& text, + bool trim_sequences_with_line_breaks); +BASE_EXPORT std::string CollapseWhitespaceASCII( + const std::string& text, + bool trim_sequences_with_line_breaks); + +// Returns true if |input| is empty or contains only characters found in +// |characters|. +BASE_EXPORT bool ContainsOnlyChars(StringPiece input, StringPiece characters); +BASE_EXPORT bool ContainsOnlyChars(StringPiece16 input, + StringPiece16 characters); + +// Returns true if |str| is structurally valid UTF-8 and also doesn't +// contain any non-character code point (e.g. U+10FFFE). Prohibiting +// non-characters increases the likelihood of detecting non-UTF-8 in +// real-world text, for callers which do not need to accept +// non-characters in strings. +BASE_EXPORT bool IsStringUTF8(StringPiece str); + +// Returns true if |str| contains valid UTF-8, allowing non-character +// code points. +BASE_EXPORT bool IsStringUTF8AllowingNoncharacters(StringPiece str); + +// Returns true if |str| contains only valid ASCII character values. +// Note 1: IsStringASCII executes in time determined solely by the +// length of the string, not by its contents, so it is robust against +// timing attacks for all strings of equal length. +// Note 2: IsStringASCII assumes the input is likely all ASCII, and +// does not leave early if it is not the case. +BASE_EXPORT bool IsStringASCII(StringPiece str); +BASE_EXPORT bool IsStringASCII(StringPiece16 str); +#if defined(WCHAR_T_IS_UTF32) +BASE_EXPORT bool IsStringASCII(WStringPiece str); +#endif + +// Compare the lower-case form of the given string against the given +// previously-lower-cased ASCII string (typically a constant). +BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece str, + StringPiece lowecase_ascii); +BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece16 str, + StringPiece lowecase_ascii); + +// Performs a case-sensitive string compare of the given 16-bit string against +// the given 8-bit ASCII string (typically a constant). The behavior is +// undefined if the |ascii| string is not ASCII. +BASE_EXPORT bool EqualsASCII(StringPiece16 str, StringPiece ascii); + +// Indicates case sensitivity of comparisons. Only ASCII case insensitivity +// is supported. Full Unicode case-insensitive conversions would need to go in +// base/i18n so it can use ICU. +// +// If you need to do Unicode-aware case-insensitive StartsWith/EndsWith, it's +// best to call base::i18n::ToLower() or base::i18n::FoldCase() (see +// base/i18n/case_conversion.h for usage advice) on the arguments, and then use +// the results to a case-sensitive comparison. +enum class CompareCase { + SENSITIVE, + INSENSITIVE_ASCII, +}; + +BASE_EXPORT bool StartsWith(StringPiece str, + StringPiece search_for, + CompareCase case_sensitivity); +BASE_EXPORT bool StartsWith(StringPiece16 str, + StringPiece16 search_for, + CompareCase case_sensitivity); +BASE_EXPORT bool EndsWith(StringPiece str, + StringPiece search_for, + CompareCase case_sensitivity); +BASE_EXPORT bool EndsWith(StringPiece16 str, + StringPiece16 search_for, + CompareCase case_sensitivity); + +// Determines the type of ASCII character, independent of locale (the C +// library versions will change based on locale). +template <typename Char> +inline bool IsAsciiWhitespace(Char c) { + return c == ' ' || c == '\r' || c == '\n' || c == '\t' || c == '\f'; +} +template <typename Char> +inline bool IsAsciiAlpha(Char c) { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); +} +template <typename Char> +inline bool IsAsciiUpper(Char c) { + return c >= 'A' && c <= 'Z'; +} +template <typename Char> +inline bool IsAsciiLower(Char c) { + return c >= 'a' && c <= 'z'; +} +template <typename Char> +inline bool IsAsciiDigit(Char c) { + return c >= '0' && c <= '9'; +} +template <typename Char> +inline bool IsAsciiPrintable(Char c) { + return c >= ' ' && c <= '~'; +} + +template <typename Char> +inline bool IsHexDigit(Char c) { + return (c >= '0' && c <= '9') || + (c >= 'A' && c <= 'F') || + (c >= 'a' && c <= 'f'); +} + +// Returns the integer corresponding to the given hex character. For example: +// '4' -> 4 +// 'a' -> 10 +// 'B' -> 11 +// Assumes the input is a valid hex character. DCHECKs in debug builds if not. +BASE_EXPORT char HexDigitToInt(wchar_t c); + +// Returns true if it's a Unicode whitespace character. +BASE_EXPORT bool IsUnicodeWhitespace(wchar_t c); + +// Return a byte string in human-readable format with a unit suffix. Not +// appropriate for use in any UI; use of FormatBytes and friends in ui/base is +// highly recommended instead. TODO(avi): Figure out how to get callers to use +// FormatBytes instead; remove this. +BASE_EXPORT string16 FormatBytesUnlocalized(int64_t bytes); + +// Starting at |start_offset| (usually 0), replace the first instance of +// |find_this| with |replace_with|. +BASE_EXPORT void ReplaceFirstSubstringAfterOffset( + base::string16* str, + size_t start_offset, + StringPiece16 find_this, + StringPiece16 replace_with); +BASE_EXPORT void ReplaceFirstSubstringAfterOffset( + std::string* str, + size_t start_offset, + StringPiece find_this, + StringPiece replace_with); + +// Starting at |start_offset| (usually 0), look through |str| and replace all +// instances of |find_this| with |replace_with|. +// +// This does entire substrings; use std::replace in <algorithm> for single +// characters, for example: +// std::replace(str.begin(), str.end(), 'a', 'b'); +BASE_EXPORT void ReplaceSubstringsAfterOffset( + string16* str, + size_t start_offset, + StringPiece16 find_this, + StringPiece16 replace_with); +BASE_EXPORT void ReplaceSubstringsAfterOffset( + std::string* str, + size_t start_offset, + StringPiece find_this, + StringPiece replace_with); + +// Reserves enough memory in |str| to accommodate |length_with_null| characters, +// sets the size of |str| to |length_with_null - 1| characters, and returns a +// pointer to the underlying contiguous array of characters. This is typically +// used when calling a function that writes results into a character array, but +// the caller wants the data to be managed by a string-like object. It is +// convenient in that is can be used inline in the call, and fast in that it +// avoids copying the results of the call from a char* into a string. +// +// Internally, this takes linear time because the resize() call 0-fills the +// underlying array for potentially all +// (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we +// could avoid this aspect of the resize() call, as we expect the caller to +// immediately write over this memory, but there is no other way to set the size +// of the string, and not doing that will mean people who access |str| rather +// than str.c_str() will get back a string of whatever size |str| had on entry +// to this function (probably 0). +BASE_EXPORT char* WriteInto(std::string* str, size_t length_with_null); +BASE_EXPORT char16* WriteInto(string16* str, size_t length_with_null); + +// Joins a vector or list of strings into a single string, inserting |separator| +// (which may be empty) in between all elements. +// +// Note this is inverse of SplitString()/SplitStringPiece() defined in +// string_split.h. +// +// If possible, callers should build a vector of StringPieces and use the +// StringPiece variant, so that they do not create unnecessary copies of +// strings. For example, instead of using SplitString, modifying the vector, +// then using JoinString, use SplitStringPiece followed by JoinString so that no +// copies of those strings are created until the final join operation. +// +// Use StrCat (in base/strings/strcat.h) if you don't need a separator. +BASE_EXPORT std::string JoinString(const std::vector<std::string>& parts, + StringPiece separator); +BASE_EXPORT string16 JoinString(const std::vector<string16>& parts, + StringPiece16 separator); +BASE_EXPORT std::string JoinString(const std::vector<StringPiece>& parts, + StringPiece separator); +BASE_EXPORT string16 JoinString(const std::vector<StringPiece16>& parts, + StringPiece16 separator); +// Explicit initializer_list overloads are required to break ambiguity when used +// with a literal initializer list (otherwise the compiler would not be able to +// decide between the string and StringPiece overloads). +BASE_EXPORT std::string JoinString(std::initializer_list<StringPiece> parts, + StringPiece separator); +BASE_EXPORT string16 JoinString(std::initializer_list<StringPiece16> parts, + StringPiece16 separator); + +// Replace $1-$2-$3..$9 in the format string with values from |subst|. +// Additionally, any number of consecutive '$' characters is replaced by that +// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be +// NULL. This only allows you to use up to nine replacements. +BASE_EXPORT string16 ReplaceStringPlaceholders( + const string16& format_string, + const std::vector<string16>& subst, + std::vector<size_t>* offsets); + +BASE_EXPORT std::string ReplaceStringPlaceholders( + StringPiece format_string, + const std::vector<std::string>& subst, + std::vector<size_t>* offsets); + +// Single-string shortcut for ReplaceStringHolders. |offset| may be NULL. +BASE_EXPORT string16 ReplaceStringPlaceholders(const string16& format_string, + const string16& a, + size_t* offset); + +#if defined(OS_WIN) && defined(BASE_STRING16_IS_STD_U16STRING) +BASE_EXPORT TrimPositions TrimWhitespace(WStringPiece input, + TrimPositions positions, + std::wstring* output); + +BASE_EXPORT WStringPiece TrimWhitespace(WStringPiece input, + TrimPositions positions); + +BASE_EXPORT bool TrimString(WStringPiece input, + WStringPiece trim_chars, + std::wstring* output); + +BASE_EXPORT WStringPiece TrimString(WStringPiece input, + WStringPiece trim_chars, + TrimPositions positions); + +BASE_EXPORT wchar_t* WriteInto(std::wstring* str, size_t length_with_null); +#endif + +} // namespace base + +#if defined(OS_WIN) +#include "base/strings/string_util_win.h" +#elif defined(OS_POSIX) || defined(OS_FUCHSIA) +#include "base/strings/string_util_posix.h" +#else +#error Define string operations appropriately for your platform +#endif + +#endif // BASE_STRINGS_STRING_UTIL_H_ diff --git a/security/sandbox/chromium/base/strings/string_util_constants.cc b/security/sandbox/chromium/base/strings/string_util_constants.cc new file mode 100644 index 0000000000..212c5ab082 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_util_constants.cc @@ -0,0 +1,54 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/string_util.h" + +namespace base { + +#define WHITESPACE_ASCII_NO_CR_LF \ + 0x09, /* CHARACTER TABULATION */ \ + 0x0B, /* LINE TABULATION */ \ + 0x0C, /* FORM FEED (FF) */ \ + 0x20 /* SPACE */ + +#define WHITESPACE_ASCII \ + WHITESPACE_ASCII_NO_CR_LF, /* Comment to make clang-format linebreak */ \ + 0x0A, /* LINE FEED (LF) */ \ + 0x0D /* CARRIAGE RETURN (CR) */ + +#define WHITESPACE_UNICODE_NON_ASCII \ + 0x0085, /* NEXT LINE (NEL) */ \ + 0x00A0, /* NO-BREAK SPACE */ \ + 0x1680, /* OGHAM SPACE MARK */ \ + 0x2000, /* EN QUAD */ \ + 0x2001, /* EM QUAD */ \ + 0x2002, /* EN SPACE */ \ + 0x2003, /* EM SPACE */ \ + 0x2004, /* THREE-PER-EM SPACE */ \ + 0x2005, /* FOUR-PER-EM SPACE */ \ + 0x2006, /* SIX-PER-EM SPACE */ \ + 0x2007, /* FIGURE SPACE */ \ + 0x2008, /* PUNCTUATION SPACE */ \ + 0x2009, /* THIN SPACE */ \ + 0x200A, /* HAIR SPACE */ \ + 0x2028, /* LINE SEPARATOR */ \ + 0x2029, /* PARAGRAPH SEPARATOR */ \ + 0x202F, /* NARROW NO-BREAK SPACE */ \ + 0x205F, /* MEDIUM MATHEMATICAL SPACE */ \ + 0x3000 /* IDEOGRAPHIC SPACE */ + +#define WHITESPACE_UNICODE_NO_CR_LF \ + WHITESPACE_ASCII_NO_CR_LF, WHITESPACE_UNICODE_NON_ASCII + +#define WHITESPACE_UNICODE WHITESPACE_ASCII, WHITESPACE_UNICODE_NON_ASCII + +const wchar_t kWhitespaceWide[] = {WHITESPACE_UNICODE, 0}; +const char16 kWhitespaceUTF16[] = {WHITESPACE_UNICODE, 0}; +const char16 kWhitespaceNoCrLfUTF16[] = {WHITESPACE_UNICODE_NO_CR_LF, 0}; +const char kWhitespaceASCII[] = {WHITESPACE_ASCII, 0}; +const char16 kWhitespaceASCIIAs16[] = {WHITESPACE_ASCII, 0}; + +const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF"; + +} // namespace base diff --git a/security/sandbox/chromium/base/strings/string_util_posix.h b/security/sandbox/chromium/base/strings/string_util_posix.h new file mode 100644 index 0000000000..8299118e10 --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_util_posix.h @@ -0,0 +1,37 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_UTIL_POSIX_H_ +#define BASE_STRINGS_STRING_UTIL_POSIX_H_ + +#include <stdarg.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <wchar.h> + +#include "base/logging.h" + +namespace base { + +// Chromium code style is to not use malloc'd strings; this is only for use +// for interaction with APIs that require it. +inline char* strdup(const char* str) { + return ::strdup(str); +} + +inline int vsnprintf(char* buffer, size_t size, + const char* format, va_list arguments) { + return ::vsnprintf(buffer, size, format, arguments); +} + +inline int vswprintf(wchar_t* buffer, size_t size, + const wchar_t* format, va_list arguments) { + DCHECK(IsWprintfFormatPortable(format)); + return ::vswprintf(buffer, size, format, arguments); +} + +} // namespace base + +#endif // BASE_STRINGS_STRING_UTIL_POSIX_H_ diff --git a/security/sandbox/chromium/base/strings/string_util_win.h b/security/sandbox/chromium/base/strings/string_util_win.h new file mode 100644 index 0000000000..7f260bfc8b --- /dev/null +++ b/security/sandbox/chromium/base/strings/string_util_win.h @@ -0,0 +1,44 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRING_UTIL_WIN_H_ +#define BASE_STRINGS_STRING_UTIL_WIN_H_ + +#include <stdarg.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <wchar.h> + +#include "base/logging.h" + +namespace base { + +// Chromium code style is to not use malloc'd strings; this is only for use +// for interaction with APIs that require it. +inline char* strdup(const char* str) { + return _strdup(str); +} + +inline int vsnprintf(char* buffer, size_t size, + const char* format, va_list arguments) { + int length = vsnprintf_s(buffer, size, size - 1, format, arguments); + if (length < 0) + return _vscprintf(format, arguments); + return length; +} + +inline int vswprintf(wchar_t* buffer, size_t size, + const wchar_t* format, va_list arguments) { + DCHECK(IsWprintfFormatPortable(format)); + + int length = _vsnwprintf_s(buffer, size, size - 1, format, arguments); + if (length < 0) + return _vscwprintf(format, arguments); + return length; +} + +} // namespace base + +#endif // BASE_STRINGS_STRING_UTIL_WIN_H_ diff --git a/security/sandbox/chromium/base/strings/stringprintf.cc b/security/sandbox/chromium/base/strings/stringprintf.cc new file mode 100644 index 0000000000..738cc63bbe --- /dev/null +++ b/security/sandbox/chromium/base/strings/stringprintf.cc @@ -0,0 +1,225 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/stringprintf.h" + +#include <errno.h> +#include <stddef.h> + +#include <vector> + +#include "base/scoped_clear_last_error.h" +#include "base/stl_util.h" +#include "base/strings/string_util.h" +#include "base/strings/utf_string_conversions.h" +#include "build/build_config.h" + +namespace base { + +namespace { + +// Overloaded wrappers around vsnprintf and vswprintf. The buf_size parameter +// is the size of the buffer. These return the number of characters in the +// formatted string excluding the NUL terminator. If the buffer is not +// large enough to accommodate the formatted string without truncation, they +// return the number of characters that would be in the fully-formatted string +// (vsnprintf, and vswprintf on Windows), or -1 (vswprintf on POSIX platforms). +inline int vsnprintfT(char* buffer, + size_t buf_size, + const char* format, + va_list argptr) { + return base::vsnprintf(buffer, buf_size, format, argptr); +} + +#if defined(OS_WIN) +inline int vsnprintfT(wchar_t* buffer, + size_t buf_size, + const wchar_t* format, + va_list argptr) { + return base::vswprintf(buffer, buf_size, format, argptr); +} +inline int vsnprintfT(char16_t* buffer, + size_t buf_size, + const char16_t* format, + va_list argptr) { + return base::vswprintf(reinterpret_cast<wchar_t*>(buffer), buf_size, + reinterpret_cast<const wchar_t*>(format), argptr); +} +#endif + +// Templatized backend for StringPrintF/StringAppendF. This does not finalize +// the va_list, the caller is expected to do that. +template <class CharT> +static void StringAppendVT(std::basic_string<CharT>* dst, + const CharT* format, + va_list ap) { + // First try with a small fixed size buffer. + // This buffer size should be kept in sync with StringUtilTest.GrowBoundary + // and StringUtilTest.StringPrintfBounds. + CharT stack_buf[1024]; + + va_list ap_copy; + va_copy(ap_copy, ap); + + base::internal::ScopedClearLastError last_error; + int result = vsnprintfT(stack_buf, base::size(stack_buf), format, ap_copy); + va_end(ap_copy); + + if (result >= 0 && result < static_cast<int>(base::size(stack_buf))) { + // It fit. + dst->append(stack_buf, result); + return; + } + + // Repeatedly increase buffer size until it fits. + int mem_length = base::size(stack_buf); + while (true) { + if (result < 0) { +#if defined(OS_WIN) + // On Windows, vsnprintfT always returns the number of characters in a + // fully-formatted string, so if we reach this point, something else is + // wrong and no amount of buffer-doubling is going to fix it. + return; +#else + if (errno != 0 && errno != EOVERFLOW) + return; + // Try doubling the buffer size. + mem_length *= 2; +#endif + } else { + // We need exactly "result + 1" characters. + mem_length = result + 1; + } + + if (mem_length > 32 * 1024 * 1024) { + // That should be plenty, don't try anything larger. This protects + // against huge allocations when using vsnprintfT implementations that + // return -1 for reasons other than overflow without setting errno. + DLOG(WARNING) << "Unable to printf the requested string due to size."; + return; + } + + std::vector<CharT> mem_buf(mem_length); + + // NOTE: You can only use a va_list once. Since we're in a while loop, we + // need to make a new copy each time so we don't use up the original. + va_copy(ap_copy, ap); + result = vsnprintfT(&mem_buf[0], mem_length, format, ap_copy); + va_end(ap_copy); + + if ((result >= 0) && (result < mem_length)) { + // It fit. + dst->append(&mem_buf[0], result); + return; + } + } +} + +} // namespace + +std::string StringPrintf(const char* format, ...) { + va_list ap; + va_start(ap, format); + std::string result; + StringAppendV(&result, format, ap); + va_end(ap); + return result; +} + +#if defined(OS_WIN) +std::wstring StringPrintf(const wchar_t* format, ...) { + va_list ap; + va_start(ap, format); + std::wstring result; + StringAppendV(&result, format, ap); + va_end(ap); + return result; +} + +std::u16string StringPrintf(const char16_t* format, ...) { + va_list ap; + va_start(ap, format); + std::u16string result; + StringAppendV(&result, format, ap); + va_end(ap); + return result; +} +#endif + +std::string StringPrintV(const char* format, va_list ap) { + std::string result; + StringAppendV(&result, format, ap); + return result; +} + +const std::string& SStringPrintf(std::string* dst, const char* format, ...) { + va_list ap; + va_start(ap, format); + dst->clear(); + StringAppendV(dst, format, ap); + va_end(ap); + return *dst; +} + +#if defined(OS_WIN) +const std::wstring& SStringPrintf(std::wstring* dst, + const wchar_t* format, ...) { + va_list ap; + va_start(ap, format); + dst->clear(); + StringAppendV(dst, format, ap); + va_end(ap); + return *dst; +} + +const std::u16string& SStringPrintf(std::u16string* dst, + const char16_t* format, + ...) { + va_list ap; + va_start(ap, format); + dst->clear(); + StringAppendV(dst, format, ap); + va_end(ap); + return *dst; +} +#endif + +void StringAppendF(std::string* dst, const char* format, ...) { + va_list ap; + va_start(ap, format); + StringAppendV(dst, format, ap); + va_end(ap); +} + +#if defined(OS_WIN) +void StringAppendF(std::wstring* dst, const wchar_t* format, ...) { + va_list ap; + va_start(ap, format); + StringAppendV(dst, format, ap); + va_end(ap); +} + +void StringAppendF(std::u16string* dst, const char16_t* format, ...) { + va_list ap; + va_start(ap, format); + StringAppendV(dst, format, ap); + va_end(ap); +} +#endif + +void StringAppendV(std::string* dst, const char* format, va_list ap) { + StringAppendVT(dst, format, ap); +} + +#if defined(OS_WIN) +void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap) { + StringAppendVT(dst, format, ap); +} + +void StringAppendV(std::u16string* dst, const char16_t* format, va_list ap) { + StringAppendVT(dst, format, ap); +} +#endif + +} // namespace base diff --git a/security/sandbox/chromium/base/strings/stringprintf.h b/security/sandbox/chromium/base/strings/stringprintf.h new file mode 100644 index 0000000000..a8d5bc84e9 --- /dev/null +++ b/security/sandbox/chromium/base/strings/stringprintf.h @@ -0,0 +1,74 @@ +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_STRINGPRINTF_H_ +#define BASE_STRINGS_STRINGPRINTF_H_ + +#include <stdarg.h> // va_list + +#include <string> + +#include "base/base_export.h" +#include "base/compiler_specific.h" +#include "build/build_config.h" + +namespace base { + +// Return a C++ string given printf-like input. +BASE_EXPORT std::string StringPrintf(const char* format, ...) + PRINTF_FORMAT(1, 2) WARN_UNUSED_RESULT; +#if defined(OS_WIN) +// Note: Unfortunately compile time checking of the format string for UTF-16 +// strings is not supported by any compiler, thus these functions should be used +// carefully and sparingly. Also applies to SStringPrintf and StringAppendV +// below. +BASE_EXPORT std::wstring StringPrintf(const wchar_t* format, ...) + WPRINTF_FORMAT(1, 2) WARN_UNUSED_RESULT; +BASE_EXPORT std::u16string StringPrintf(const char16_t* format, ...) + WPRINTF_FORMAT(1, 2) WARN_UNUSED_RESULT; +#endif + +// Return a C++ string given vprintf-like input. +BASE_EXPORT std::string StringPrintV(const char* format, va_list ap) + PRINTF_FORMAT(1, 0) WARN_UNUSED_RESULT; + +// Store result into a supplied string and return it. +BASE_EXPORT const std::string& SStringPrintf(std::string* dst, + const char* format, + ...) PRINTF_FORMAT(2, 3); +#if defined(OS_WIN) +BASE_EXPORT const std::wstring& SStringPrintf(std::wstring* dst, + const wchar_t* format, + ...) WPRINTF_FORMAT(2, 3); +BASE_EXPORT const std::u16string& SStringPrintf(std::u16string* dst, + const char16_t* format, + ...) WPRINTF_FORMAT(2, 3); +#endif + +// Append result to a supplied string. +BASE_EXPORT void StringAppendF(std::string* dst, const char* format, ...) + PRINTF_FORMAT(2, 3); +#if defined(OS_WIN) +BASE_EXPORT void StringAppendF(std::wstring* dst, const wchar_t* format, ...) + WPRINTF_FORMAT(2, 3); +BASE_EXPORT void StringAppendF(std::u16string* dst, const char16_t* format, ...) + WPRINTF_FORMAT(2, 3); +#endif + +// Lower-level routine that takes a va_list and appends to a specified +// string. All other routines are just convenience wrappers around it. +BASE_EXPORT void StringAppendV(std::string* dst, const char* format, va_list ap) + PRINTF_FORMAT(2, 0); +#if defined(OS_WIN) +BASE_EXPORT void StringAppendV(std::wstring* dst, + const wchar_t* format, + va_list ap) WPRINTF_FORMAT(2, 0); +BASE_EXPORT void StringAppendV(std::u16string* dst, + const char16_t* format, + va_list ap) WPRINTF_FORMAT(2, 0); +#endif + +} // namespace base + +#endif // BASE_STRINGS_STRINGPRINTF_H_ diff --git a/security/sandbox/chromium/base/strings/utf_string_conversion_utils.cc b/security/sandbox/chromium/base/strings/utf_string_conversion_utils.cc new file mode 100644 index 0000000000..f7682c1be9 --- /dev/null +++ b/security/sandbox/chromium/base/strings/utf_string_conversion_utils.cc @@ -0,0 +1,155 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/utf_string_conversion_utils.h" + +#include "base/third_party/icu/icu_utf.h" +#include "build/build_config.h" + +namespace base { + +// ReadUnicodeCharacter -------------------------------------------------------- + +bool ReadUnicodeCharacter(const char* src, + int32_t src_len, + int32_t* char_index, + uint32_t* code_point_out) { + // U8_NEXT expects to be able to use -1 to signal an error, so we must + // use a signed type for code_point. But this function returns false + // on error anyway, so code_point_out is unsigned. + int32_t code_point; + CBU8_NEXT(src, *char_index, src_len, code_point); + *code_point_out = static_cast<uint32_t>(code_point); + + // The ICU macro above moves to the next char, we want to point to the last + // char consumed. + (*char_index)--; + + // Validate the decoded value. + return IsValidCodepoint(code_point); +} + +bool ReadUnicodeCharacter(const char16* src, + int32_t src_len, + int32_t* char_index, + uint32_t* code_point) { + if (CBU16_IS_SURROGATE(src[*char_index])) { + if (!CBU16_IS_SURROGATE_LEAD(src[*char_index]) || + *char_index + 1 >= src_len || + !CBU16_IS_TRAIL(src[*char_index + 1])) { + // Invalid surrogate pair. + return false; + } + + // Valid surrogate pair. + *code_point = CBU16_GET_SUPPLEMENTARY(src[*char_index], + src[*char_index + 1]); + (*char_index)++; + } else { + // Not a surrogate, just one 16-bit word. + *code_point = src[*char_index]; + } + + return IsValidCodepoint(*code_point); +} + +#if defined(WCHAR_T_IS_UTF32) +bool ReadUnicodeCharacter(const wchar_t* src, + int32_t src_len, + int32_t* char_index, + uint32_t* code_point) { + // Conversion is easy since the source is 32-bit. + *code_point = src[*char_index]; + + // Validate the value. + return IsValidCodepoint(*code_point); +} +#endif // defined(WCHAR_T_IS_UTF32) + +// WriteUnicodeCharacter ------------------------------------------------------- + +size_t WriteUnicodeCharacter(uint32_t code_point, std::string* output) { + if (code_point <= 0x7f) { + // Fast path the common case of one byte. + output->push_back(static_cast<char>(code_point)); + return 1; + } + + + // CBU8_APPEND_UNSAFE can append up to 4 bytes. + size_t char_offset = output->length(); + size_t original_char_offset = char_offset; + output->resize(char_offset + CBU8_MAX_LENGTH); + + CBU8_APPEND_UNSAFE(&(*output)[0], char_offset, code_point); + + // CBU8_APPEND_UNSAFE will advance our pointer past the inserted character, so + // it will represent the new length of the string. + output->resize(char_offset); + return char_offset - original_char_offset; +} + +size_t WriteUnicodeCharacter(uint32_t code_point, string16* output) { + if (CBU16_LENGTH(code_point) == 1) { + // Thie code point is in the Basic Multilingual Plane (BMP). + output->push_back(static_cast<char16>(code_point)); + return 1; + } + // Non-BMP characters use a double-character encoding. + size_t char_offset = output->length(); + output->resize(char_offset + CBU16_MAX_LENGTH); + CBU16_APPEND_UNSAFE(&(*output)[0], char_offset, code_point); + return CBU16_MAX_LENGTH; +} + +// Generalized Unicode converter ----------------------------------------------- + +template<typename CHAR> +void PrepareForUTF8Output(const CHAR* src, + size_t src_len, + std::string* output) { + output->clear(); + if (src_len == 0) + return; + if (src[0] < 0x80) { + // Assume that the entire input will be ASCII. + output->reserve(src_len); + } else { + // Assume that the entire input is non-ASCII and will have 3 bytes per char. + output->reserve(src_len * 3); + } +} + +// Instantiate versions we know callers will need. +#if !defined(OS_WIN) +// wchar_t and char16 are the same thing on Windows. +template void PrepareForUTF8Output(const wchar_t*, size_t, std::string*); +#endif +template void PrepareForUTF8Output(const char16*, size_t, std::string*); + +template<typename STRING> +void PrepareForUTF16Or32Output(const char* src, + size_t src_len, + STRING* output) { + output->clear(); + if (src_len == 0) + return; + if (static_cast<unsigned char>(src[0]) < 0x80) { + // Assume the input is all ASCII, which means 1:1 correspondence. + output->reserve(src_len); + } else { + // Otherwise assume that the UTF-8 sequences will have 2 bytes for each + // character. + output->reserve(src_len / 2); + } +} + +// Instantiate versions we know callers will need. +#if !defined(OS_WIN) +// std::wstring and string16 are the same thing on Windows. +template void PrepareForUTF16Or32Output(const char*, size_t, std::wstring*); +#endif +template void PrepareForUTF16Or32Output(const char*, size_t, string16*); + +} // namespace base diff --git a/security/sandbox/chromium/base/strings/utf_string_conversion_utils.h b/security/sandbox/chromium/base/strings/utf_string_conversion_utils.h new file mode 100644 index 0000000000..01d24c3e2e --- /dev/null +++ b/security/sandbox/chromium/base/strings/utf_string_conversion_utils.h @@ -0,0 +1,103 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_ +#define BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_ + +// Low-level UTF handling functions. Most code will want to use the functions +// in utf_string_conversions.h + +#include <stddef.h> +#include <stdint.h> + +#include "base/base_export.h" +#include "base/strings/string16.h" + +namespace base { + +inline bool IsValidCodepoint(uint32_t code_point) { + // Excludes code points that are not Unicode scalar values, i.e. + // surrogate code points ([0xD800, 0xDFFF]). Additionally, excludes + // code points larger than 0x10FFFF (the highest codepoint allowed). + // Non-characters and unassigned code points are allowed. + // https://unicode.org/glossary/#unicode_scalar_value + return code_point < 0xD800u || + (code_point >= 0xE000u && code_point <= 0x10FFFFu); +} + +inline bool IsValidCharacter(uint32_t code_point) { + // Excludes non-characters (U+FDD0..U+FDEF, and all code points + // ending in 0xFFFE or 0xFFFF) from the set of valid code points. + // https://unicode.org/faq/private_use.html#nonchar1 + return code_point < 0xD800u || (code_point >= 0xE000u && + code_point < 0xFDD0u) || (code_point > 0xFDEFu && + code_point <= 0x10FFFFu && (code_point & 0xFFFEu) != 0xFFFEu); +} + +// ReadUnicodeCharacter -------------------------------------------------------- + +// Reads a UTF-8 stream, placing the next code point into the given output +// |*code_point|. |src| represents the entire string to read, and |*char_index| +// is the character offset within the string to start reading at. |*char_index| +// will be updated to index the last character read, such that incrementing it +// (as in a for loop) will take the reader to the next character. +// +// Returns true on success. On false, |*code_point| will be invalid. +BASE_EXPORT bool ReadUnicodeCharacter(const char* src, + int32_t src_len, + int32_t* char_index, + uint32_t* code_point_out); + +// Reads a UTF-16 character. The usage is the same as the 8-bit version above. +BASE_EXPORT bool ReadUnicodeCharacter(const char16* src, + int32_t src_len, + int32_t* char_index, + uint32_t* code_point); + +#if defined(WCHAR_T_IS_UTF32) +// Reads UTF-32 character. The usage is the same as the 8-bit version above. +BASE_EXPORT bool ReadUnicodeCharacter(const wchar_t* src, + int32_t src_len, + int32_t* char_index, + uint32_t* code_point); +#endif // defined(WCHAR_T_IS_UTF32) + +// WriteUnicodeCharacter ------------------------------------------------------- + +// Appends a UTF-8 character to the given 8-bit string. Returns the number of +// bytes written. +BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point, + std::string* output); + +// Appends the given code point as a UTF-16 character to the given 16-bit +// string. Returns the number of 16-bit values written. +BASE_EXPORT size_t WriteUnicodeCharacter(uint32_t code_point, string16* output); + +#if defined(WCHAR_T_IS_UTF32) +// Appends the given UTF-32 character to the given 32-bit string. Returns the +// number of 32-bit values written. +inline size_t WriteUnicodeCharacter(uint32_t code_point, std::wstring* output) { + // This is the easy case, just append the character. + output->push_back(code_point); + return 1; +} +#endif // defined(WCHAR_T_IS_UTF32) + +// Generalized Unicode converter ----------------------------------------------- + +// Guesses the length of the output in UTF-8 in bytes, clears that output +// string, and reserves that amount of space. We assume that the input +// character types are unsigned, which will be true for UTF-16 and -32 on our +// systems. +template<typename CHAR> +void PrepareForUTF8Output(const CHAR* src, size_t src_len, std::string* output); + +// Prepares an output buffer (containing either UTF-16 or -32 data) given some +// UTF-8 input that will be converted to it. See PrepareForUTF8Output(). +template<typename STRING> +void PrepareForUTF16Or32Output(const char* src, size_t src_len, STRING* output); + +} // namespace base + +#endif // BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_ diff --git a/security/sandbox/chromium/base/strings/utf_string_conversions.cc b/security/sandbox/chromium/base/strings/utf_string_conversions.cc new file mode 100644 index 0000000000..9a79889159 --- /dev/null +++ b/security/sandbox/chromium/base/strings/utf_string_conversions.cc @@ -0,0 +1,342 @@ +// Copyright (c) 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/strings/utf_string_conversions.h" + +#include <limits.h> +#include <stdint.h> + +#include <type_traits> + +#include "base/strings/string_piece.h" +#include "base/strings/string_util.h" +#include "base/strings/utf_string_conversion_utils.h" +#include "base/third_party/icu/icu_utf.h" +#include "build/build_config.h" + +namespace base { + +namespace { + +constexpr int32_t kErrorCodePoint = 0xFFFD; + +// Size coefficient ---------------------------------------------------------- +// The maximum number of codeunits in the destination encoding corresponding to +// one codeunit in the source encoding. + +template <typename SrcChar, typename DestChar> +struct SizeCoefficient { + static_assert(sizeof(SrcChar) < sizeof(DestChar), + "Default case: from a smaller encoding to the bigger one"); + + // ASCII symbols are encoded by one codeunit in all encodings. + static constexpr int value = 1; +}; + +template <> +struct SizeCoefficient<char16, char> { + // One UTF-16 codeunit corresponds to at most 3 codeunits in UTF-8. + static constexpr int value = 3; +}; + +#if defined(WCHAR_T_IS_UTF32) +template <> +struct SizeCoefficient<wchar_t, char> { + // UTF-8 uses at most 4 codeunits per character. + static constexpr int value = 4; +}; + +template <> +struct SizeCoefficient<wchar_t, char16> { + // UTF-16 uses at most 2 codeunits per character. + static constexpr int value = 2; +}; +#endif // defined(WCHAR_T_IS_UTF32) + +template <typename SrcChar, typename DestChar> +constexpr int size_coefficient_v = + SizeCoefficient<std::decay_t<SrcChar>, std::decay_t<DestChar>>::value; + +// UnicodeAppendUnsafe -------------------------------------------------------- +// Function overloads that write code_point to the output string. Output string +// has to have enough space for the codepoint. + +// Convenience typedef that checks whether the passed in type is integral (i.e. +// bool, char, int or their extended versions) and is of the correct size. +template <typename Char, size_t N> +using EnableIfBitsAre = std::enable_if_t<std::is_integral<Char>::value && + CHAR_BIT * sizeof(Char) == N, + bool>; + +template <typename Char, EnableIfBitsAre<Char, 8> = true> +void UnicodeAppendUnsafe(Char* out, int32_t* size, uint32_t code_point) { + CBU8_APPEND_UNSAFE(out, *size, code_point); +} + +template <typename Char, EnableIfBitsAre<Char, 16> = true> +void UnicodeAppendUnsafe(Char* out, int32_t* size, uint32_t code_point) { + CBU16_APPEND_UNSAFE(out, *size, code_point); +} + +template <typename Char, EnableIfBitsAre<Char, 32> = true> +void UnicodeAppendUnsafe(Char* out, int32_t* size, uint32_t code_point) { + out[(*size)++] = code_point; +} + +// DoUTFConversion ------------------------------------------------------------ +// Main driver of UTFConversion specialized for different Src encodings. +// dest has to have enough room for the converted text. + +template <typename DestChar> +bool DoUTFConversion(const char* src, + int32_t src_len, + DestChar* dest, + int32_t* dest_len) { + bool success = true; + + for (int32_t i = 0; i < src_len;) { + int32_t code_point; + CBU8_NEXT(src, i, src_len, code_point); + + if (!IsValidCodepoint(code_point)) { + success = false; + code_point = kErrorCodePoint; + } + + UnicodeAppendUnsafe(dest, dest_len, code_point); + } + + return success; +} + +template <typename DestChar> +bool DoUTFConversion(const char16* src, + int32_t src_len, + DestChar* dest, + int32_t* dest_len) { + bool success = true; + + auto ConvertSingleChar = [&success](char16 in) -> int32_t { + if (!CBU16_IS_SINGLE(in) || !IsValidCodepoint(in)) { + success = false; + return kErrorCodePoint; + } + return in; + }; + + int32_t i = 0; + + // Always have another symbol in order to avoid checking boundaries in the + // middle of the surrogate pair. + while (i < src_len - 1) { + int32_t code_point; + + if (CBU16_IS_LEAD(src[i]) && CBU16_IS_TRAIL(src[i + 1])) { + code_point = CBU16_GET_SUPPLEMENTARY(src[i], src[i + 1]); + if (!IsValidCodepoint(code_point)) { + code_point = kErrorCodePoint; + success = false; + } + i += 2; + } else { + code_point = ConvertSingleChar(src[i]); + ++i; + } + + UnicodeAppendUnsafe(dest, dest_len, code_point); + } + + if (i < src_len) + UnicodeAppendUnsafe(dest, dest_len, ConvertSingleChar(src[i])); + + return success; +} + +#if defined(WCHAR_T_IS_UTF32) + +template <typename DestChar> +bool DoUTFConversion(const wchar_t* src, + int32_t src_len, + DestChar* dest, + int32_t* dest_len) { + bool success = true; + + for (int32_t i = 0; i < src_len; ++i) { + int32_t code_point = src[i]; + + if (!IsValidCodepoint(code_point)) { + success = false; + code_point = kErrorCodePoint; + } + + UnicodeAppendUnsafe(dest, dest_len, code_point); + } + + return success; +} + +#endif // defined(WCHAR_T_IS_UTF32) + +// UTFConversion -------------------------------------------------------------- +// Function template for generating all UTF conversions. + +template <typename InputString, typename DestString> +bool UTFConversion(const InputString& src_str, DestString* dest_str) { + if (IsStringASCII(src_str)) { + dest_str->assign(src_str.begin(), src_str.end()); + return true; + } + + dest_str->resize(src_str.length() * + size_coefficient_v<typename InputString::value_type, + typename DestString::value_type>); + + // Empty string is ASCII => it OK to call operator[]. + auto* dest = &(*dest_str)[0]; + + // ICU requires 32 bit numbers. + int32_t src_len32 = static_cast<int32_t>(src_str.length()); + int32_t dest_len32 = 0; + + bool res = DoUTFConversion(src_str.data(), src_len32, dest, &dest_len32); + + dest_str->resize(dest_len32); + dest_str->shrink_to_fit(); + + return res; +} + +} // namespace + +// UTF16 <-> UTF8 -------------------------------------------------------------- + +bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { + return UTFConversion(StringPiece(src, src_len), output); +} + +string16 UTF8ToUTF16(StringPiece utf8) { + string16 ret; + // Ignore the success flag of this call, it will do the best it can for + // invalid input, which is what we want here. + UTF8ToUTF16(utf8.data(), utf8.size(), &ret); + return ret; +} + +bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { + return UTFConversion(StringPiece16(src, src_len), output); +} + +std::string UTF16ToUTF8(StringPiece16 utf16) { + std::string ret; + // Ignore the success flag of this call, it will do the best it can for + // invalid input, which is what we want here. + UTF16ToUTF8(utf16.data(), utf16.length(), &ret); + return ret; +} + +// UTF-16 <-> Wide ------------------------------------------------------------- + +#if defined(WCHAR_T_IS_UTF16) +// When wide == UTF-16 the conversions are a NOP. + +bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { + output->assign(src, src + src_len); + return true; +} + +string16 WideToUTF16(WStringPiece wide) { + return string16(wide.begin(), wide.end()); +} + +bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { + output->assign(src, src + src_len); + return true; +} + +std::wstring UTF16ToWide(StringPiece16 utf16) { + return std::wstring(utf16.begin(), utf16.end()); +} + +#elif defined(WCHAR_T_IS_UTF32) + +bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { + return UTFConversion(base::WStringPiece(src, src_len), output); +} + +string16 WideToUTF16(WStringPiece wide) { + string16 ret; + // Ignore the success flag of this call, it will do the best it can for + // invalid input, which is what we want here. + WideToUTF16(wide.data(), wide.length(), &ret); + return ret; +} + +bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { + return UTFConversion(StringPiece16(src, src_len), output); +} + +std::wstring UTF16ToWide(StringPiece16 utf16) { + std::wstring ret; + // Ignore the success flag of this call, it will do the best it can for + // invalid input, which is what we want here. + UTF16ToWide(utf16.data(), utf16.length(), &ret); + return ret; +} + +#endif // defined(WCHAR_T_IS_UTF32) + +// UTF-8 <-> Wide -------------------------------------------------------------- + +// UTF8ToWide is the same code, regardless of whether wide is 16 or 32 bits + +bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { + return UTFConversion(StringPiece(src, src_len), output); +} + +std::wstring UTF8ToWide(StringPiece utf8) { + std::wstring ret; + // Ignore the success flag of this call, it will do the best it can for + // invalid input, which is what we want here. + UTF8ToWide(utf8.data(), utf8.length(), &ret); + return ret; +} + +#if defined(WCHAR_T_IS_UTF16) +// Easy case since we can use the "utf" versions we already wrote above. + +bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { + return UTF16ToUTF8(as_u16cstr(src), src_len, output); +} + +std::string WideToUTF8(WStringPiece wide) { + return UTF16ToUTF8(StringPiece16(as_u16cstr(wide), wide.size())); +} + +#elif defined(WCHAR_T_IS_UTF32) + +bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { + return UTFConversion(WStringPiece(src, src_len), output); +} + +std::string WideToUTF8(WStringPiece wide) { + std::string ret; + // Ignore the success flag of this call, it will do the best it can for + // invalid input, which is what we want here. + WideToUTF8(wide.data(), wide.length(), &ret); + return ret; +} + +#endif // defined(WCHAR_T_IS_UTF32) + +string16 ASCIIToUTF16(StringPiece ascii) { + DCHECK(IsStringASCII(ascii)) << ascii; + return string16(ascii.begin(), ascii.end()); +} + +std::string UTF16ToASCII(StringPiece16 utf16) { + DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16); + return std::string(utf16.begin(), utf16.end()); +} + +} // namespace base diff --git a/security/sandbox/chromium/base/strings/utf_string_conversions.h b/security/sandbox/chromium/base/strings/utf_string_conversions.h new file mode 100644 index 0000000000..f780fb4f4f --- /dev/null +++ b/security/sandbox/chromium/base/strings/utf_string_conversions.h @@ -0,0 +1,54 @@ +// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_STRINGS_UTF_STRING_CONVERSIONS_H_ +#define BASE_STRINGS_UTF_STRING_CONVERSIONS_H_ + +#include <stddef.h> + +#include <string> + +#include "base/base_export.h" +#include "base/strings/string16.h" +#include "base/strings/string_piece.h" + +namespace base { + +// These convert between UTF-8, -16, and -32 strings. They are potentially slow, +// so avoid unnecessary conversions. The low-level versions return a boolean +// indicating whether the conversion was 100% valid. In this case, it will still +// do the best it can and put the result in the output buffer. The versions that +// return strings ignore this error and just return the best conversion +// possible. +BASE_EXPORT bool WideToUTF8(const wchar_t* src, size_t src_len, + std::string* output); +BASE_EXPORT std::string WideToUTF8(WStringPiece wide) WARN_UNUSED_RESULT; +BASE_EXPORT bool UTF8ToWide(const char* src, size_t src_len, + std::wstring* output); +BASE_EXPORT std::wstring UTF8ToWide(StringPiece utf8) WARN_UNUSED_RESULT; + +BASE_EXPORT bool WideToUTF16(const wchar_t* src, size_t src_len, + string16* output); +BASE_EXPORT string16 WideToUTF16(WStringPiece wide) WARN_UNUSED_RESULT; +BASE_EXPORT bool UTF16ToWide(const char16* src, size_t src_len, + std::wstring* output); +BASE_EXPORT std::wstring UTF16ToWide(StringPiece16 utf16) WARN_UNUSED_RESULT; + +BASE_EXPORT bool UTF8ToUTF16(const char* src, size_t src_len, string16* output); +BASE_EXPORT string16 UTF8ToUTF16(StringPiece utf8) WARN_UNUSED_RESULT; +BASE_EXPORT bool UTF16ToUTF8(const char16* src, size_t src_len, + std::string* output); +BASE_EXPORT std::string UTF16ToUTF8(StringPiece16 utf16) WARN_UNUSED_RESULT; + +// This converts an ASCII string, typically a hardcoded constant, to a UTF16 +// string. +BASE_EXPORT string16 ASCIIToUTF16(StringPiece ascii) WARN_UNUSED_RESULT; + +// Converts to 7-bit ASCII by truncating. The result must be known to be ASCII +// beforehand. +BASE_EXPORT std::string UTF16ToASCII(StringPiece16 utf16) WARN_UNUSED_RESULT; + +} // namespace base + +#endif // BASE_STRINGS_UTF_STRING_CONVERSIONS_H_ |