diff options
Diffstat (limited to 'js/public/String.h')
-rw-r--r-- | js/public/String.h | 556 |
1 files changed, 556 insertions, 0 deletions
diff --git a/js/public/String.h b/js/public/String.h new file mode 100644 index 0000000000..3e003f291c --- /dev/null +++ b/js/public/String.h @@ -0,0 +1,556 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* JavaScript string operations. */ + +#ifndef js_String_h +#define js_String_h + +#include "js/shadow/String.h" // JS::shadow::String + +#include "mozilla/Assertions.h" // MOZ_ASSERT +#include "mozilla/Attributes.h" // MOZ_ALWAYS_INLINE +#include "mozilla/Likely.h" // MOZ_LIKELY +#include "mozilla/Maybe.h" // mozilla::Maybe +#include "mozilla/Range.h" // mozilla::Range +#include "mozilla/Span.h" // mozilla::Span + // std::tuple + +#include <algorithm> // std::copy_n +#include <stddef.h> // size_t +#include <stdint.h> // uint32_t, uint64_t, INT32_MAX + +#include "jstypes.h" // JS_PUBLIC_API + +#include "js/CharacterEncoding.h" // JS::UTF8Chars, JS::ConstUTF8CharsZ +#include "js/RootingAPI.h" // JS::Handle +#include "js/TypeDecls.h" // JS::Latin1Char +#include "js/UniquePtr.h" // JS::UniquePtr +#include "js/Utility.h" // JS::FreePolicy, JS::UniqueTwoByteChars +#include "js/Value.h" // JS::Value + +struct JS_PUBLIC_API JSContext; +class JS_PUBLIC_API JSAtom; +class JSLinearString; +class JS_PUBLIC_API JSString; + +namespace JS { + +class JS_PUBLIC_API AutoRequireNoGC; + +} // namespace JS + +extern JS_PUBLIC_API JSString* JS_GetEmptyString(JSContext* cx); + +// Don't want to export data, so provide accessors for non-inline Values. +extern JS_PUBLIC_API JS::Value JS_GetEmptyStringValue(JSContext* cx); + +/* + * String creation. + * + * NB: JS_NewUCString takes ownership of bytes on success, avoiding a copy; + * but on error (signified by null return), it leaves chars owned by the + * caller. So the caller must free bytes in the error case, if it has no use + * for them. In contrast, all the JS_New*StringCopy* functions do not take + * ownership of the character memory passed to them -- they copy it. + */ + +extern JS_PUBLIC_API JSString* JS_NewStringCopyN(JSContext* cx, const char* s, + size_t n); + +extern JS_PUBLIC_API JSString* JS_NewStringCopyZ(JSContext* cx, const char* s); + +extern JS_PUBLIC_API JSString* JS_NewStringCopyUTF8Z( + JSContext* cx, const JS::ConstUTF8CharsZ s); + +extern JS_PUBLIC_API JSString* JS_NewStringCopyUTF8N(JSContext* cx, + const JS::UTF8Chars& s); + +extern JS_PUBLIC_API JSString* JS_AtomizeStringN(JSContext* cx, const char* s, + size_t length); + +extern JS_PUBLIC_API JSString* JS_AtomizeString(JSContext* cx, const char* s); + +// Note: unlike the non-pinning JS_Atomize* functions, this can be called +// without entering a realm/zone. +extern JS_PUBLIC_API JSString* JS_AtomizeAndPinStringN(JSContext* cx, + const char* s, + size_t length); + +// Note: unlike the non-pinning JS_Atomize* functions, this can be called +// without entering a realm/zone. +extern JS_PUBLIC_API JSString* JS_AtomizeAndPinString(JSContext* cx, + const char* s); + +extern JS_PUBLIC_API JSString* JS_NewLatin1String( + JSContext* cx, js::UniquePtr<JS::Latin1Char[], JS::FreePolicy> chars, + size_t length); + +extern JS_PUBLIC_API JSString* JS_NewUCString(JSContext* cx, + JS::UniqueTwoByteChars chars, + size_t length); + +extern JS_PUBLIC_API JSString* JS_NewUCStringDontDeflate( + JSContext* cx, JS::UniqueTwoByteChars chars, size_t length); + +extern JS_PUBLIC_API JSString* JS_NewUCStringCopyN(JSContext* cx, + const char16_t* s, size_t n); + +extern JS_PUBLIC_API JSString* JS_NewUCStringCopyZ(JSContext* cx, + const char16_t* s); + +extern JS_PUBLIC_API JSString* JS_AtomizeUCStringN(JSContext* cx, + const char16_t* s, + size_t length); + +extern JS_PUBLIC_API JSString* JS_AtomizeUCString(JSContext* cx, + const char16_t* s); + +extern JS_PUBLIC_API bool JS_CompareStrings(JSContext* cx, JSString* str1, + JSString* str2, int32_t* result); + +[[nodiscard]] extern JS_PUBLIC_API bool JS_StringEqualsAscii( + JSContext* cx, JSString* str, const char* asciiBytes, bool* match); + +// Same as above, but when the length of asciiBytes (excluding the +// trailing null, if any) is known. +[[nodiscard]] extern JS_PUBLIC_API bool JS_StringEqualsAscii( + JSContext* cx, JSString* str, const char* asciiBytes, size_t length, + bool* match); + +template <size_t N> +[[nodiscard]] bool JS_StringEqualsLiteral(JSContext* cx, JSString* str, + const char (&asciiBytes)[N], + bool* match) { + MOZ_ASSERT(asciiBytes[N - 1] == '\0'); + return JS_StringEqualsAscii(cx, str, asciiBytes, N - 1, match); +} + +extern JS_PUBLIC_API size_t JS_PutEscapedString(JSContext* cx, char* buffer, + size_t size, JSString* str, + char quote); + +/* + * Extracting string characters and length. + * + * While getting the length of a string is infallible, getting the chars can + * fail. As indicated by the lack of a JSContext parameter, there are two + * special cases where getting the chars is infallible: + * + * The first case is for strings that have been atomized, e.g. directly by + * JS_AtomizeAndPinString or implicitly because it is stored in a jsid. + * + * The second case is "linear" strings that have been explicitly prepared in a + * fallible context by JS_EnsureLinearString. To catch errors, a separate opaque + * JSLinearString type is returned by JS_EnsureLinearString and expected by + * JS_Get{Latin1,TwoByte}StringCharsAndLength. Note, though, that this is purely + * a syntactic distinction: the input and output of JS_EnsureLinearString are + * the same actual GC-thing. If a JSString is known to be linear, + * JS_ASSERT_STRING_IS_LINEAR can be used to make a debug-checked cast. Example: + * + * // In a fallible context. + * JSLinearString* lstr = JS_EnsureLinearString(cx, str); + * if (!lstr) { + * return false; + * } + * MOZ_ASSERT(lstr == JS_ASSERT_STRING_IS_LINEAR(str)); + * + * // In an infallible context, for the same 'str'. + * AutoCheckCannotGC nogc; + * const char16_t* chars = JS::GetTwoByteLinearStringChars(nogc, lstr) + * MOZ_ASSERT(chars); + * + * Note: JS strings (including linear strings and atoms) are not + * null-terminated! + * + * Additionally, string characters are stored as either Latin1Char (8-bit) + * or char16_t (16-bit). Clients can use JS::StringHasLatin1Chars and can then + * call either the Latin1* or TwoByte* functions. Some functions like + * JS_CopyStringChars and JS_GetStringCharAt accept both Latin1 and TwoByte + * strings. + */ + +extern JS_PUBLIC_API size_t JS_GetStringLength(JSString* str); + +extern JS_PUBLIC_API bool JS_StringIsLinear(JSString* str); + +extern JS_PUBLIC_API const JS::Latin1Char* JS_GetLatin1StringCharsAndLength( + JSContext* cx, const JS::AutoRequireNoGC& nogc, JSString* str, + size_t* length); + +extern JS_PUBLIC_API const char16_t* JS_GetTwoByteStringCharsAndLength( + JSContext* cx, const JS::AutoRequireNoGC& nogc, JSString* str, + size_t* length); + +extern JS_PUBLIC_API bool JS_GetStringCharAt(JSContext* cx, JSString* str, + size_t index, char16_t* res); + +extern JS_PUBLIC_API const char16_t* JS_GetTwoByteExternalStringChars( + JSString* str); + +extern JS_PUBLIC_API bool JS_CopyStringChars( + JSContext* cx, const mozilla::Range<char16_t>& dest, JSString* str); + +/** + * Copies the string's characters to a null-terminated char16_t buffer. + * + * Returns nullptr on OOM. + */ +extern JS_PUBLIC_API JS::UniqueTwoByteChars JS_CopyStringCharsZ(JSContext* cx, + JSString* str); + +extern JS_PUBLIC_API JSLinearString* JS_EnsureLinearString(JSContext* cx, + JSString* str); + +static MOZ_ALWAYS_INLINE JSLinearString* JS_ASSERT_STRING_IS_LINEAR( + JSString* str) { + MOZ_ASSERT(JS_StringIsLinear(str)); + return reinterpret_cast<JSLinearString*>(str); +} + +static MOZ_ALWAYS_INLINE JSString* JS_FORGET_STRING_LINEARNESS( + JSLinearString* str) { + return reinterpret_cast<JSString*>(str); +} + +/* + * Additional APIs that avoid fallibility when given a linear string. + */ + +extern JS_PUBLIC_API bool JS_LinearStringEqualsAscii(JSLinearString* str, + const char* asciiBytes); +extern JS_PUBLIC_API bool JS_LinearStringEqualsAscii(JSLinearString* str, + const char* asciiBytes, + size_t length); + +template <size_t N> +bool JS_LinearStringEqualsLiteral(JSLinearString* str, + const char (&asciiBytes)[N]) { + MOZ_ASSERT(asciiBytes[N - 1] == '\0'); + return JS_LinearStringEqualsAscii(str, asciiBytes, N - 1); +} + +extern JS_PUBLIC_API size_t JS_PutEscapedLinearString(char* buffer, size_t size, + JSLinearString* str, + char quote); + +/** + * Create a dependent string, i.e., a string that owns no character storage, + * but that refers to a slice of another string's chars. Dependent strings + * are mutable by definition, so the thread safety comments above apply. + */ +extern JS_PUBLIC_API JSString* JS_NewDependentString(JSContext* cx, + JS::Handle<JSString*> str, + size_t start, + size_t length); + +/** + * Concatenate two strings, possibly resulting in a rope. + * See above for thread safety comments. + */ +extern JS_PUBLIC_API JSString* JS_ConcatStrings(JSContext* cx, + JS::Handle<JSString*> left, + JS::Handle<JSString*> right); + +/** + * For JS_DecodeBytes, set *dstlenp to the size of the destination buffer before + * the call; on return, *dstlenp contains the number of characters actually + * stored. To determine the necessary destination buffer size, make a sizing + * call that passes nullptr for dst. + * + * On errors, the functions report the error. In that case, *dstlenp contains + * the number of characters or bytes transferred so far. If cx is nullptr, no + * error is reported on failure, and the functions simply return false. + * + * NB: This function does not store an additional zero byte or char16_t after + * the transcoded string. + */ +JS_PUBLIC_API bool JS_DecodeBytes(JSContext* cx, const char* src, size_t srclen, + char16_t* dst, size_t* dstlenp); + +/** + * Get number of bytes in the string encoding (without accounting for a + * terminating zero bytes. The function returns (size_t) -1 if the string + * can not be encoded into bytes and reports an error using cx accordingly. + */ +JS_PUBLIC_API size_t JS_GetStringEncodingLength(JSContext* cx, JSString* str); + +/** + * Encode string into a buffer. The function does not stores an additional + * zero byte. The function returns (size_t) -1 if the string can not be + * encoded into bytes with no error reported. Otherwise it returns the number + * of bytes that are necessary to encode the string. If that exceeds the + * length parameter, the string will be cut and only length bytes will be + * written into the buffer. + */ +[[nodiscard]] JS_PUBLIC_API bool JS_EncodeStringToBuffer(JSContext* cx, + JSString* str, + char* buffer, + size_t length); + +/** + * Encode as many scalar values of the string as UTF-8 as can fit + * into the caller-provided buffer replacing unpaired surrogates + * with the REPLACEMENT CHARACTER. + * + * If JS::StringHasLatin1Chars(str) returns true, the function + * is guaranteed to convert the entire string if + * buffer.Length() >= 2 * JS_GetStringLength(str). Otherwise, + * the function is guaranteed to convert the entire string if + * buffer.Length() >= 3 * JS_GetStringLength(str). + * + * This function does not alter the representation of |str| or + * any |JSString*| substring that is a constituent part of it. + * Returns mozilla::Nothing() on OOM, without reporting an error; + * some data may have been written to |buffer| when this happens. + * + * If there's no OOM, returns the number of code units read and + * the number of code units written. + * + * The semantics of this method match the semantics of + * TextEncoder.encodeInto(). + * + * The function does not store an additional zero byte. + */ +JS_PUBLIC_API mozilla::Maybe<std::tuple<size_t, size_t>> +JS_EncodeStringToUTF8BufferPartial(JSContext* cx, JSString* str, + mozilla::Span<char> buffer); + +namespace JS { + +/** + * Maximum length of a JS string. This is chosen so that the number of bytes + * allocated for a null-terminated TwoByte string still fits in int32_t. + */ +static constexpr uint32_t MaxStringLength = (1 << 30) - 2; + +static_assert((uint64_t(MaxStringLength) + 1) * sizeof(char16_t) <= INT32_MAX, + "size of null-terminated JSString char buffer must fit in " + "INT32_MAX"); + +/** Compute the length of a string. */ +MOZ_ALWAYS_INLINE size_t GetStringLength(JSString* s) { + return shadow::AsShadowString(s)->length(); +} + +/** Compute the length of a linear string. */ +MOZ_ALWAYS_INLINE size_t GetLinearStringLength(JSLinearString* s) { + return shadow::AsShadowString(s)->length(); +} + +/** Return true iff the given linear string uses Latin-1 storage. */ +MOZ_ALWAYS_INLINE bool LinearStringHasLatin1Chars(JSLinearString* s) { + return shadow::AsShadowString(s)->hasLatin1Chars(); +} + +/** Return true iff the given string uses Latin-1 storage. */ +MOZ_ALWAYS_INLINE bool StringHasLatin1Chars(JSString* s) { + return shadow::AsShadowString(s)->hasLatin1Chars(); +} + +/** + * Given a linear string known to use Latin-1 storage, return a pointer to that + * storage. This pointer remains valid only as long as no GC occurs. + */ +MOZ_ALWAYS_INLINE const Latin1Char* GetLatin1LinearStringChars( + const AutoRequireNoGC& nogc, JSLinearString* linear) { + return shadow::AsShadowString(linear)->latin1LinearChars(); +} + +/** + * Given a linear string known to use two-byte storage, return a pointer to that + * storage. This pointer remains valid only as long as no GC occurs. + */ +MOZ_ALWAYS_INLINE const char16_t* GetTwoByteLinearStringChars( + const AutoRequireNoGC& nogc, JSLinearString* linear) { + return shadow::AsShadowString(linear)->twoByteLinearChars(); +} + +/** + * Given an in-range index into the provided string, return the character at + * that index. + */ +MOZ_ALWAYS_INLINE char16_t GetLinearStringCharAt(JSLinearString* linear, + size_t index) { + shadow::String* s = shadow::AsShadowString(linear); + MOZ_ASSERT(index < s->length()); + + return s->hasLatin1Chars() ? s->latin1LinearChars()[index] + : s->twoByteLinearChars()[index]; +} + +/** + * Convert an atom to a linear string. All atoms are linear, so this + * operation is infallible. + */ +MOZ_ALWAYS_INLINE JSLinearString* AtomToLinearString(JSAtom* atom) { + return reinterpret_cast<JSLinearString*>(atom); +} + +/** + * If the provided string uses externally-managed latin-1 storage, return true + * and set |*callbacks| to the external-string callbacks used to create it and + * |*chars| to a pointer to its latin1 storage. (These pointers remain valid + * as long as the provided string is kept alive.) + */ +MOZ_ALWAYS_INLINE bool IsExternalStringLatin1( + JSString* str, const JSExternalStringCallbacks** callbacks, + const JS::Latin1Char** chars) { + shadow::String* s = shadow::AsShadowString(str); + + if (!s->isExternal() || !s->hasLatin1Chars()) { + return false; + } + + *callbacks = s->externalCallbacks; + *chars = s->nonInlineCharsLatin1; + return true; +} + +/** + * If the provided string uses externally-managed two-byte storage, return true + * and set |*callbacks| to the external-string callbacks used to create it and + * |*chars| to a pointer to its two-byte storage. (These pointers remain valid + * as long as the provided string is kept alive.) + */ +MOZ_ALWAYS_INLINE bool IsExternalUCString( + JSString* str, const JSExternalStringCallbacks** callbacks, + const char16_t** chars) { + shadow::String* s = shadow::AsShadowString(str); + + if (!s->isExternal() || s->hasLatin1Chars()) { + return false; + } + + *callbacks = s->externalCallbacks; + *chars = s->nonInlineCharsTwoByte; + return true; +} + +namespace detail { + +extern JS_PUBLIC_API JSLinearString* StringToLinearStringSlow(JSContext* cx, + JSString* str); + +} // namespace detail + +/** Convert a string to a linear string. */ +MOZ_ALWAYS_INLINE JSLinearString* StringToLinearString(JSContext* cx, + JSString* str) { + if (MOZ_LIKELY(shadow::AsShadowString(str)->isLinear())) { + return reinterpret_cast<JSLinearString*>(str); + } + + return detail::StringToLinearStringSlow(cx, str); +} + +/** Copy characters in |s[start..start + len]| to |dest[0..len]|. */ +MOZ_ALWAYS_INLINE void CopyLinearStringChars(char16_t* dest, JSLinearString* s, + size_t len, size_t start = 0) { +#ifdef DEBUG + size_t stringLen = GetLinearStringLength(s); + MOZ_ASSERT(start <= stringLen); + MOZ_ASSERT(len <= stringLen - start); +#endif + + shadow::String* str = shadow::AsShadowString(s); + + if (str->hasLatin1Chars()) { + const Latin1Char* src = str->latin1LinearChars(); + for (size_t i = 0; i < len; i++) { + dest[i] = src[start + i]; + } + } else { + const char16_t* src = str->twoByteLinearChars(); + std::copy_n(src + start, len, dest); + } +} + +/** + * Copy characters in |s[start..start + len]| to |dest[0..len]|, lossily + * truncating 16-bit values to |char| if necessary. + */ +MOZ_ALWAYS_INLINE void LossyCopyLinearStringChars(char* dest, JSLinearString* s, + size_t len, + size_t start = 0) { +#ifdef DEBUG + size_t stringLen = GetLinearStringLength(s); + MOZ_ASSERT(start <= stringLen); + MOZ_ASSERT(len <= stringLen - start); +#endif + + shadow::String* str = shadow::AsShadowString(s); + + if (LinearStringHasLatin1Chars(s)) { + const Latin1Char* src = str->latin1LinearChars(); + for (size_t i = 0; i < len; i++) { + dest[i] = char(src[start + i]); + } + } else { + const char16_t* src = str->twoByteLinearChars(); + for (size_t i = 0; i < len; i++) { + dest[i] = char(src[start + i]); + } + } +} + +/** + * Copy characters in |s[start..start + len]| to |dest[0..len]|. + * + * This function is fallible. If you already have a linear string, use the + * infallible |JS::CopyLinearStringChars| above instead. + */ +[[nodiscard]] inline bool CopyStringChars(JSContext* cx, char16_t* dest, + JSString* s, size_t len, + size_t start = 0) { + JSLinearString* linear = StringToLinearString(cx, s); + if (!linear) { + return false; + } + + CopyLinearStringChars(dest, linear, len, start); + return true; +} + +/** + * Copy characters in |s[start..start + len]| to |dest[0..len]|, lossily + * truncating 16-bit values to |char| if necessary. + * + * This function is fallible. If you already have a linear string, use the + * infallible |JS::LossyCopyLinearStringChars| above instead. + */ +[[nodiscard]] inline bool LossyCopyStringChars(JSContext* cx, char* dest, + JSString* s, size_t len, + size_t start = 0) { + JSLinearString* linear = StringToLinearString(cx, s); + if (!linear) { + return false; + } + + LossyCopyLinearStringChars(dest, linear, len, start); + return true; +} + +} // namespace JS + +/** DO NOT USE, only present for Rust bindings as a temporary hack */ +[[deprecated]] extern JS_PUBLIC_API bool JS_DeprecatedStringHasLatin1Chars( + JSString* str); + +// JSString* is an aligned pointer, but this information isn't available in the +// public header. We specialize HasFreeLSB here so that JS::Result<JSString*> +// compiles. + +namespace mozilla { +namespace detail { +template <> +struct HasFreeLSB<JSString*> { + static constexpr bool value = true; +}; +} // namespace detail +} // namespace mozilla + +#endif // js_String_h |