1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
|
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/* JavaScript string operations. */
#ifndef js_String_h
#define js_String_h
#include "js/shadow/String.h" // JS::shadow::String
#include "mozilla/Assertions.h" // MOZ_ASSERT
#include "mozilla/Attributes.h" // MOZ_ALWAYS_INLINE
#include "mozilla/Likely.h" // MOZ_LIKELY
#include "mozilla/Maybe.h" // mozilla::Maybe
#include "mozilla/Range.h" // mozilla::Range
#include "mozilla/Span.h" // mozilla::Span
#include "mozilla/Tuple.h" // mozilla::Tuple
#include <algorithm> // std::copy_n
#include <stddef.h> // size_t
#include <stdint.h> // uint32_t, uint64_t, INT32_MAX
#include "jstypes.h" // JS_PUBLIC_API
#include "js/CharacterEncoding.h" // JS::UTF8Chars, JS::ConstUTF8CharsZ
#include "js/Id.h" // jsid, JSID_IS_STRING, JSID_TO_STRING
#include "js/RootingAPI.h" // JS::Handle
#include "js/TypeDecls.h" // JS::Latin1Char
#include "js/UniquePtr.h" // JS::UniquePtr
#include "js/Utility.h" // JS::FreePolicy, JS::UniqueTwoByteChars
#include "js/Value.h" // JS::Value
// Forward declarations of the engine types this header mentions. Within this
// header they are only ever used through pointers or references, so complete
// definitions are not needed here.
struct JS_PUBLIC_API JSContext;
class JS_PUBLIC_API JSAtom;
class JSLinearString;
class JS_PUBLIC_API JSString;
namespace JS {
// Taken by const reference by the character accessors declared in this header.
class JS_PUBLIC_API AutoRequireNoGC;
} // namespace JS
// Accessor returning an empty string as a JSString*.
// NOTE(review): presumably this is the engine's shared empty string for |cx|
// rather than a fresh allocation -- confirm against the implementation.
extern JS_PUBLIC_API JSString* JS_GetEmptyString(JSContext* cx);
// Don't want to export data, so provide accessors for non-inline Values.
// Judging by its name and return type, this hands back the empty string as a
// JS::Value.
extern JS_PUBLIC_API JS::Value JS_GetEmptyStringValue(JSContext* cx);
/*
* String creation.
*
* NB: JS_NewUCString takes ownership of chars on success, avoiding a copy;
* but on error (signified by null return), it leaves chars owned by the
* caller. So the caller must free chars in the error case, if it has no use
* for them. In contrast, all the JS_New*StringCopy* functions do not take
* ownership of the character memory passed to them -- they copy it.
*/
// JS_New*StringCopy*: build a new string from caller-owned characters. As the
// "String creation" comment above states, these copy the input and do not take
// ownership of it. The N variant takes an explicit count |n|; the Z variant
// takes no length (NOTE(review): presumably |s| must then be null-terminated
// -- confirm).
extern JS_PUBLIC_API JSString* JS_NewStringCopyN(JSContext* cx, const char* s,
size_t n);
extern JS_PUBLIC_API JSString* JS_NewStringCopyZ(JSContext* cx, const char* s);
// UTF-8 flavours of the copying constructors: the input arrives wrapped in
// JS::ConstUTF8CharsZ / JS::UTF8Chars (js/CharacterEncoding.h) instead of as a
// bare char pointer.
extern JS_PUBLIC_API JSString* JS_NewStringCopyUTF8Z(
JSContext* cx, const JS::ConstUTF8CharsZ s);
extern JS_PUBLIC_API JSString* JS_NewStringCopyUTF8N(JSContext* cx,
const JS::UTF8Chars s);
// Non-pinning atomization from a char pointer (with an explicit |length| in
// the N variant). The notes on the pinning variants below imply that these
// need a realm/zone to have been entered.
extern JS_PUBLIC_API JSString* JS_AtomizeStringN(JSContext* cx, const char* s,
size_t length);
extern JS_PUBLIC_API JSString* JS_AtomizeString(JSContext* cx, const char* s);
// Note: unlike the non-pinning JS_Atomize* functions, this can be called
// without entering a realm/zone.
extern JS_PUBLIC_API JSString* JS_AtomizeAndPinStringN(JSContext* cx,
const char* s,
size_t length);
// Note: unlike the non-pinning JS_Atomize* functions, this can be called
// without entering a realm/zone.
extern JS_PUBLIC_API JSString* JS_AtomizeAndPinString(JSContext* cx,
const char* s);
// Create a string from an owning Latin-1 buffer of |length| chars, passed by
// value as a js::UniquePtr.
// NOTE(review): what happens to |chars| on failure is not documented in this
// header -- confirm against the implementation.
extern JS_PUBLIC_API JSString* JS_NewLatin1String(
JSContext* cx, js::UniquePtr<JS::Latin1Char[], JS::FreePolicy> chars,
size_t length);
// Create a string from an owning two-byte buffer of |length| chars; see the NB
// in the "String creation" comment above regarding ownership of |chars|.
extern JS_PUBLIC_API JSString* JS_NewUCString(JSContext* cx,
JS::UniqueTwoByteChars chars,
size_t length);
// Same parameters as JS_NewUCString.
// NOTE(review): the name suggests the two-byte storage is kept as-is rather
// than being deflated to Latin-1 -- confirm against the implementation.
extern JS_PUBLIC_API JSString* JS_NewUCStringDontDeflate(
JSContext* cx, JS::UniqueTwoByteChars chars, size_t length);
// char16_t counterparts of JS_NewStringCopyN / JS_NewStringCopyZ.
extern JS_PUBLIC_API JSString* JS_NewUCStringCopyN(JSContext* cx,
const char16_t* s, size_t n);
extern JS_PUBLIC_API JSString* JS_NewUCStringCopyZ(JSContext* cx,
const char16_t* s);
// char16_t counterparts of JS_AtomizeStringN / JS_AtomizeString.
extern JS_PUBLIC_API JSString* JS_AtomizeUCStringN(JSContext* cx,
const char16_t* s,
size_t length);
extern JS_PUBLIC_API JSString* JS_AtomizeUCString(JSContext* cx,
const char16_t* s);
// Compare |str1| with |str2| and report the outcome through |*result|.
// NOTE(review): presumably |*result| is negative/zero/positive in strcmp
// fashion and a false return signals failure on |cx| -- confirm against the
// implementation.
extern JS_PUBLIC_API bool JS_CompareStrings(JSContext* cx, JSString* str1,
JSString* str2, int32_t* result);
// Test whether |str| equals |asciiBytes| and store the answer in |*match|.
// The bool return value is separate from |*match| and must not be ignored
// ([[nodiscard]]); NOTE(review): presumably false means the comparison itself
// failed -- confirm.
[[nodiscard]] extern JS_PUBLIC_API bool JS_StringEqualsAscii(
JSContext* cx, JSString* str, const char* asciiBytes, bool* match);
// Same as above, but when the length of asciiBytes (excluding the
// trailing null, if any) is known.
[[nodiscard]] extern JS_PUBLIC_API bool JS_StringEqualsAscii(
JSContext* cx, JSString* str, const char* asciiBytes, size_t length,
bool* match);
/**
 * Compare |str| against a string literal, writing the outcome to |*match|.
 *
 * The literal's length is derived from the array bound N (dropping the
 * trailing '\0'), and the work is forwarded to the length-taking overload of
 * JS_StringEqualsAscii, whose return value is passed through unchanged.
 */
template <size_t N>
[[nodiscard]] bool JS_StringEqualsLiteral(JSContext* cx, JSString* str,
                                          const char (&asciiBytes)[N],
                                          bool* match) {
  // The array bound counts the terminator; make sure it is really there.
  MOZ_ASSERT(asciiBytes[N - 1] == '\0');

  constexpr size_t literalLength = N - 1;
  const char* const literalChars = asciiBytes;
  return JS_StringEqualsAscii(cx, str, literalChars, literalLength, match);
}
// Write an escaped rendering of |str| into |buffer|, whose capacity is |size|,
// with |quote| as the quote character.
// NOTE(review): the escaping rules, the truncation behaviour when |buffer| is
// too small, and the meaning of the returned size_t are not visible in this
// header -- confirm against the implementation.
extern JS_PUBLIC_API size_t JS_PutEscapedString(JSContext* cx, char* buffer,
size_t size, JSString* str,
char quote);
/*
* Extracting string characters and length.
*
* While getting the length of a string is infallible, getting the chars can
* fail. As indicated by the lack of a JSContext parameter, there are two
* special cases where getting the chars is infallible:
*
* The first case is for strings that have been atomized, e.g. directly by
* JS_AtomizeAndPinString or implicitly because it is stored in a jsid.
*
* The second case is "linear" strings that have been explicitly prepared in a
* fallible context by JS_EnsureLinearString. To catch errors, a separate opaque
* JSLinearString type is returned by JS_EnsureLinearString and expected by
* JS_Get{Latin1,TwoByte}StringCharsAndLength. Note, though, that this is purely
* a syntactic distinction: the input and output of JS_EnsureLinearString are
* the same actual GC-thing. If a JSString is known to be linear,
* JS_ASSERT_STRING_IS_LINEAR can be used to make a debug-checked cast. Example:
*
* // In a fallible context.
* JSLinearString* lstr = JS_EnsureLinearString(cx, str);
* if (!lstr) {
* return false;
* }
* MOZ_ASSERT(lstr == JS_ASSERT_STRING_IS_LINEAR(str));
*
* // In an infallible context, for the same 'str'.
* AutoCheckCannotGC nogc;
* const char16_t* chars = JS::GetTwoByteLinearStringChars(nogc, lstr);
* MOZ_ASSERT(chars);
*
* Note: JS strings (including linear strings and atoms) are not
* null-terminated!
*
* Additionally, string characters are stored as either Latin1Char (8-bit)
* or char16_t (16-bit). Clients can use JS::StringHasLatin1Chars and can then
* call either the Latin1* or TwoByte* functions. Some functions like
* JS_CopyStringChars and JS_GetStringCharAt accept both Latin1 and TwoByte
* strings.
*/
// Length of |str|. Infallible, as the overview comment above explains.
extern JS_PUBLIC_API size_t JS_GetStringLength(JSString* str);
// Report whether |str| is linear (this is what JS_ASSERT_STRING_IS_LINEAR
// checks in its assertion).
extern JS_PUBLIC_API bool JS_StringIsLinear(JSString* str);
// Fallible character accessors: return a pointer to |str|'s characters and
// store its length in |*length|. Per the overview comment above, callers pick
// the Latin1 or TwoByte flavour after consulting JS::StringHasLatin1Chars.
// NOTE(review): presumably a null return signals failure and the returned
// pointer is only valid while |nogc| is live -- confirm.
extern JS_PUBLIC_API const JS::Latin1Char* JS_GetLatin1StringCharsAndLength(
JSContext* cx, const JS::AutoRequireNoGC& nogc, JSString* str,
size_t* length);
extern JS_PUBLIC_API const char16_t* JS_GetTwoByteStringCharsAndLength(
JSContext* cx, const JS::AutoRequireNoGC& nogc, JSString* str,
size_t* length);
// Store the character at |index| of |str| in |*res|. Accepts both Latin1 and
// TwoByte strings (see the overview comment above).
extern JS_PUBLIC_API bool JS_GetStringCharAt(JSContext* cx, JSString* str,
size_t index, char16_t* res);
// NOTE(review): presumably |str| must be an external two-byte string -- this
// header does not spell out the precondition; confirm.
extern JS_PUBLIC_API const char16_t* JS_GetTwoByteExternalStringChars(
JSString* str);
// Copy |str|'s characters into |dest|. Accepts both Latin1 and TwoByte
// strings (see the overview comment above).
extern JS_PUBLIC_API bool JS_CopyStringChars(JSContext* cx,
mozilla::Range<char16_t> dest,
JSString* str);
/**
* Copies the string's characters to a null-terminated char16_t buffer.
*
* Returns nullptr on OOM.
*/
extern JS_PUBLIC_API JS::UniqueTwoByteChars JS_CopyStringCharsZ(JSContext* cx,
JSString* str);
// Fallibly prepare |str| as a linear string. As the overview comment above
// notes, the input and the result are the same GC thing; the example there
// treats a null result as failure.
extern JS_PUBLIC_API JSLinearString* JS_EnsureLinearString(JSContext* cx,
JSString* str);
/**
 * Debug-checked cast from JSString* to JSLinearString*.
 *
 * The caller must already know that |str| is linear; this is verified with
 * MOZ_ASSERT and the same pointer is returned under the linear type.
 */
static MOZ_ALWAYS_INLINE JSLinearString* JS_ASSERT_STRING_IS_LINEAR(
    JSString* str) {
  MOZ_ASSERT(JS_StringIsLinear(str));
  JSLinearString* const asLinear = reinterpret_cast<JSLinearString*>(str);
  return asLinear;
}
/**
 * Inverse of JS_ASSERT_STRING_IS_LINEAR: view a JSLinearString* as a plain
 * JSString*. The pointer value is unchanged; only the static type is dropped.
 */
static MOZ_ALWAYS_INLINE JSString* JS_FORGET_STRING_LINEARNESS(
    JSLinearString* str) {
  JSString* const asString = reinterpret_cast<JSString*>(str);
  return asString;
}
/*
* Additional APIs that avoid fallibility when given a linear string.
*
* Unlike their JS_StringEqualsAscii counterparts, these take no JSContext and
* return the comparison result directly.
*/
// Compare |str| with |asciiBytes|; no length is supplied
// (NOTE(review): presumably |asciiBytes| must be null-terminated -- confirm).
extern JS_PUBLIC_API bool JS_LinearStringEqualsAscii(JSLinearString* str,
const char* asciiBytes);
// As above, with the length of |asciiBytes| given explicitly in |length|.
extern JS_PUBLIC_API bool JS_LinearStringEqualsAscii(JSLinearString* str,
const char* asciiBytes,
size_t length);
/**
 * Infallibly compare the linear string |str| against a string literal.
 *
 * The literal's length comes from the array bound N (dropping the trailing
 * '\0'); the comparison is done by the length-taking overload of
 * JS_LinearStringEqualsAscii and its result is returned as-is.
 */
template <size_t N>
bool JS_LinearStringEqualsLiteral(JSLinearString* str,
                                  const char (&asciiBytes)[N]) {
  // The array bound counts the terminator; make sure it is really there.
  MOZ_ASSERT(asciiBytes[N - 1] == '\0');

  constexpr size_t literalLength = N - 1;
  const char* const literalChars = asciiBytes;
  return JS_LinearStringEqualsAscii(str, literalChars, literalLength);
}
// Linear-string counterpart of JS_PutEscapedString: takes no JSContext and
// writes an escaped rendering of |str| into |buffer| (capacity |size|), with
// |quote| as the quote character.
// NOTE(review): the escaping rules and the meaning of the returned size_t are
// not visible in this header -- confirm against the implementation.
extern JS_PUBLIC_API size_t JS_PutEscapedLinearString(char* buffer, size_t size,
JSLinearString* str,
char quote);
/**
* Create a dependent string, i.e., a string that owns no character storage,
* but that refers to a slice of another string's chars. Dependent strings
* are mutable by definition, so the thread safety comments above apply.
*
* |str| is the string whose chars are referenced; |start| and |length| select
* the slice (NOTE(review): presumably the slice must lie within |str| --
* confirm).
*
* NOTE(review): this header contains no thread safety comments above this
* point, so the reference in the first paragraph looks stale -- confirm where
* that discussion now lives.
*/
extern JS_PUBLIC_API JSString* JS_NewDependentString(JSContext* cx,
JS::Handle<JSString*> str,
size_t start,
size_t length);
/**
* Concatenate two strings, possibly resulting in a rope.
* See above for thread safety comments.
*
* |left| and |right| are the two operands, in that order.
*
* NOTE(review): this header contains no thread safety comments above this
* point, so the "see above" reference looks stale -- confirm where that
* discussion now lives.
*/
extern JS_PUBLIC_API JSString* JS_ConcatStrings(JSContext* cx,
JS::Handle<JSString*> left,
JS::Handle<JSString*> right);
/**
* Transcode the |srclen| bytes at |src| into char16_t units stored at |dst|.
*
* For JS_DecodeBytes, set *dstlenp to the size of the destination buffer before
* the call; on return, *dstlenp contains the number of characters actually
* stored. To determine the necessary destination buffer size, make a sizing
* call that passes nullptr for dst.
*
* On errors, the function reports the error. In that case, *dstlenp contains
* the number of characters or bytes transferred so far. If cx is nullptr, no
* error is reported on failure, and the function simply returns false.
*
* NB: This function does not store an additional zero byte or char16_t after
* the transcoded string.
*/
JS_PUBLIC_API bool JS_DecodeBytes(JSContext* cx, const char* src, size_t srclen,
char16_t* dst, size_t* dstlenp);
/**
* Get the number of bytes in the string encoding (without accounting for a
* terminating zero byte). The function returns (size_t) -1 if the string
* cannot be encoded into bytes and reports an error using cx accordingly.
*/
JS_PUBLIC_API size_t JS_GetStringEncodingLength(JSContext* cx, JSString* str);
/**
* Encode string into a buffer. The function does not store an additional
* zero byte. The function returns (size_t) -1 if the string can not be
* encoded into bytes with no error reported. Otherwise it returns the number
* of bytes that are necessary to encode the string. If that exceeds the
* length parameter, the string will be cut and only length bytes will be
* written into the buffer.
*
* NOTE(review): the declaration below returns bool (and is [[nodiscard]]), so
* the (size_t) -1 / byte-count description above looks stale -- confirm the
* actual return contract against the implementation.
*/
[[nodiscard]] JS_PUBLIC_API bool JS_EncodeStringToBuffer(JSContext* cx,
JSString* str,
char* buffer,
size_t length);
/**
* Encode as many scalar values of the string as UTF-8 as can fit
* into the caller-provided buffer replacing unpaired surrogates
* with the REPLACEMENT CHARACTER.
*
* If JS::StringHasLatin1Chars(str) returns true, the function
* is guaranteed to convert the entire string if
* buffer.Length() >= 2 * JS_GetStringLength(str). Otherwise,
* the function is guaranteed to convert the entire string if
* buffer.Length() >= 3 * JS_GetStringLength(str).
*
* This function does not alter the representation of |str| or
* any |JSString*| substring that is a constituent part of it.
* Returns mozilla::Nothing() on OOM, without reporting an error;
* some data may have been written to |buffer| when this happens.
*
* If there's no OOM, returns the number of code units read and
* the number of code units written, packed in a
* mozilla::Tuple<size_t, size_t> (NOTE(review): presumably in that
* order, read first -- confirm against the implementation).
*
* The semantics of this method match the semantics of
* TextEncoder.encodeInto().
*
* The function does not store an additional zero byte.
*/
JS_PUBLIC_API mozilla::Maybe<mozilla::Tuple<size_t, size_t>>
JS_EncodeStringToUTF8BufferPartial(JSContext* cx, JSString* str,
mozilla::Span<char> buffer);
namespace JS {
/**
 * Upper bound on the length of any JS string: 2^30 - 2 characters.
 *
 * The bound is picked so that a TwoByte string of this length, together with
 * its null terminator, still occupies a byte count representable in int32_t.
 * The static_assert below enforces exactly that.
 */
static constexpr uint32_t MaxStringLength = (uint32_t{1} << 30) - 2;
static_assert(sizeof(char16_t) * (uint64_t{MaxStringLength} + 1) <= INT32_MAX,
              "size of null-terminated JSString char buffer must fit in "
              "INT32_MAX");
/**
 * Return the length of |s|, read through the shadow::String view of the
 * string.
 */
MOZ_ALWAYS_INLINE size_t GetStringLength(JSString* s) {
  shadow::String* shadowStr = shadow::AsShadowString(s);
  return shadowStr->length();
}
/**
 * Return the length of the linear string |s|, read through the shadow::String
 * view of the string.
 */
MOZ_ALWAYS_INLINE size_t GetLinearStringLength(JSLinearString* s) {
  shadow::String* shadowStr = shadow::AsShadowString(s);
  return shadowStr->length();
}
/**
 * Tell whether the linear string |s| stores its characters as Latin-1
 * (true) rather than as two-byte units (false).
 */
MOZ_ALWAYS_INLINE bool LinearStringHasLatin1Chars(JSLinearString* s) {
  shadow::String* shadowStr = shadow::AsShadowString(s);
  return shadowStr->hasLatin1Chars();
}
/**
 * Tell whether |s| stores its characters as Latin-1 (true) rather than as
 * two-byte units (false).
 */
MOZ_ALWAYS_INLINE bool StringHasLatin1Chars(JSString* s) {
  shadow::String* shadowStr = shadow::AsShadowString(s);
  return shadowStr->hasLatin1Chars();
}
/**
 * Return a pointer to the Latin-1 storage of |linear|, which the caller must
 * already know to be a Latin-1 linear string.
 *
 * The pointer remains valid only as long as no GC occurs. The |nogc| argument
 * is not read by the body.
 */
MOZ_ALWAYS_INLINE const Latin1Char* GetLatin1LinearStringChars(
    const AutoRequireNoGC& nogc, JSLinearString* linear) {
  shadow::String* shadowStr = shadow::AsShadowString(linear);
  const Latin1Char* latin1 = shadowStr->latin1LinearChars();
  return latin1;
}
/**
 * Return a pointer to the two-byte storage of |linear|, which the caller must
 * already know to be a two-byte linear string.
 *
 * The pointer remains valid only as long as no GC occurs. The |nogc| argument
 * is not read by the body.
 */
MOZ_ALWAYS_INLINE const char16_t* GetTwoByteLinearStringChars(
    const AutoRequireNoGC& nogc, JSLinearString* linear) {
  shadow::String* shadowStr = shadow::AsShadowString(linear);
  const char16_t* twoByte = shadowStr->twoByteLinearChars();
  return twoByte;
}
/**
 * Fetch the character at position |index| of |linear|.
 *
 * |index| must be in range (asserted). The character is read from whichever
 * storage the string uses, Latin-1 or two-byte, and returned as char16_t.
 */
MOZ_ALWAYS_INLINE char16_t GetLinearStringCharAt(JSLinearString* linear,
                                                 size_t index) {
  shadow::String* s = shadow::AsShadowString(linear);
  MOZ_ASSERT(index < s->length());

  if (!s->hasLatin1Chars()) {
    return s->twoByteLinearChars()[index];
  }
  // Latin-1 units widen to char16_t on return.
  return s->latin1LinearChars()[index];
}
/**
 * View an atom as a linear string. Every atom is linear, so this cannot fail:
 * the same pointer is returned under the JSLinearString type.
 */
MOZ_ALWAYS_INLINE JSLinearString* AtomToLinearString(JSAtom* atom) {
  JSLinearString* const asLinear = reinterpret_cast<JSLinearString*>(atom);
  return asLinear;
}
/**
 * Query whether |str| uses externally-managed storage.
 *
 * When it does, |*callbacks| receives the external-string callbacks used to
 * create it, |*chars| receives a pointer to its two-byte storage, and true is
 * returned. (These pointers remain valid as long as |str| is kept alive.)
 * When it does not, false is returned and neither out-parameter is written.
 */
MOZ_ALWAYS_INLINE bool IsExternalString(
    JSString* str, const JSExternalStringCallbacks** callbacks,
    const char16_t** chars) {
  shadow::String* shadowStr = shadow::AsShadowString(str);
  const bool external = shadowStr->isExternal();
  if (external) {
    *callbacks = shadowStr->externalCallbacks;
    *chars = shadowStr->nonInlineCharsTwoByte;
  }
  return external;
}
namespace detail {
// Out-of-line path used by JS::StringToLinearString (below) for strings that
// are not already linear. Being in namespace detail, it is an implementation
// helper rather than something to call directly.
extern JS_PUBLIC_API JSLinearString* StringToLinearStringSlow(JSContext* cx,
JSString* str);
} // namespace detail
/**
 * Convert |str| to a linear string.
 *
 * A string that is already linear (the expected case) is returned directly
 * under the JSLinearString type. Anything else is handed to
 * detail::StringToLinearStringSlow, whose result is returned unchanged;
 * callers in this header treat a null result as failure.
 */
MOZ_ALWAYS_INLINE JSLinearString* StringToLinearString(JSContext* cx,
                                                       JSString* str) {
  const bool alreadyLinear = shadow::AsShadowString(str)->isLinear();
  return MOZ_LIKELY(alreadyLinear)
             ? reinterpret_cast<JSLinearString*>(str)
             : detail::StringToLinearStringSlow(cx, str);
}
/**
 * Copy |len| characters of |s|, beginning at offset |start|, into
 * |dest[0..len]|. Latin-1 source characters are widened to char16_t.
 *
 * DEBUG builds assert that the requested window lies inside the string.
 */
MOZ_ALWAYS_INLINE void CopyLinearStringChars(char16_t* dest, JSLinearString* s,
                                             size_t len, size_t start = 0) {
#ifdef DEBUG
  size_t stringLen = GetLinearStringLength(s);
  MOZ_ASSERT(start <= stringLen);
  MOZ_ASSERT(len <= stringLen - start);
#endif

  shadow::String* shadowStr = shadow::AsShadowString(s);
  if (!shadowStr->hasLatin1Chars()) {
    // Source and destination share an element type: plain copy.
    const char16_t* twoByte = shadowStr->twoByteLinearChars();
    std::copy_n(twoByte + start, len, dest);
    return;
  }

  // Latin-1 source: widen one unit at a time.
  const Latin1Char* latin1 = shadowStr->latin1LinearChars();
  for (size_t k = 0; k < len; ++k) {
    dest[k] = latin1[start + k];
  }
}
/**
 * Narrowing copy: write |len| characters of |s|, beginning at offset |start|,
 * into |dest[0..len]|. Each source unit is converted with a |char| cast, so
 * 16-bit values are lossily truncated where necessary.
 *
 * DEBUG builds assert that the requested window lies inside the string.
 */
MOZ_ALWAYS_INLINE void LossyCopyLinearStringChars(char* dest, JSLinearString* s,
                                                  size_t len,
                                                  size_t start = 0) {
#ifdef DEBUG
  size_t stringLen = GetLinearStringLength(s);
  MOZ_ASSERT(start <= stringLen);
  MOZ_ASSERT(len <= stringLen - start);
#endif

  // One narrowing loop shared by both storage widths.
  const auto narrowInto = [dest, len, start](const auto* units) {
    for (size_t k = 0; k < len; ++k) {
      dest[k] = char(units[start + k]);
    }
  };

  shadow::String* shadowStr = shadow::AsShadowString(s);
  if (LinearStringHasLatin1Chars(s)) {
    narrowInto(shadowStr->latin1LinearChars());
  } else {
    narrowInto(shadowStr->twoByteLinearChars());
  }
}
/**
 * Copy |len| characters of |s|, beginning at offset |start|, into
 * |dest[0..len]|.
 *
 * This function is fallible: |s| is first converted with
 * StringToLinearString, and false is returned (with nothing copied) if that
 * yields null. If you already hold a linear string, call the infallible
 * |JS::CopyLinearStringChars| instead.
 */
[[nodiscard]] inline bool CopyStringChars(JSContext* cx, char16_t* dest,
                                          JSString* s, size_t len,
                                          size_t start = 0) {
  if (JSLinearString* linear = StringToLinearString(cx, s)) {
    CopyLinearStringChars(dest, linear, len, start);
    return true;
  }
  return false;
}
/**
 * Copy |len| characters of |s|, beginning at offset |start|, into
 * |dest[0..len]|, lossily truncating 16-bit values to |char| if necessary.
 *
 * This function is fallible: |s| is first converted with
 * StringToLinearString, and false is returned (with nothing copied) if that
 * yields null. If you already hold a linear string, call the infallible
 * |JS::LossyCopyLinearStringChars| instead.
 */
[[nodiscard]] inline bool LossyCopyStringChars(JSContext* cx, char* dest,
                                               JSString* s, size_t len,
                                               size_t start = 0) {
  if (JSLinearString* linear = StringToLinearString(cx, s)) {
    LossyCopyLinearStringChars(dest, linear, len, start);
    return true;
  }
  return false;
}
} // namespace JS
/**
* DO NOT USE, only present for Rust bindings as a temporary hack.
*
* Declared [[deprecated]], so any remaining caller is flagged at compile time.
* NOTE(review): presumably this is an out-of-line equivalent of the inline
* JS::StringHasLatin1Chars -- confirm against the implementation.
*/
[[deprecated]] extern JS_PUBLIC_API bool JS_DeprecatedStringHasLatin1Chars(
JSString* str);
#endif // js_String_h
|