diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /js/src/irregexp/imported/regexp.h | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 110.0.1. (refs: upstream/110.0.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'js/src/irregexp/imported/regexp.h')
-rw-r--r-- | js/src/irregexp/imported/regexp.h | 235 |
1 files changed, 235 insertions, 0 deletions
diff --git a/js/src/irregexp/imported/regexp.h b/js/src/irregexp/imported/regexp.h new file mode 100644 index 0000000000..b6c8e43e47 --- /dev/null +++ b/js/src/irregexp/imported/regexp.h @@ -0,0 +1,235 @@ +// Copyright 2012 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_REGEXP_REGEXP_H_ +#define V8_REGEXP_REGEXP_H_ + +#include "irregexp/imported/regexp-error.h" +#include "irregexp/RegExpShim.h" + +namespace v8 { +namespace internal { + +class JSRegExp; +class RegExpCapture; +class RegExpMatchInfo; +class RegExpNode; +class RegExpTree; + +enum class RegExpCompilationTarget : int { kBytecode, kNative }; + +// TODO(jgruber): Do not expose in regexp.h. +// TODO(jgruber): Consider splitting between ParseData and CompileData. +struct RegExpCompileData { + // The parsed AST as produced by the RegExpParser. + RegExpTree* tree = nullptr; + + // The compiled Node graph as produced by RegExpTree::ToNode methods. + RegExpNode* node = nullptr; + + // Either the generated code as produced by the compiler or a trampoline + // to the interpreter. + Handle<Object> code; + + // True, iff the pattern is a 'simple' atom with zero captures. In other + // words, the pattern consists of a string with no metacharacters and special + // regexp features, and can be implemented as a standard string search. + bool simple = true; + + // True, iff the pattern is anchored at the start of the string with '^'. + bool contains_anchor = false; + + // Only set if the pattern contains named captures. + // Note: the lifetime equals that of the parse/compile zone. + ZoneVector<RegExpCapture*>* named_captures = nullptr; + + // The error message. Only used if an error occurred during parsing or + // compilation. + RegExpError error = RegExpError::kNone; + + // The position at which the error was detected. Only used if an + // error occurred. 
+ int error_pos = 0; + + // The number of capture groups, without the global capture \0. + int capture_count = 0; + + // The number of registers used by the generated code. + int register_count = 0; + + // The compilation target (bytecode or native code). + RegExpCompilationTarget compilation_target; +}; + +class RegExp final : public AllStatic { + public: + // Whether the irregexp engine generates interpreter bytecode. + static bool CanGenerateBytecode(); + + // Verify that the given flags combination is valid. + V8_EXPORT_PRIVATE static bool VerifyFlags(RegExpFlags flags); + + // Verify the given pattern, i.e. check that parsing succeeds. If + // verification fails, `regexp_error_out` is set. + template <class CharT> + static bool VerifySyntax(Zone* zone, uintptr_t stack_limit, + const CharT* input, int input_length, + RegExpFlags flags, RegExpError* regexp_error_out, + const DisallowGarbageCollection& no_gc); + + // Parses the RegExp pattern and prepares the JSRegExp object with + // generic data and choice of implementation - as well as what + // the implementation wants to store in the data field. + // Returns false if compilation fails. + V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Compile( + Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern, + RegExpFlags flags, uint32_t backtrack_limit); + + // Ensures that a regexp is fully compiled and ready to be executed on a + // subject string. Returns true on success. Return false on failure, and + // then an exception will be pending. + V8_WARN_UNUSED_RESULT static bool EnsureFullyCompiled(Isolate* isolate, + Handle<JSRegExp> re, + Handle<String> subject); + + enum CallOrigin : int { + kFromRuntime = 0, + kFromJs = 1, + }; + + enum class ExecQuirks { + kNone, + // Used to work around an issue in the RegExpPrototypeSplit fast path, + // which diverges from the spec by not creating a sticky copy of the RegExp + // instance and calling `exec` in a loop. 
If called in this context, we + // must not update the last_match_info on a successful match at the subject + // string end. See crbug.com/1075514 for more information. + kTreatMatchAtEndAsFailure, + }; + + // See ECMA-262 section 15.10.6.2. + // This function calls the garbage collector if necessary. + V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec( + Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, + int index, Handle<RegExpMatchInfo> last_match_info, + ExecQuirks exec_quirks = ExecQuirks::kNone); + + V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> + ExperimentalOneshotExec(Isolate* isolate, Handle<JSRegExp> regexp, + Handle<String> subject, int index, + Handle<RegExpMatchInfo> last_match_info, + ExecQuirks exec_quirks = ExecQuirks::kNone); + + // Integral return values used throughout regexp code layers. + static constexpr int kInternalRegExpFailure = 0; + static constexpr int kInternalRegExpSuccess = 1; + static constexpr int kInternalRegExpException = -1; + static constexpr int kInternalRegExpRetry = -2; + static constexpr int kInternalRegExpFallbackToExperimental = -3; + static constexpr int kInternalRegExpSmallestResult = -3; + + enum IrregexpResult : int32_t { + RE_FAILURE = kInternalRegExpFailure, + RE_SUCCESS = kInternalRegExpSuccess, + RE_EXCEPTION = kInternalRegExpException, + RE_RETRY = kInternalRegExpRetry, + RE_FALLBACK_TO_EXPERIMENTAL = kInternalRegExpFallbackToExperimental, + }; + + // Set last match info. If match is nullptr, then setting captures is + // omitted. 
+ static Handle<RegExpMatchInfo> SetLastMatchInfo( + Isolate* isolate, Handle<RegExpMatchInfo> last_match_info, + Handle<String> subject, int capture_count, int32_t* match); + + V8_EXPORT_PRIVATE static bool CompileForTesting( + Isolate* isolate, Zone* zone, RegExpCompileData* input, RegExpFlags flags, + Handle<String> pattern, Handle<String> sample_subject, bool is_one_byte); + + V8_EXPORT_PRIVATE static void DotPrintForTesting(const char* label, + RegExpNode* node); + + static const int kRegExpTooLargeToOptimize = 20 * KB; + + V8_WARN_UNUSED_RESULT + static MaybeHandle<Object> ThrowRegExpException(Isolate* isolate, + Handle<JSRegExp> re, + Handle<String> pattern, + RegExpError error); + static void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re, + RegExpError error_text); + + static bool IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp); + + static Handle<FixedArray> CreateCaptureNameMap( + Isolate* isolate, ZoneVector<RegExpCapture*>* named_captures); +}; + +// Uses a special global mode of irregexp-generated code to perform a global +// search and return multiple results at once. As such, this is essentially an +// iterator over multiple results (retrieved batch-wise in advance). +class RegExpGlobalCache final { + public: + RegExpGlobalCache(Handle<JSRegExp> regexp, Handle<String> subject, + Isolate* isolate); + + ~RegExpGlobalCache(); + + // Fetch the next entry in the cache for global regexp match results. + // This does not set the last match info. Upon failure, nullptr is + // returned. The cause can be checked with Result(). The previous result is + // still in available in memory when a failure happens. + int32_t* FetchNext(); + + int32_t* LastSuccessfulMatch(); + + bool HasException() { return num_matches_ < 0; } + + private: + int AdvanceZeroLength(int last_index); + + int num_matches_; + int max_matches_; + int current_match_index_; + int registers_per_match_; + // Pointer to the last set of captures. 
+ int32_t* register_array_; + int register_array_size_; + Handle<JSRegExp> regexp_; + Handle<String> subject_; + Isolate* isolate_; +}; + +// Caches results for specific regexp queries on the isolate. At the time of +// writing, this is used during global calls to RegExp.prototype.exec and +// @@split. +class RegExpResultsCache final : public AllStatic { + public: + enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS }; + + // Attempt to retrieve a cached result. On failure, 0 is returned as a Smi. + // On success, the returned result is guaranteed to be a COW-array. + static Object Lookup(Heap* heap, String key_string, Object key_pattern, + FixedArray* last_match_out, ResultsCacheType type); + // Attempt to add value_array to the cache specified by type. On success, + // value_array is turned into a COW-array. + static void Enter(Isolate* isolate, Handle<String> key_string, + Handle<Object> key_pattern, Handle<FixedArray> value_array, + Handle<FixedArray> last_match_cache, ResultsCacheType type); + static void Clear(FixedArray cache); + + static constexpr int kRegExpResultsCacheSize = 0x100; + + private: + static constexpr int kStringOffset = 0; + static constexpr int kPatternOffset = 1; + static constexpr int kArrayOffset = 2; + static constexpr int kLastMatchOffset = 3; + static constexpr int kArrayEntriesPerCacheEntry = 4; +}; + +} // namespace internal +} // namespace v8 + +#endif // V8_REGEXP_REGEXP_H_ |