// Copyright 2012 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Public interface of the irregexp regular expression engine: the data
// carried through parsing/compilation (RegExpCompileData), the static entry
// points for compiling and executing regexps (RegExp), and two caches for
// match results (RegExpGlobalCache, RegExpResultsCache).
//
// NOTE(review): several declarations below use `Handle`, `MaybeHandle`,
// `Tagged`, `ZoneVector` and `template` without template argument lists, and
// VerifySyntax refers to an undeclared `CharT`. This looks like angle-bracket
// content was lost in this copy; restore from upstream before compiling.

#ifndef V8_REGEXP_REGEXP_H_
#define V8_REGEXP_REGEXP_H_

#include "irregexp/imported/regexp-error.h"
#include "irregexp/RegExpShim.h"

namespace v8 {
namespace internal {

class JSRegExp;
class RegExpCapture;
class RegExpMatchInfo;
class RegExpNode;
class RegExpTree;

// What the regexp compiler emits: interpreter bytecode or native code.
enum class RegExpCompilationTarget : int { kBytecode, kNative };

// State shared between the parsing and compilation phases of one regexp.
//
// TODO(jgruber): Do not expose in regexp.h.
// TODO(jgruber): Consider splitting between ParseData and CompileData.
struct RegExpCompileData {
  // The parsed AST as produced by the RegExpParser.
  RegExpTree* tree = nullptr;

  // The compiled Node graph as produced by RegExpTree::ToNode methods.
  RegExpNode* node = nullptr;

  // Either the generated code as produced by the compiler or a trampoline
  // to the interpreter.
  Handle code;

  // True, iff the pattern is a 'simple' atom with zero captures. In other
  // words, the pattern consists of a string with no metacharacters and special
  // regexp features, and can be implemented as a standard string search.
  bool simple = true;

  // True, iff the pattern is anchored at the start of the string with '^'.
  bool contains_anchor = false;

  // Only set if the pattern contains named captures.
  // Note: the lifetime equals that of the parse/compile zone.
  ZoneVector* named_captures = nullptr;

  // The error message. Only used if an error occurred during parsing or
  // compilation.
  RegExpError error = RegExpError::kNone;

  // The position at which the error was detected. Only used if an
  // error occurred.
  int error_pos = 0;

  // The number of capture groups, without the global capture \0.
  int capture_count = 0;

  // The number of registers used by the generated code.
  int register_count = 0;

  // The compilation target (bytecode or native code).
  // NOTE: unlike every other field in this struct, this one has no default
  // member initializer; it is expected to be assigned before it is read.
  RegExpCompilationTarget compilation_target;
};

// Static-only facade over the regexp engine: flag/syntax verification,
// compilation, execution, and related helpers.
class RegExp final : public AllStatic {
 public:
  // Whether the irregexp engine generates interpreter bytecode.
  static bool CanGenerateBytecode();

  // Verify that the given flags combination is valid.
  V8_EXPORT_PRIVATE static bool VerifyFlags(RegExpFlags flags);

  // Verify the given pattern, i.e. check that parsing succeeds. If
  // verification fails, `regexp_error_out` is set.
  // `input` / `input_length` describe the pattern characters to verify.
  template static bool VerifySyntax(Zone* zone, uintptr_t stack_limit,
                                    const CharT* input, int input_length,
                                    RegExpFlags flags,
                                    RegExpError* regexp_error_out,
                                    const DisallowGarbageCollection& no_gc);

  // Parses the RegExp pattern and prepares the JSRegExp object with
  // generic data and choice of implementation - as well as what
  // the implementation wants to store in the data field.
  // NOTE(review): the original comment said "Returns false if compilation
  // fails", but the return type is a MaybeHandle rather than bool; presumably
  // an empty handle signals failure - confirm against the implementation.
  V8_WARN_UNUSED_RESULT static MaybeHandle Compile(
      Isolate* isolate, Handle re, Handle pattern, RegExpFlags flags,
      uint32_t backtrack_limit);

  // Ensures that a regexp is fully compiled and ready to be executed on a
  // subject string. Returns true on success. Throws and returns false on
  // failure.
  V8_WARN_UNUSED_RESULT static bool EnsureFullyCompiled(Isolate* isolate,
                                                        Handle re,
                                                        Handle subject);

  // Where a call into regexp code comes from (the runtime or JS, going by
  // the enumerator names).
  enum CallOrigin : int {
    kFromRuntime = 0,
    kFromJs = 1,
  };

  // Optional deviations from the default Exec behavior.
  enum class ExecQuirks {
    kNone,
    // Used to work around an issue in the RegExpPrototypeSplit fast path,
    // which diverges from the spec by not creating a sticky copy of the RegExp
    // instance and calling `exec` in a loop. If called in this context, we
    // must not update the last_match_info on a successful match at the subject
    // string end. See crbug.com/1075514 for more information.
    kTreatMatchAtEndAsFailure,
  };

  // See ECMA-262 section 15.10.6.2.
  // This function calls the garbage collector if necessary.
  V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle Exec(
      Isolate* isolate, Handle regexp, Handle subject, int index,
      Handle last_match_info, ExecQuirks exec_quirks = ExecQuirks::kNone);

  // Takes the same parameters as Exec.
  // NOTE(review): presumably runs a single match through the experimental
  // engine (cf. kInternalRegExpFallbackToExperimental below) - confirm.
  V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle
  ExperimentalOneshotExec(Isolate* isolate, Handle regexp, Handle subject,
                          int index, Handle last_match_info,
                          ExecQuirks exec_quirks = ExecQuirks::kNone);

  // Integral return values used throughout regexp code layers.
  static constexpr int kInternalRegExpFailure = 0;
  static constexpr int kInternalRegExpSuccess = 1;
  static constexpr int kInternalRegExpException = -1;
  static constexpr int kInternalRegExpRetry = -2;
  static constexpr int kInternalRegExpFallbackToExperimental = -3;
  // Currently equal to kInternalRegExpFallbackToExperimental, the smallest
  // of the values above.
  static constexpr int kInternalRegExpSmallestResult = -3;

  // Enum view of the integral result codes above; each enumerator is defined
  // in terms of the matching kInternalRegExp* constant.
  enum IrregexpResult : int32_t {
    RE_FAILURE = kInternalRegExpFailure,
    RE_SUCCESS = kInternalRegExpSuccess,
    RE_EXCEPTION = kInternalRegExpException,
    RE_RETRY = kInternalRegExpRetry,
    RE_FALLBACK_TO_EXPERIMENTAL = kInternalRegExpFallbackToExperimental,
  };

  // Set last match info. If match is nullptr, then setting captures is
  // omitted.
  static Handle SetLastMatchInfo(
      Isolate* isolate, Handle last_match_info, Handle subject,
      int capture_count, int32_t* match);

  // Test-only compilation entry point operating directly on a
  // RegExpCompileData.
  V8_EXPORT_PRIVATE static bool CompileForTesting(
      Isolate* isolate, Zone* zone, RegExpCompileData* input,
      RegExpFlags flags, Handle pattern, Handle sample_subject,
      bool is_one_byte);

  // Test-only helper taking a label and a node graph.
  // NOTE(review): presumably prints the graph in Graphviz "dot" form - confirm.
  V8_EXPORT_PRIVATE static void DotPrintForTesting(const char* label,
                                                   RegExpNode* node);

  // NOTE(review): presumably a size threshold (20 KB) above which a regexp is
  // not optimized; the unit of comparison is not visible here - confirm.
  static const int kRegExpTooLargeToOptimize = 20 * KB;

  // Reports `error` for the given regexp, flags and pattern. The MaybeHandle
  // return type lets callers return the result directly.
  V8_WARN_UNUSED_RESULT
  static MaybeHandle ThrowRegExpException(Isolate* isolate, Handle re,
                                          RegExpFlags flags, Handle pattern,
                                          RegExpError error);

  // Overload that takes only the regexp object and the error.
  static void ThrowRegExpException(Isolate* isolate, Handle re,
                                   RegExpError error_text);

  // NOTE(review): presumably true if `regexp` still has its original,
  // unpatched shape/prototype - confirm against the implementation.
  static bool IsUnmodifiedRegExp(Isolate* isolate, Handle regexp);

  // Builds a heap object from the zone-allocated named capture list (see
  // RegExpCompileData::named_captures).
  static Handle CreateCaptureNameMap(
      Isolate* isolate, ZoneVector* named_captures);
};

// Uses a special global mode of irregexp-generated code to perform a global
// search and return multiple results at once. As such, this is essentially an
// iterator over multiple results (retrieved batch-wise in advance).
class RegExpGlobalCache final {
 public:
  RegExpGlobalCache(Handle regexp, Handle subject, Isolate* isolate);

  ~RegExpGlobalCache();

  // Fetch the next entry in the cache for global regexp match results.
  // This does not set the last match info. Upon failure, nullptr is
  // returned. The previous result is still available in memory when a
  // failure happens.
  // NOTE(review): the original comment said the cause can be checked with
  // Result(), but no such member is declared here; HasException() is the
  // visible way to tell an exception apart from running out of matches.
  int32_t* FetchNext();

  // Returns a pointer to the registers of the most recent successful match.
  int32_t* LastSuccessfulMatch();

  // True iff the match count is negative, which is how an exception during
  // matching is recorded.
  bool HasException() { return num_matches_ < 0; }

 private:
  // NOTE(review): presumably computes the index to resume from after a
  // zero-length match at `last_index` - confirm.
  int AdvanceZeroLength(int last_index);

  int num_matches_;
  int max_matches_;
  int current_match_index_;
  int registers_per_match_;
  // Pointer to the last set of captures.
  int32_t* register_array_;
  int register_array_size_;
  Handle regexp_;
  Handle subject_;
  Isolate* isolate_;
};

// Caches results for specific regexp queries on the isolate. At the time of
// writing, this is used during global calls to RegExp.prototype.exec and
// @@split.
class RegExpResultsCache final : public AllStatic {
 public:
  // Selects which kind of cached result is being looked up or stored.
  enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS };

  // Attempt to retrieve a cached result. On failure, 0 is returned as a Smi.
  // On success, the returned result is guaranteed to be a COW-array.
  static Tagged Lookup(Heap* heap, Tagged key_string, Tagged key_pattern,
                       Tagged* last_match_out, ResultsCacheType type);

  // Attempt to add value_array to the cache specified by type. On success,
  // value_array is turned into a COW-array.
  static void Enter(Isolate* isolate, Handle key_string, Handle key_pattern,
                    Handle value_array, Handle last_match_cache,
                    ResultsCacheType type);

  // Clears the given cache.
  static void Clear(Tagged cache);

  static constexpr int kRegExpResultsCacheSize = 0x100;

 private:
  // Each cache entry occupies kArrayEntriesPerCacheEntry (4) slots; the
  // offsets below are presumably the positions of the key string, key
  // pattern, result array and last match within an entry.
  static constexpr int kStringOffset = 0;
  static constexpr int kPatternOffset = 1;
  static constexpr int kArrayOffset = 2;
  static constexpr int kLastMatchOffset = 3;
  static constexpr int kArrayEntriesPerCacheEntry = 4;
};

}  // namespace internal
}  // namespace v8

#endif  // V8_REGEXP_REGEXP_H_