summaryrefslogtreecommitdiffstats
path: root/js/src/irregexp/imported/regexp.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:22:09 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:22:09 +0000
commit43a97878ce14b72f0981164f87f2e35e14151312 (patch)
tree620249daf56c0258faa40cbdcf9cfba06de2a846 /js/src/irregexp/imported/regexp.h
parentInitial commit. (diff)
downloadfirefox-upstream.tar.xz
firefox-upstream.zip
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'js/src/irregexp/imported/regexp.h')
-rw-r--r--js/src/irregexp/imported/regexp.h235
1 files changed, 235 insertions, 0 deletions
diff --git a/js/src/irregexp/imported/regexp.h b/js/src/irregexp/imported/regexp.h
new file mode 100644
index 0000000000..b6c8e43e47
--- /dev/null
+++ b/js/src/irregexp/imported/regexp.h
@@ -0,0 +1,235 @@
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_REGEXP_REGEXP_H_
+#define V8_REGEXP_REGEXP_H_
+
+#include "irregexp/imported/regexp-error.h"
+#include "irregexp/RegExpShim.h"
+
+namespace v8 {
+namespace internal {
+
+class JSRegExp;
+class RegExpCapture;
+class RegExpMatchInfo;
+class RegExpNode;
+class RegExpTree;
+
+enum class RegExpCompilationTarget : int { kBytecode, kNative };
+
+// TODO(jgruber): Do not expose in regexp.h.
+// TODO(jgruber): Consider splitting between ParseData and CompileData.
+struct RegExpCompileData {
+ // The parsed AST as produced by the RegExpParser.
+ RegExpTree* tree = nullptr;
+
+ // The compiled Node graph as produced by RegExpTree::ToNode methods.
+ RegExpNode* node = nullptr;
+
+ // Either the generated code as produced by the compiler or a trampoline
+ // to the interpreter.
+ Handle<Object> code;
+
+ // True, iff the pattern is a 'simple' atom with zero captures. In other
+ // words, the pattern consists of a string with no metacharacters and special
+ // regexp features, and can be implemented as a standard string search.
+ bool simple = true;
+
+ // True, iff the pattern is anchored at the start of the string with '^'.
+ bool contains_anchor = false;
+
+ // Only set if the pattern contains named captures.
+ // Note: the lifetime equals that of the parse/compile zone.
+ ZoneVector<RegExpCapture*>* named_captures = nullptr;
+
+ // The error message. Only used if an error occurred during parsing or
+ // compilation.
+ RegExpError error = RegExpError::kNone;
+
+ // The position at which the error was detected. Only used if an
+ // error occurred.
+ int error_pos = 0;
+
+ // The number of capture groups, without the global capture \0.
+ int capture_count = 0;
+
+ // The number of registers used by the generated code.
+ int register_count = 0;
+
+ // The compilation target (bytecode or native code).
+ RegExpCompilationTarget compilation_target;
+};
+
+class RegExp final : public AllStatic {
+ public:
+ // Whether the irregexp engine generates interpreter bytecode.
+ static bool CanGenerateBytecode();
+
+ // Verify that the given flags combination is valid.
+ V8_EXPORT_PRIVATE static bool VerifyFlags(RegExpFlags flags);
+
+ // Verify the given pattern, i.e. check that parsing succeeds. If
+ // verification fails, `regexp_error_out` is set.
+ template <class CharT>
+ static bool VerifySyntax(Zone* zone, uintptr_t stack_limit,
+ const CharT* input, int input_length,
+ RegExpFlags flags, RegExpError* regexp_error_out,
+ const DisallowGarbageCollection& no_gc);
+
+ // Parses the RegExp pattern and prepares the JSRegExp object with
+ // generic data and choice of implementation - as well as what
+ // the implementation wants to store in the data field.
+ // Returns false if compilation fails.
+ V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Compile(
+ Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
+ RegExpFlags flags, uint32_t backtrack_limit);
+
+ // Ensures that a regexp is fully compiled and ready to be executed on a
+ // subject string. Returns true on success. Return false on failure, and
+ // then an exception will be pending.
+ V8_WARN_UNUSED_RESULT static bool EnsureFullyCompiled(Isolate* isolate,
+ Handle<JSRegExp> re,
+ Handle<String> subject);
+
+ enum CallOrigin : int {
+ kFromRuntime = 0,
+ kFromJs = 1,
+ };
+
+ enum class ExecQuirks {
+ kNone,
+ // Used to work around an issue in the RegExpPrototypeSplit fast path,
+ // which diverges from the spec by not creating a sticky copy of the RegExp
+ // instance and calling `exec` in a loop. If called in this context, we
+ // must not update the last_match_info on a successful match at the subject
+ // string end. See crbug.com/1075514 for more information.
+ kTreatMatchAtEndAsFailure,
+ };
+
+ // See ECMA-262 section 15.10.6.2.
+ // This function calls the garbage collector if necessary.
+ V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec(
+ Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
+ int index, Handle<RegExpMatchInfo> last_match_info,
+ ExecQuirks exec_quirks = ExecQuirks::kNone);
+
+ V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object>
+ ExperimentalOneshotExec(Isolate* isolate, Handle<JSRegExp> regexp,
+ Handle<String> subject, int index,
+ Handle<RegExpMatchInfo> last_match_info,
+ ExecQuirks exec_quirks = ExecQuirks::kNone);
+
+ // Integral return values used throughout regexp code layers.
+ static constexpr int kInternalRegExpFailure = 0;
+ static constexpr int kInternalRegExpSuccess = 1;
+ static constexpr int kInternalRegExpException = -1;
+ static constexpr int kInternalRegExpRetry = -2;
+ static constexpr int kInternalRegExpFallbackToExperimental = -3;
+ static constexpr int kInternalRegExpSmallestResult = -3;
+
+ enum IrregexpResult : int32_t {
+ RE_FAILURE = kInternalRegExpFailure,
+ RE_SUCCESS = kInternalRegExpSuccess,
+ RE_EXCEPTION = kInternalRegExpException,
+ RE_RETRY = kInternalRegExpRetry,
+ RE_FALLBACK_TO_EXPERIMENTAL = kInternalRegExpFallbackToExperimental,
+ };
+
+ // Set last match info. If match is nullptr, then setting captures is
+ // omitted.
+ static Handle<RegExpMatchInfo> SetLastMatchInfo(
+ Isolate* isolate, Handle<RegExpMatchInfo> last_match_info,
+ Handle<String> subject, int capture_count, int32_t* match);
+
+ V8_EXPORT_PRIVATE static bool CompileForTesting(
+ Isolate* isolate, Zone* zone, RegExpCompileData* input, RegExpFlags flags,
+ Handle<String> pattern, Handle<String> sample_subject, bool is_one_byte);
+
+ V8_EXPORT_PRIVATE static void DotPrintForTesting(const char* label,
+ RegExpNode* node);
+
+ static const int kRegExpTooLargeToOptimize = 20 * KB;
+
+ V8_WARN_UNUSED_RESULT
+ static MaybeHandle<Object> ThrowRegExpException(Isolate* isolate,
+ Handle<JSRegExp> re,
+ Handle<String> pattern,
+ RegExpError error);
+ static void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
+ RegExpError error_text);
+
+ static bool IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp);
+
+ static Handle<FixedArray> CreateCaptureNameMap(
+ Isolate* isolate, ZoneVector<RegExpCapture*>* named_captures);
+};
+
+// Uses a special global mode of irregexp-generated code to perform a global
+// search and return multiple results at once. As such, this is essentially an
+// iterator over multiple results (retrieved batch-wise in advance).
+class RegExpGlobalCache final {
+ public:
+ RegExpGlobalCache(Handle<JSRegExp> regexp, Handle<String> subject,
+ Isolate* isolate);
+
+ ~RegExpGlobalCache();
+
+ // Fetch the next entry in the cache for global regexp match results.
+ // This does not set the last match info. Upon failure, nullptr is
+ // returned. The cause can be checked with Result(). The previous result is
+ // still in available in memory when a failure happens.
+ int32_t* FetchNext();
+
+ int32_t* LastSuccessfulMatch();
+
+ bool HasException() { return num_matches_ < 0; }
+
+ private:
+ int AdvanceZeroLength(int last_index);
+
+ int num_matches_;
+ int max_matches_;
+ int current_match_index_;
+ int registers_per_match_;
+ // Pointer to the last set of captures.
+ int32_t* register_array_;
+ int register_array_size_;
+ Handle<JSRegExp> regexp_;
+ Handle<String> subject_;
+ Isolate* isolate_;
+};
+
+// Caches results for specific regexp queries on the isolate. At the time of
+// writing, this is used during global calls to RegExp.prototype.exec and
+// @@split.
+class RegExpResultsCache final : public AllStatic {
+ public:
+ enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS };
+
+ // Attempt to retrieve a cached result. On failure, 0 is returned as a Smi.
+ // On success, the returned result is guaranteed to be a COW-array.
+ static Object Lookup(Heap* heap, String key_string, Object key_pattern,
+ FixedArray* last_match_out, ResultsCacheType type);
+ // Attempt to add value_array to the cache specified by type. On success,
+ // value_array is turned into a COW-array.
+ static void Enter(Isolate* isolate, Handle<String> key_string,
+ Handle<Object> key_pattern, Handle<FixedArray> value_array,
+ Handle<FixedArray> last_match_cache, ResultsCacheType type);
+ static void Clear(FixedArray cache);
+
+ static constexpr int kRegExpResultsCacheSize = 0x100;
+
+ private:
+ static constexpr int kStringOffset = 0;
+ static constexpr int kPatternOffset = 1;
+ static constexpr int kArrayOffset = 2;
+ static constexpr int kLastMatchOffset = 3;
+ static constexpr int kArrayEntriesPerCacheEntry = 4;
+};
+
+} // namespace internal
+} // namespace v8
+
+#endif // V8_REGEXP_REGEXP_H_