summaryrefslogtreecommitdiffstats
path: root/js/src/irregexp/imported/regexp.h
blob: b6c8e43e47ec53fbac56fbfdb3d67b949e96d1c7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
// Copyright 2012 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_REGEXP_REGEXP_H_
#define V8_REGEXP_REGEXP_H_

#include "irregexp/imported/regexp-error.h"
#include "irregexp/RegExpShim.h"

namespace v8 {
namespace internal {

class JSRegExp;
class RegExpCapture;
class RegExpMatchInfo;
class RegExpNode;
class RegExpTree;

enum class RegExpCompilationTarget : int { kBytecode, kNative };

// TODO(jgruber): Do not expose in regexp.h.
// TODO(jgruber): Consider splitting between ParseData and CompileData.
struct RegExpCompileData {
  // The parsed AST as produced by the RegExpParser.
  RegExpTree* tree = nullptr;

  // The compiled Node graph as produced by RegExpTree::ToNode methods.
  RegExpNode* node = nullptr;

  // Either the generated code as produced by the compiler or a trampoline
  // to the interpreter.
  Handle<Object> code;

  // True, iff the pattern is a 'simple' atom with zero captures. In other
  // words, the pattern consists of a string with no metacharacters and special
  // regexp features, and can be implemented as a standard string search.
  bool simple = true;

  // True, iff the pattern is anchored at the start of the string with '^'.
  bool contains_anchor = false;

  // Only set if the pattern contains named captures.
  // Note: the lifetime equals that of the parse/compile zone.
  ZoneVector<RegExpCapture*>* named_captures = nullptr;

  // The error message. Only used if an error occurred during parsing or
  // compilation.
  RegExpError error = RegExpError::kNone;

  // The position at which the error was detected. Only used if an
  // error occurred.
  int error_pos = 0;

  // The number of capture groups, without the global capture \0.
  int capture_count = 0;

  // The number of registers used by the generated code.
  int register_count = 0;

  // The compilation target (bytecode or native code).
  RegExpCompilationTarget compilation_target;
};

class RegExp final : public AllStatic {
 public:
  // Whether the irregexp engine generates interpreter bytecode.
  static bool CanGenerateBytecode();

  // Verify that the given flags combination is valid.
  V8_EXPORT_PRIVATE static bool VerifyFlags(RegExpFlags flags);

  // Verify the given pattern, i.e. check that parsing succeeds. If
  // verification fails, `regexp_error_out` is set.
  template <class CharT>
  static bool VerifySyntax(Zone* zone, uintptr_t stack_limit,
                           const CharT* input, int input_length,
                           RegExpFlags flags, RegExpError* regexp_error_out,
                           const DisallowGarbageCollection& no_gc);

  // Parses the RegExp pattern and prepares the JSRegExp object with
  // generic data and choice of implementation - as well as what
  // the implementation wants to store in the data field.
  // Returns false if compilation fails.
  V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Compile(
      Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
      RegExpFlags flags, uint32_t backtrack_limit);

  // Ensures that a regexp is fully compiled and ready to be executed on a
  // subject string.  Returns true on success. Return false on failure, and
  // then an exception will be pending.
  V8_WARN_UNUSED_RESULT static bool EnsureFullyCompiled(Isolate* isolate,
                                                        Handle<JSRegExp> re,
                                                        Handle<String> subject);

  enum CallOrigin : int {
    kFromRuntime = 0,
    kFromJs = 1,
  };

  enum class ExecQuirks {
    kNone,
    // Used to work around an issue in the RegExpPrototypeSplit fast path,
    // which diverges from the spec by not creating a sticky copy of the RegExp
    // instance and calling `exec` in a loop. If called in this context, we
    // must not update the last_match_info on a successful match at the subject
    // string end. See crbug.com/1075514 for more information.
    kTreatMatchAtEndAsFailure,
  };

  // See ECMA-262 section 15.10.6.2.
  // This function calls the garbage collector if necessary.
  V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec(
      Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
      int index, Handle<RegExpMatchInfo> last_match_info,
      ExecQuirks exec_quirks = ExecQuirks::kNone);

  V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object>
  ExperimentalOneshotExec(Isolate* isolate, Handle<JSRegExp> regexp,
                          Handle<String> subject, int index,
                          Handle<RegExpMatchInfo> last_match_info,
                          ExecQuirks exec_quirks = ExecQuirks::kNone);

  // Integral return values used throughout regexp code layers.
  static constexpr int kInternalRegExpFailure = 0;
  static constexpr int kInternalRegExpSuccess = 1;
  static constexpr int kInternalRegExpException = -1;
  static constexpr int kInternalRegExpRetry = -2;
  static constexpr int kInternalRegExpFallbackToExperimental = -3;
  static constexpr int kInternalRegExpSmallestResult = -3;

  enum IrregexpResult : int32_t {
    RE_FAILURE = kInternalRegExpFailure,
    RE_SUCCESS = kInternalRegExpSuccess,
    RE_EXCEPTION = kInternalRegExpException,
    RE_RETRY = kInternalRegExpRetry,
    RE_FALLBACK_TO_EXPERIMENTAL = kInternalRegExpFallbackToExperimental,
  };

  // Set last match info.  If match is nullptr, then setting captures is
  // omitted.
  static Handle<RegExpMatchInfo> SetLastMatchInfo(
      Isolate* isolate, Handle<RegExpMatchInfo> last_match_info,
      Handle<String> subject, int capture_count, int32_t* match);

  V8_EXPORT_PRIVATE static bool CompileForTesting(
      Isolate* isolate, Zone* zone, RegExpCompileData* input, RegExpFlags flags,
      Handle<String> pattern, Handle<String> sample_subject, bool is_one_byte);

  V8_EXPORT_PRIVATE static void DotPrintForTesting(const char* label,
                                                   RegExpNode* node);

  static const int kRegExpTooLargeToOptimize = 20 * KB;

  V8_WARN_UNUSED_RESULT
  static MaybeHandle<Object> ThrowRegExpException(Isolate* isolate,
                                                  Handle<JSRegExp> re,
                                                  Handle<String> pattern,
                                                  RegExpError error);
  static void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
                                   RegExpError error_text);

  static bool IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp);

  static Handle<FixedArray> CreateCaptureNameMap(
      Isolate* isolate, ZoneVector<RegExpCapture*>* named_captures);
};

// Uses a special global mode of irregexp-generated code to perform a global
// search and return multiple results at once. As such, this is essentially an
// iterator over multiple results (retrieved batch-wise in advance).
class RegExpGlobalCache final {
 public:
  RegExpGlobalCache(Handle<JSRegExp> regexp, Handle<String> subject,
                    Isolate* isolate);

  ~RegExpGlobalCache();

  // Fetch the next entry in the cache for global regexp match results.
  // This does not set the last match info.  Upon failure, nullptr is
  // returned. The cause can be checked with Result().  The previous result is
  // still in available in memory when a failure happens.
  int32_t* FetchNext();

  int32_t* LastSuccessfulMatch();

  bool HasException() { return num_matches_ < 0; }

 private:
  int AdvanceZeroLength(int last_index);

  int num_matches_;
  int max_matches_;
  int current_match_index_;
  int registers_per_match_;
  // Pointer to the last set of captures.
  int32_t* register_array_;
  int register_array_size_;
  Handle<JSRegExp> regexp_;
  Handle<String> subject_;
  Isolate* isolate_;
};

// Caches results for specific regexp queries on the isolate. At the time of
// writing, this is used during global calls to RegExp.prototype.exec and
// @@split.
class RegExpResultsCache final : public AllStatic {
 public:
  enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS };

  // Attempt to retrieve a cached result.  On failure, 0 is returned as a Smi.
  // On success, the returned result is guaranteed to be a COW-array.
  static Object Lookup(Heap* heap, String key_string, Object key_pattern,
                       FixedArray* last_match_out, ResultsCacheType type);
  // Attempt to add value_array to the cache specified by type.  On success,
  // value_array is turned into a COW-array.
  static void Enter(Isolate* isolate, Handle<String> key_string,
                    Handle<Object> key_pattern, Handle<FixedArray> value_array,
                    Handle<FixedArray> last_match_cache, ResultsCacheType type);
  static void Clear(FixedArray cache);

  static constexpr int kRegExpResultsCacheSize = 0x100;

 private:
  static constexpr int kStringOffset = 0;
  static constexpr int kPatternOffset = 1;
  static constexpr int kArrayOffset = 2;
  static constexpr int kLastMatchOffset = 3;
  static constexpr int kArrayEntriesPerCacheEntry = 4;
};

}  // namespace internal
}  // namespace v8

#endif  // V8_REGEXP_REGEXP_H_