summaryrefslogtreecommitdiffstats
path: root/js/src/irregexp/RegExpNativeMacroAssembler.h
blob: 3c3acf40d45b43eea6622c4ef3b1275c3f50c7b4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file implements the NativeRegExpMacroAssembler interface for
// SpiderMonkey. It provides the same interface as each of V8's
// architecture-specific implementations.

#ifndef RegexpMacroAssemblerArch_h
#define RegexpMacroAssemblerArch_h

#include "irregexp/imported/regexp-macro-assembler.h"
#include "jit/MacroAssembler.h"

namespace v8 {
namespace internal {

struct FrameData {
  // Character position at the start of the input, stored as a
  // negative offset from the end of the string (input_end_pointer_).
  size_t inputStart;

  // The backtrack_stack_pointer_ register points to the top of the stack.
  // This points to the bottom of the backtrack stack.
  void* backtrackStackBase;

  // Copy of the input MatchPairs.
  int32_t* matches;    // pointer to capture array
  int32_t numMatches;  // size of capture array
};

class SMRegExpMacroAssembler final : public NativeRegExpMacroAssembler {
 public:
  SMRegExpMacroAssembler(JSContext* cx, js::jit::StackMacroAssembler& masm,
                         Zone* zone, Mode mode, uint32_t num_capture_registers);
  virtual ~SMRegExpMacroAssembler() = default;

  virtual int stack_limit_slack();
  virtual IrregexpImplementation Implementation();

  virtual bool Succeed();
  virtual void Fail();

  virtual void AdvanceCurrentPosition(int by);
  virtual void PopCurrentPosition();
  virtual void PushCurrentPosition();
  virtual void SetCurrentPositionFromEnd(int by);

  virtual void Backtrack();
  virtual void Bind(Label* label);
  virtual void GoTo(Label* label);
  virtual void PushBacktrack(Label* label);

  virtual void CheckCharacter(uint32_t c, Label* on_equal);
  virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
  virtual void CheckCharacterGT(base::uc16 limit, Label* on_greater);
  virtual void CheckCharacterLT(base::uc16 limit, Label* on_less);
  virtual void CheckCharacterAfterAnd(uint32_t c, uint32_t mask,
                                      Label* on_equal);
  virtual void CheckNotCharacterAfterAnd(uint32_t c, uint32_t mask,
                                         Label* on_not_equal);
  virtual void CheckNotCharacterAfterMinusAnd(base::uc16 c, base::uc16 minus,
                                              base::uc16 mask,
                                              Label* on_not_equal);
  virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
  virtual void CheckCharacterInRange(base::uc16 from, base::uc16 to,
                                     Label* on_in_range);
  virtual void CheckCharacterNotInRange(base::uc16 from, base::uc16 to,
                                        Label* on_not_in_range);
  virtual bool CheckCharacterInRangeArray(
      const ZoneList<CharacterRange>* ranges, Label* on_in_range);
  virtual bool CheckCharacterNotInRangeArray(
      const ZoneList<CharacterRange>* ranges, Label* on_not_in_range);
  virtual void CheckAtStart(int cp_offset, Label* on_at_start);
  virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
  virtual void CheckPosition(int cp_offset, Label* on_outside_input);
  virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
  virtual bool CheckSpecialCharacterClass(StandardCharacterSet type,
                                          Label* on_no_match);
  virtual void CheckNotBackReference(int start_reg, bool read_backward,
                                     Label* on_no_match);
  virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
                                               bool read_backward, bool unicode,
                                               Label* on_no_match);

  virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
                                        bool check_bounds, int characters,
                                        int eats_at_least);

  virtual void AdvanceRegister(int reg, int by);
  virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
  virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
  virtual void IfRegisterEqPos(int reg, Label* if_eq);
  virtual void PopRegister(int register_index);
  virtual void PushRegister(int register_index,
                            StackCheckFlag check_stack_limit);
  virtual void ReadCurrentPositionFromRegister(int reg);
  virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
  virtual void ReadStackPointerFromRegister(int reg);
  virtual void WriteStackPointerToRegister(int reg);
  virtual void SetRegister(int register_index, int to);
  virtual void ClearRegisters(int reg_from, int reg_to);

  virtual Handle<HeapObject> GetCode(Handle<String> source);

  virtual bool CanReadUnaligned() const;

 private:
  size_t frameSize_ = 0;

  void createStackFrame();
  void initFrameAndRegs();
  void successHandler();
  void exitHandler();
  void backtrackHandler();
  void stackOverflowHandler();

  // Push a register on the backtrack stack.
  void Push(js::jit::Register value);

  // Pop a value from the backtrack stack.
  void Pop(js::jit::Register target);

  void CheckAtStartImpl(int cp_offset, Label* on_cond,
                        js::jit::Assembler::Condition cond);
  void CheckCharacterImpl(js::jit::Imm32 c, Label* on_cond,
                          js::jit::Assembler::Condition cond);
  void CheckCharacterAfterAndImpl(uint32_t c, uint32_t and_with, Label* on_cond,
                                  bool negate);
  void CheckCharacterInRangeImpl(base::uc16 from, base::uc16 to, Label* on_cond,
                                 js::jit::Assembler::Condition cond);
  void CheckNotBackReferenceImpl(int start_reg, bool read_backward,
                                 bool unicode, Label* on_no_match,
                                 bool ignore_case);
  void CallIsCharacterInRangeArray(const ZoneList<CharacterRange>* ranges);

  void LoadCurrentCharacterUnchecked(int cp_offset, int characters);

  void JumpOrBacktrack(Label* to);

  // MacroAssembler methods that take a Label can be called with a
  // null label, which means that we should backtrack if we would jump
  // to that label. This is a helper to avoid writing out the same
  // logic a dozen times.
  inline js::jit::Label* LabelOrBacktrack(Label* to) {
    return to ? to->inner() : &backtrack_label_;
  }

  void CheckBacktrackStackLimit();

 public:
  static bool GrowBacktrackStack(RegExpStack* regexp_stack);

  static uint32_t CaseInsensitiveCompareNonUnicode(const char16_t* substring1,
                                                   const char16_t* substring2,
                                                   size_t byteLength);
  static uint32_t CaseInsensitiveCompareUnicode(const char16_t* substring1,
                                                const char16_t* substring2,
                                                size_t byteLength);
  static bool IsCharacterInRangeArray(uint32_t c, ByteArrayData* ranges);

 private:
  inline int char_size() { return static_cast<int>(mode_); }
  inline js::jit::Scale factor() {
    return mode_ == UC16 ? js::jit::TimesTwo : js::jit::TimesOne;
  }

  js::jit::Address inputStart() {
    return js::jit::Address(masm_.getStackPointer(),
                            offsetof(FrameData, inputStart));
  }
  js::jit::Address backtrackStackBase() {
    return js::jit::Address(masm_.getStackPointer(),
                            offsetof(FrameData, backtrackStackBase));
  }
  js::jit::Address matches() {
    return js::jit::Address(masm_.getStackPointer(),
                            offsetof(FrameData, matches));
  }
  js::jit::Address numMatches() {
    return js::jit::Address(masm_.getStackPointer(),
                            offsetof(FrameData, numMatches));
  }

  // The stack-pointer-relative location of a regexp register.
  js::jit::Address register_location(int register_index) {
    return js::jit::Address(masm_.getStackPointer(),
                            register_offset(register_index));
  }

  int32_t register_offset(int register_index) {
    MOZ_ASSERT(register_index >= 0 && register_index <= kMaxRegister);
    if (num_registers_ <= register_index) {
      num_registers_ = register_index + 1;
    }
    static_assert(alignof(uintptr_t) <= alignof(FrameData));
    return sizeof(FrameData) + register_index * sizeof(uintptr_t*);
  }

  JSContext* cx_;
  js::jit::StackMacroAssembler& masm_;

  /*
   * This assembler uses the following registers:
   *
   * - current_character_:
   *     Contains the character (or characters) currently being examined.
   *     Must be loaded using LoadCurrentCharacter before using any of the
   *     dispatch methods. After a matching pass for a global regexp,
   *     temporarily stores the index of capture start.
   * - current_position_:
   *     Current position in input *as negative byte offset from end of string*.
   * - input_end_pointer_:
   *     Points to byte after last character in the input. current_position_ is
   *     relative to this.
   * - backtrack_stack_pointer_:
   *     Points to tip of the (heap-allocated) backtrack stack. The stack grows
   *     downward (like the native stack).
   * - temp0_, temp1_, temp2_:
   *     Scratch registers.
   *
   * The native stack pointer is used to access arguments (InputOutputData),
   * local variables (FrameData), and irregexp's internal virtual registers
   * (see register_location).
   */

  js::jit::Register current_character_;
  js::jit::Register current_position_;
  js::jit::Register input_end_pointer_;
  js::jit::Register backtrack_stack_pointer_;
  js::jit::Register temp0_, temp1_, temp2_;

  // These labels are used in various API calls and bound (if used) in
  // GetCode. If we abort in the middle of a compilation, as may
  // happen if a regexp is too big, they may be used but not
  // bound.
  js::jit::NonAssertingLabel entry_label_;
  js::jit::NonAssertingLabel start_label_;
  js::jit::NonAssertingLabel backtrack_label_;
  js::jit::NonAssertingLabel success_label_;
  js::jit::NonAssertingLabel exit_label_;
  js::jit::NonAssertingLabel stack_overflow_label_;
  js::jit::NonAssertingLabel exit_with_exception_label_;

  // When we generate the code to push a backtrack label's address
  // onto the backtrack stack, we don't know its final address. We
  // have to patch it after linking. This is slightly delicate, as the
  // Label itself (which is allocated on the stack) may not exist by
  // the time we link. The approach is as follows:
  //
  // 1. When we push a label on the backtrack stack (PushBacktrack),
  //    we bind the label's patchOffset_ field to the offset within
  //    the code that should be overwritten. This works because each
  //    label is only pushed by a single instruction.
  //
  // 2. When we bind a label (Bind), we check to see if it has a
  //    bound patchOffset_. If it does, we create a LabelPatch mapping
  //    its patch offset to the offset of the label itself.
  //
  // 3. While linking the code, we walk the list of label patches
  //    and patch the code accordingly.
  class LabelPatch {
   public:
    LabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset)
        : patchOffset_(patchOffset), labelOffset_(labelOffset) {}

    js::jit::CodeOffset patchOffset_;
    size_t labelOffset_ = 0;
  };

  js::Vector<LabelPatch, 4, js::SystemAllocPolicy> labelPatches_;
  void AddLabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset) {
    js::AutoEnterOOMUnsafeRegion oomUnsafe;
    if (!labelPatches_.emplaceBack(patchOffset, labelOffset)) {
      oomUnsafe.crash("Irregexp label patch");
    }
  }

  Mode mode_;
  int num_registers_;
  int num_capture_registers_;
  js::jit::LiveGeneralRegisterSet savedRegisters_;

 public:
  using TableVector =
      js::Vector<PseudoHandle<ByteArrayData>, 4, js::SystemAllocPolicy>;
  TableVector& tables() { return tables_; }

 private:
  TableVector tables_;
  void AddTable(PseudoHandle<ByteArrayData> table) {
    js::AutoEnterOOMUnsafeRegion oomUnsafe;
    if (!tables_.append(std::move(table))) {
      oomUnsafe.crash("Irregexp table append");
    }
  }
};

}  // namespace internal
}  // namespace v8

#endif  // RegexpMacroAssemblerArch_h