diff options
Diffstat (limited to 'js/src/irregexp/RegExpNativeMacroAssembler.h')
-rw-r--r-- | js/src/irregexp/RegExpNativeMacroAssembler.h | 308 |
1 files changed, 308 insertions, 0 deletions
diff --git a/js/src/irregexp/RegExpNativeMacroAssembler.h b/js/src/irregexp/RegExpNativeMacroAssembler.h new file mode 100644 index 0000000000..3c3acf40d4 --- /dev/null +++ b/js/src/irregexp/RegExpNativeMacroAssembler.h @@ -0,0 +1,308 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Copyright 2020 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// This file implements the NativeRegExpMacroAssembler interface for +// SpiderMonkey. It provides the same interface as each of V8's +// architecture-specific implementations. + +#ifndef RegexpMacroAssemblerArch_h +#define RegexpMacroAssemblerArch_h + +#include "irregexp/imported/regexp-macro-assembler.h" +#include "jit/MacroAssembler.h" + +namespace v8 { +namespace internal { + +struct FrameData { + // Character position at the start of the input, stored as a + // negative offset from the end of the string (input_end_pointer_). + size_t inputStart; + + // The backtrack_stack_pointer_ register points to the top of the stack. + // This points to the bottom of the backtrack stack. + void* backtrackStackBase; + + // Copy of the input MatchPairs. + int32_t* matches; // pointer to capture array + int32_t numMatches; // size of capture array +}; + +class SMRegExpMacroAssembler final : public NativeRegExpMacroAssembler { + public: + SMRegExpMacroAssembler(JSContext* cx, js::jit::StackMacroAssembler& masm, + Zone* zone, Mode mode, uint32_t num_capture_registers); + virtual ~SMRegExpMacroAssembler() = default; + + virtual int stack_limit_slack(); + virtual IrregexpImplementation Implementation(); + + virtual bool Succeed(); + virtual void Fail(); + + virtual void AdvanceCurrentPosition(int by); + virtual void PopCurrentPosition(); + virtual void PushCurrentPosition(); + virtual void SetCurrentPositionFromEnd(int by); + + virtual void Backtrack(); + virtual void Bind(Label* label); + virtual void GoTo(Label* label); + virtual void PushBacktrack(Label* label); + + virtual void CheckCharacter(uint32_t c, Label* on_equal); + virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal); + virtual void CheckCharacterGT(base::uc16 limit, Label* on_greater); + virtual void CheckCharacterLT(base::uc16 limit, Label* on_less); + virtual void CheckCharacterAfterAnd(uint32_t c, uint32_t mask, + Label* on_equal); + virtual void CheckNotCharacterAfterAnd(uint32_t c, uint32_t mask, + Label* on_not_equal); + virtual void CheckNotCharacterAfterMinusAnd(base::uc16 c, base::uc16 minus, + base::uc16 mask, + Label* on_not_equal); + virtual void CheckGreedyLoop(Label* on_tos_equals_current_position); + virtual void CheckCharacterInRange(base::uc16 from, base::uc16 to, + Label* on_in_range); + virtual void CheckCharacterNotInRange(base::uc16 from, base::uc16 to, + Label* on_not_in_range); + virtual bool CheckCharacterInRangeArray( + const ZoneList<CharacterRange>* ranges, Label* on_in_range); + virtual bool CheckCharacterNotInRangeArray( + const ZoneList<CharacterRange>* ranges, Label* on_not_in_range); + virtual void CheckAtStart(int cp_offset, Label* on_at_start); + virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start); + virtual void CheckPosition(int cp_offset, Label* on_outside_input); + virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set); + virtual bool CheckSpecialCharacterClass(StandardCharacterSet type, + Label* on_no_match); + virtual void CheckNotBackReference(int start_reg, bool read_backward, + Label* on_no_match); + virtual void CheckNotBackReferenceIgnoreCase(int start_reg, + bool read_backward, bool unicode, + Label* on_no_match); + + virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input, + bool check_bounds, int characters, + int eats_at_least); + + virtual void AdvanceRegister(int reg, int by); + virtual void IfRegisterGE(int reg, int comparand, Label* if_ge); + virtual void IfRegisterLT(int reg, int comparand, Label* if_lt); + virtual void IfRegisterEqPos(int reg, Label* if_eq); + virtual void PopRegister(int register_index); + virtual void PushRegister(int register_index, + StackCheckFlag check_stack_limit); + virtual void ReadCurrentPositionFromRegister(int reg); + virtual void WriteCurrentPositionToRegister(int reg, int cp_offset); + virtual void ReadStackPointerFromRegister(int reg); + virtual void WriteStackPointerToRegister(int reg); + virtual void SetRegister(int register_index, int to); + virtual void ClearRegisters(int reg_from, int reg_to); + + virtual Handle<HeapObject> GetCode(Handle<String> source); + + virtual bool CanReadUnaligned() const; + + private: + size_t frameSize_ = 0; + + void createStackFrame(); + void initFrameAndRegs(); + void successHandler(); + void exitHandler(); + void backtrackHandler(); + void stackOverflowHandler(); + + // Push a register on the backtrack stack. + void Push(js::jit::Register value); + + // Pop a value from the backtrack stack. + void Pop(js::jit::Register target); + + void CheckAtStartImpl(int cp_offset, Label* on_cond, + js::jit::Assembler::Condition cond); + void CheckCharacterImpl(js::jit::Imm32 c, Label* on_cond, + js::jit::Assembler::Condition cond); + void CheckCharacterAfterAndImpl(uint32_t c, uint32_t and_with, Label* on_cond, + bool negate); + void CheckCharacterInRangeImpl(base::uc16 from, base::uc16 to, Label* on_cond, + js::jit::Assembler::Condition cond); + void CheckNotBackReferenceImpl(int start_reg, bool read_backward, + bool unicode, Label* on_no_match, + bool ignore_case); + void CallIsCharacterInRangeArray(const ZoneList<CharacterRange>* ranges); + + void LoadCurrentCharacterUnchecked(int cp_offset, int characters); + + void JumpOrBacktrack(Label* to); + + // MacroAssembler methods that take a Label can be called with a + // null label, which means that we should backtrack if we would jump + // to that label. This is a helper to avoid writing out the same + // logic a dozen times. + inline js::jit::Label* LabelOrBacktrack(Label* to) { + return to ? to->inner() : &backtrack_label_; + } + + void CheckBacktrackStackLimit(); + + public: + static bool GrowBacktrackStack(RegExpStack* regexp_stack); + + static uint32_t CaseInsensitiveCompareNonUnicode(const char16_t* substring1, + const char16_t* substring2, + size_t byteLength); + static uint32_t CaseInsensitiveCompareUnicode(const char16_t* substring1, + const char16_t* substring2, + size_t byteLength); + static bool IsCharacterInRangeArray(uint32_t c, ByteArrayData* ranges); + + private: + inline int char_size() { return static_cast<int>(mode_); } + inline js::jit::Scale factor() { + return mode_ == UC16 ? js::jit::TimesTwo : js::jit::TimesOne; + } + + js::jit::Address inputStart() { + return js::jit::Address(masm_.getStackPointer(), + offsetof(FrameData, inputStart)); + } + js::jit::Address backtrackStackBase() { + return js::jit::Address(masm_.getStackPointer(), + offsetof(FrameData, backtrackStackBase)); + } + js::jit::Address matches() { + return js::jit::Address(masm_.getStackPointer(), + offsetof(FrameData, matches)); + } + js::jit::Address numMatches() { + return js::jit::Address(masm_.getStackPointer(), + offsetof(FrameData, numMatches)); + } + + // The stack-pointer-relative location of a regexp register. + js::jit::Address register_location(int register_index) { + return js::jit::Address(masm_.getStackPointer(), + register_offset(register_index)); + } + + int32_t register_offset(int register_index) { + MOZ_ASSERT(register_index >= 0 && register_index <= kMaxRegister); + if (num_registers_ <= register_index) { + num_registers_ = register_index + 1; + } + static_assert(alignof(uintptr_t) <= alignof(FrameData)); + return sizeof(FrameData) + register_index * sizeof(uintptr_t*); + } + + JSContext* cx_; + js::jit::StackMacroAssembler& masm_; + + /* + * This assembler uses the following registers: + * + * - current_character_: + * Contains the character (or characters) currently being examined. + * Must be loaded using LoadCurrentCharacter before using any of the + * dispatch methods. After a matching pass for a global regexp, + * temporarily stores the index of capture start. + * - current_position_: + * Current position in input *as negative byte offset from end of string*. + * - input_end_pointer_: + * Points to byte after last character in the input. current_position_ is + * relative to this. + * - backtrack_stack_pointer_: + * Points to tip of the (heap-allocated) backtrack stack. The stack grows + * downward (like the native stack). + * - temp0_, temp1_, temp2_: + * Scratch registers. + * + * The native stack pointer is used to access arguments (InputOutputData), + * local variables (FrameData), and irregexp's internal virtual registers + * (see register_location). + */ + + js::jit::Register current_character_; + js::jit::Register current_position_; + js::jit::Register input_end_pointer_; + js::jit::Register backtrack_stack_pointer_; + js::jit::Register temp0_, temp1_, temp2_; + + // These labels are used in various API calls and bound (if used) in + // GetCode. If we abort in the middle of a compilation, as may + // happen if a regexp is too big, they may be used but not + // bound. + js::jit::NonAssertingLabel entry_label_; + js::jit::NonAssertingLabel start_label_; + js::jit::NonAssertingLabel backtrack_label_; + js::jit::NonAssertingLabel success_label_; + js::jit::NonAssertingLabel exit_label_; + js::jit::NonAssertingLabel stack_overflow_label_; + js::jit::NonAssertingLabel exit_with_exception_label_; + + // When we generate the code to push a backtrack label's address + // onto the backtrack stack, we don't know its final address. We + // have to patch it after linking. This is slightly delicate, as the + // Label itself (which is allocated on the stack) may not exist by + // the time we link. The approach is as follows: + // + // 1. When we push a label on the backtrack stack (PushBacktrack), + // we bind the label's patchOffset_ field to the offset within + // the code that should be overwritten. This works because each + // label is only pushed by a single instruction. + // + // 2. When we bind a label (Bind), we check to see if it has a + // bound patchOffset_. If it does, we create a LabelPatch mapping + // its patch offset to the offset of the label itself. + // + // 3. While linking the code, we walk the list of label patches + // and patch the code accordingly. + class LabelPatch { + public: + LabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset) + : patchOffset_(patchOffset), labelOffset_(labelOffset) {} + + js::jit::CodeOffset patchOffset_; + size_t labelOffset_ = 0; + }; + + js::Vector<LabelPatch, 4, js::SystemAllocPolicy> labelPatches_; + void AddLabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset) { + js::AutoEnterOOMUnsafeRegion oomUnsafe; + if (!labelPatches_.emplaceBack(patchOffset, labelOffset)) { + oomUnsafe.crash("Irregexp label patch"); + } + } + + Mode mode_; + int num_registers_; + int num_capture_registers_; + js::jit::LiveGeneralRegisterSet savedRegisters_; + + public: + using TableVector = + js::Vector<PseudoHandle<ByteArrayData>, 4, js::SystemAllocPolicy>; + TableVector& tables() { return tables_; } + + private: + TableVector tables_; + void AddTable(PseudoHandle<ByteArrayData> table) { + js::AutoEnterOOMUnsafeRegion oomUnsafe; + if (!tables_.append(std::move(table))) { + oomUnsafe.crash("Irregexp table append"); + } + } +}; + +} // namespace internal +} // namespace v8 + +#endif // RegexpMacroAssemblerArch_h |