summaryrefslogtreecommitdiffstats
path: root/toolkit/xre/dllservices/mozglue/interceptor/PatcherDetour.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 17:32:43 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 17:32:43 +0000
commit6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
treea68f146d7fa01f0134297619fbe7e33db084e0aa /toolkit/xre/dllservices/mozglue/interceptor/PatcherDetour.h
parentInitial commit. (diff)
downloadthunderbird-upstream.tar.xz
thunderbird-upstream.zip
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'toolkit/xre/dllservices/mozglue/interceptor/PatcherDetour.h')
-rw-r--r--toolkit/xre/dllservices/mozglue/interceptor/PatcherDetour.h1739
1 files changed, 1739 insertions, 0 deletions
diff --git a/toolkit/xre/dllservices/mozglue/interceptor/PatcherDetour.h b/toolkit/xre/dllservices/mozglue/interceptor/PatcherDetour.h
new file mode 100644
index 0000000000..e0b33c7add
--- /dev/null
+++ b/toolkit/xre/dllservices/mozglue/interceptor/PatcherDetour.h
@@ -0,0 +1,1739 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_interceptor_PatcherDetour_h
+#define mozilla_interceptor_PatcherDetour_h
+
+#if defined(_M_ARM64)
+# include "mozilla/interceptor/Arm64.h"
+#endif // defined(_M_ARM64)
+#include <utility>
+
+#include "mozilla/Maybe.h"
+#include "mozilla/NativeNt.h"
+#include "mozilla/ScopeExit.h"
+#include "mozilla/TypedEnumBits.h"
+#include "mozilla/Types.h"
+#include "mozilla/Unused.h"
+#include "mozilla/interceptor/PatcherBase.h"
+#include "mozilla/interceptor/Trampoline.h"
+#include "mozilla/interceptor/VMSharingPolicies.h"
+
+#define COPY_CODES(NBYTES) \
+ do { \
+ tramp.CopyCodes(origBytes.GetAddress(), NBYTES); \
+ origBytes += NBYTES; \
+ } while (0)
+
+namespace mozilla {
+namespace interceptor {
+
+enum class DetourFlags : uint32_t {
+ eDefault = 0,
+ eEnable10BytePatch = 1, // Allow 10-byte patches when conditions allow
+ eTestOnlyForceShortPatch =
+ 2, // Force short patches at all times (x86-64 and arm64 testing only)
+ eDontResolveRedirection =
+ 4, // Don't resolve the redirection of JMP (e.g. kernel32 -> kernelbase)
+};
+
+MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(DetourFlags)
+
+// This class is responsible to do tasks which depend on MMPolicy, decoupled
+// from VMPolicy. We already have WindowsDllPatcherBase, but it needs to
+// depend on VMPolicy to hold an instance of VMPolicy as a member.
+template <typename MMPolicyT>
+class WindowsDllDetourPatcherPrimitive {
+ protected:
+#if defined(_M_ARM64)
+ // LDR x16, .+8
+ static const uint32_t kLdrX16Plus8 = 0x58000050U;
+#endif // defined(_M_ARM64)
+
+ static void ApplyDefaultPatch(WritableTargetFunction<MMPolicyT>& target,
+ intptr_t aDest) {
+#if defined(_M_IX86)
+ target.WriteByte(0xe9); // jmp
+ target.WriteDisp32(aDest); // hook displacement
+#elif defined(_M_X64)
+ // mov r11, address
+ target.WriteByte(0x49);
+ target.WriteByte(0xbb);
+ target.WritePointer(aDest);
+
+ // jmp r11
+ target.WriteByte(0x41);
+ target.WriteByte(0xff);
+ target.WriteByte(0xe3);
+#elif defined(_M_ARM64)
+ // The default patch requires 16 bytes
+ // LDR x16, .+8
+ target.WriteLong(kLdrX16Plus8);
+ // BR x16
+ target.WriteLong(arm64::BuildUnconditionalBranchToRegister(16));
+ target.WritePointer(aDest);
+#else
+# error "Unsupported processor architecture"
+#endif
+ }
+
+ public:
+ constexpr static uint32_t GetWorstCaseRequiredBytesToPatch() {
+#if defined(_M_IX86)
+ return 5;
+#elif defined(_M_X64)
+ return 13;
+#elif defined(_M_ARM64)
+ return 16;
+#else
+# error "Unsupported processor architecture"
+#endif
+ }
+
+ WindowsDllDetourPatcherPrimitive() = default;
+
+ WindowsDllDetourPatcherPrimitive(const WindowsDllDetourPatcherPrimitive&) =
+ delete;
+ WindowsDllDetourPatcherPrimitive(WindowsDllDetourPatcherPrimitive&&) = delete;
+ WindowsDllDetourPatcherPrimitive& operator=(
+ const WindowsDllDetourPatcherPrimitive&) = delete;
+ WindowsDllDetourPatcherPrimitive& operator=(
+ WindowsDllDetourPatcherPrimitive&&) = delete;
+
+ bool AddIrreversibleHook(const MMPolicyT& aMMPolicy, FARPROC aTargetFn,
+ intptr_t aHookDest) {
+ ReadOnlyTargetFunction<MMPolicyT> targetReadOnly(aMMPolicy, aTargetFn);
+
+ WritableTargetFunction<MMPolicyT> targetWritable(
+ targetReadOnly.Promote(GetWorstCaseRequiredBytesToPatch()));
+ if (!targetWritable) {
+ return false;
+ }
+
+ ApplyDefaultPatch(targetWritable, aHookDest);
+
+ return targetWritable.Commit();
+ }
+};
+
+template <typename VMPolicy>
+class WindowsDllDetourPatcher final
+ : public WindowsDllDetourPatcherPrimitive<typename VMPolicy::MMPolicyT>,
+ public WindowsDllPatcherBase<VMPolicy> {
+ using MMPolicyT = typename VMPolicy::MMPolicyT;
+ using TrampPoolT = typename VMPolicy::PoolType;
+ using PrimitiveT = WindowsDllDetourPatcherPrimitive<MMPolicyT>;
+ Maybe<DetourFlags> mFlags;
+
+ public:
+ template <typename... Args>
+ explicit WindowsDllDetourPatcher(Args&&... aArgs)
+ : WindowsDllPatcherBase<VMPolicy>(std::forward<Args>(aArgs)...) {}
+
+ ~WindowsDllDetourPatcher() { Clear(); }
+
+ WindowsDllDetourPatcher(const WindowsDllDetourPatcher&) = delete;
+ WindowsDllDetourPatcher(WindowsDllDetourPatcher&&) = delete;
+ WindowsDllDetourPatcher& operator=(const WindowsDllDetourPatcher&) = delete;
+ WindowsDllDetourPatcher& operator=(WindowsDllDetourPatcher&&) = delete;
+
+ void Clear() {
+ if (!this->mVMPolicy.ShouldUnhookUponDestruction()) {
+ return;
+ }
+
+#if defined(_M_IX86)
+ size_t nBytes = 1 + sizeof(intptr_t);
+#elif defined(_M_X64)
+ size_t nBytes = 2 + sizeof(intptr_t);
+#elif defined(_M_ARM64)
+ size_t nBytes = 2 * sizeof(uint32_t) + sizeof(uintptr_t);
+#else
+# error "Unknown processor type"
+#endif
+
+ const auto& tramps = this->mVMPolicy.Items();
+ for (auto&& tramp : tramps) {
+ // First we read the pointer to the interceptor instance.
+ Maybe<uintptr_t> instance = tramp.ReadEncodedPointer();
+ if (!instance) {
+ continue;
+ }
+
+ if (instance.value() != reinterpret_cast<uintptr_t>(this)) {
+ // tramp does not belong to this interceptor instance.
+ continue;
+ }
+
+ auto clearInstance = MakeScopeExit([&tramp]() -> void {
+ // Clear the instance pointer so that no future instances with the same
+ // |this| pointer will attempt to reset its hook.
+ tramp.Rewind();
+ tramp.WriteEncodedPointer(nullptr);
+ });
+
+ // Now we read the pointer to the intercepted function.
+ Maybe<uintptr_t> interceptedFn = tramp.ReadEncodedPointer();
+ if (!interceptedFn) {
+ continue;
+ }
+
+ WritableTargetFunction<MMPolicyT> origBytes(
+ this->mVMPolicy, interceptedFn.value(), nBytes);
+ if (!origBytes) {
+ continue;
+ }
+
+#if defined(_M_IX86) || defined(_M_X64)
+
+ Maybe<uint8_t> maybeOpcode1 = origBytes.ReadByte();
+ if (!maybeOpcode1) {
+ continue;
+ }
+
+ uint8_t opcode1 = maybeOpcode1.value();
+
+# if defined(_M_IX86)
+ // Ensure the JMP from CreateTrampoline is where we expect it to be.
+ MOZ_ASSERT(opcode1 == 0xE9);
+ if (opcode1 != 0xE9) {
+ continue;
+ }
+
+ intptr_t startOfTrampInstructions =
+ static_cast<intptr_t>(tramp.GetCurrentRemoteAddress());
+
+ origBytes.WriteDisp32(startOfTrampInstructions);
+ if (!origBytes) {
+ continue;
+ }
+
+ origBytes.Commit();
+# elif defined(_M_X64)
+ // Note: At the moment we clear 13-byte patches by replacing the jump to
+ // the patched function by a jump to the stub code. The original
+ // bytes of the original function are *not* restored. This implies
+ // that the stub code outlives our cleaning, so unwind information
+ // remains useful and must not be removed here.
+ if (opcode1 == 0x49) {
+ if (!Clear13BytePatch(origBytes, tramp.GetCurrentRemoteAddress())) {
+ continue;
+ }
+ } else if (opcode1 == 0xB8) {
+ if (!Clear10BytePatch(origBytes)) {
+ continue;
+ }
+ } else if (opcode1 == 0x48) {
+ // The original function was just a different trampoline
+ if (!ClearTrampolinePatch(origBytes, tramp.GetCurrentRemoteAddress())) {
+ continue;
+ }
+ } else {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized patch!");
+ continue;
+ }
+# endif
+
+#elif defined(_M_ARM64)
+
+ // Ensure that we see the instruction that we expect
+ Maybe<uint32_t> inst1 = origBytes.ReadLong();
+ if (!inst1) {
+ continue;
+ }
+
+ if (inst1.value() == this->kLdrX16Plus8) {
+ if (!Clear16BytePatch(origBytes, tramp.GetCurrentRemoteAddress())) {
+ continue;
+ }
+ } else if (arm64::IsUnconditionalBranchImm(inst1.value())) {
+ if (!Clear4BytePatch(inst1.value(), origBytes)) {
+ continue;
+ }
+ } else {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized patch!");
+ continue;
+ }
+
+#else
+# error "Unknown processor type"
+#endif
+ }
+
+ this->mVMPolicy.Clear();
+ }
+
+#if defined(_M_X64)
+ bool Clear13BytePatch(WritableTargetFunction<MMPolicyT>& aOrigBytes,
+ const uintptr_t aResetToAddress) {
+ Maybe<uint8_t> maybeOpcode2 = aOrigBytes.ReadByte();
+ if (!maybeOpcode2) {
+ return false;
+ }
+
+ uint8_t opcode2 = maybeOpcode2.value();
+ if (opcode2 != 0xBB) {
+ return false;
+ }
+
+ aOrigBytes.WritePointer(aResetToAddress);
+ if (!aOrigBytes) {
+ return false;
+ }
+
+ return aOrigBytes.Commit();
+ }
+
+ bool ClearTrampolinePatch(WritableTargetFunction<MMPolicyT>& aOrigBytes,
+ const uintptr_t aPtrToResetToAddress) {
+ // The target of the trampoline we replaced is stored at
+ // aPtrToResetToAddress. We simply put it back where we got it from.
+ Maybe<uint8_t> maybeOpcode2 = aOrigBytes.ReadByte();
+ if (!maybeOpcode2) {
+ return false;
+ }
+
+ uint8_t opcode2 = maybeOpcode2.value();
+ if (opcode2 != 0xB8) {
+ return false;
+ }
+
+ auto oldPtr = *(reinterpret_cast<const uintptr_t*>(aPtrToResetToAddress));
+
+ aOrigBytes.WritePointer(oldPtr);
+ if (!aOrigBytes) {
+ return false;
+ }
+
+ return aOrigBytes.Commit();
+ }
+
+ bool Clear10BytePatch(WritableTargetFunction<MMPolicyT>& aOrigBytes) {
+ Maybe<uint32_t> maybePtr32 = aOrigBytes.ReadLong();
+ if (!maybePtr32) {
+ return false;
+ }
+
+ uint32_t ptr32 = maybePtr32.value();
+ // We expect the high bit to be clear
+ if (ptr32 & 0x80000000) {
+ return false;
+ }
+
+ uintptr_t trampPtr = ptr32;
+
+ // trampPtr points to an intermediate trampoline that contains a 13-byte
+ // patch. We back up by sizeof(uintptr_t) so that we can access the pointer
+ // to the stub trampoline.
+ WritableTargetFunction<MMPolicyT> writableIntermediate(
+ this->mVMPolicy, trampPtr - sizeof(uintptr_t), 13 + sizeof(uintptr_t));
+ if (!writableIntermediate) {
+ return false;
+ }
+
+ Maybe<uintptr_t> stubTramp = writableIntermediate.ReadEncodedPtr();
+ if (!stubTramp || !stubTramp.value()) {
+ return false;
+ }
+
+ Maybe<uint8_t> maybeOpcode1 = writableIntermediate.ReadByte();
+ if (!maybeOpcode1) {
+ return false;
+ }
+
+ // We expect this opcode to be the beginning of our normal mov r11, ptr
+ // patch sequence.
+ uint8_t opcode1 = maybeOpcode1.value();
+ if (opcode1 != 0x49) {
+ return false;
+ }
+
+ // Now we can just delegate the rest to our normal 13-byte patch clearing.
+ return Clear13BytePatch(writableIntermediate, stubTramp.value());
+ }
+#endif // defined(_M_X64)
+
+#if defined(_M_ARM64)
+ bool Clear4BytePatch(const uint32_t aBranchImm,
+ WritableTargetFunction<MMPolicyT>& aOrigBytes) {
+ MOZ_ASSERT(arm64::IsUnconditionalBranchImm(aBranchImm));
+
+ arm64::LoadOrBranch decoded = arm64::BUncondImmDecode(
+ aOrigBytes.GetCurrentAddress() - sizeof(uint32_t), aBranchImm);
+
+ uintptr_t trampPtr = decoded.mAbsAddress;
+
+ // trampPtr points to an intermediate trampoline that contains a veneer.
+ // We back up by sizeof(uintptr_t) so that we can access the pointer to the
+ // stub trampoline.
+
+ // We want trampLen to be the size of the veneer, plus one pointer (since
+ // we are backing up trampPtr by one pointer)
+ size_t trampLen = 16 + sizeof(uintptr_t);
+
+ WritableTargetFunction<MMPolicyT> writableIntermediate(
+ this->mVMPolicy, trampPtr - sizeof(uintptr_t), trampLen);
+ if (!writableIntermediate) {
+ return false;
+ }
+
+ Maybe<uintptr_t> stubTramp = writableIntermediate.ReadEncodedPtr();
+ if (!stubTramp || !stubTramp.value()) {
+ return false;
+ }
+
+ Maybe<uint32_t> inst1 = writableIntermediate.ReadLong();
+ if (!inst1 || inst1.value() != this->kLdrX16Plus8) {
+ return false;
+ }
+
+ return Clear16BytePatch(writableIntermediate, stubTramp.value());
+ }
+
+ bool Clear16BytePatch(WritableTargetFunction<MMPolicyT>& aOrigBytes,
+ const uintptr_t aResetToAddress) {
+ Maybe<uint32_t> inst2 = aOrigBytes.ReadLong();
+ if (!inst2) {
+ return false;
+ }
+
+ if (inst2.value() != arm64::BuildUnconditionalBranchToRegister(16)) {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized patch!");
+ return false;
+ }
+
+ // Clobber the pointer to our hook function with a pointer to the
+ // start of the trampoline.
+ aOrigBytes.WritePointer(aResetToAddress);
+ aOrigBytes.Commit();
+
+ return true;
+ }
+#endif // defined(_M_ARM64)
+
+ void Init(DetourFlags aFlags = DetourFlags::eDefault) {
+ if (Initialized()) {
+ return;
+ }
+
+#if defined(_M_X64)
+ if (aFlags & DetourFlags::eTestOnlyForceShortPatch) {
+ aFlags |= DetourFlags::eEnable10BytePatch;
+ }
+#endif // defined(_M_X64)
+
+ mFlags = Some(aFlags);
+ }
+
+ bool Initialized() const { return mFlags.isSome(); }
+
+ bool AddHook(FARPROC aTargetFn, intptr_t aHookDest, void** aOrigFunc) {
+ ReadOnlyTargetFunction<MMPolicyT> target(
+ (mFlags.value() & DetourFlags::eDontResolveRedirection)
+ ? ReadOnlyTargetFunction<MMPolicyT>(
+ this->mVMPolicy, reinterpret_cast<uintptr_t>(aTargetFn))
+ : this->ResolveRedirectedAddress(aTargetFn));
+
+ TrampPoolT* trampPool = nullptr;
+
+#if defined(_M_ARM64)
+ // ARM64 uses two passes to build its trampoline. The first pass uses a
+ // null tramp to determine how many bytes are needed. Once that is known,
+ // CreateTrampoline calls itself recursively with a "real" tramp.
+ Trampoline<MMPolicyT> tramp(nullptr);
+#else
+ Maybe<TrampPoolT> maybeTrampPool = DoReserve();
+ MOZ_ASSERT(maybeTrampPool);
+ if (!maybeTrampPool) {
+ return false;
+ }
+
+ trampPool = maybeTrampPool.ptr();
+
+ Maybe<Trampoline<MMPolicyT>> maybeTramp(trampPool->GetNextTrampoline());
+ if (!maybeTramp) {
+ this->SetLastDetourError(
+ DetourResultCode::DETOUR_PATCHER_NEXT_TRAMPOLINE_ERROR);
+ return false;
+ }
+
+ Trampoline<MMPolicyT> tramp(std::move(maybeTramp.ref()));
+#endif
+
+ CreateTrampoline(target, trampPool, tramp, aHookDest, aOrigFunc);
+ if (!*aOrigFunc) {
+ return false;
+ }
+
+ return true;
+ }
+
+ private:
+ /**
+ * This function returns a maximum distance that can be reached by a single
+ * unconditional jump instruction. This is dependent on the processor ISA.
+ * Note that this distance is *exclusive* when added to the pivot, so the
+ * distance returned by this function is actually
+ * (maximum_absolute_offset + 1).
+ */
+ static uint32_t GetDefaultPivotDistance() {
+#if defined(_M_ARM64)
+ // Immediate unconditional branch allows for +/- 128MB
+ return 0x08000000U;
+#elif defined(_M_IX86) || defined(_M_X64)
+ // For these ISAs, our distance will assume the use of an unconditional jmp
+ // with a 32-bit signed displacement.
+ return 0x80000000U;
+#else
+# error "Not defined for this processor arch"
+#endif
+ }
+
+ /**
+ * If we're reserving trampoline space for a specific module, we base the
+ * pivot off of the median address of the module's .text section. While this
+ * may not be precise, it should be accurate enough for our purposes: To
+ * ensure that the trampoline space is reachable by any executable code in the
+ * module.
+ */
+ Maybe<TrampPoolT> ReserveForModule(HMODULE aModule) {
+ nt::PEHeaders moduleHeaders(aModule);
+ if (!moduleHeaders) {
+ this->SetLastDetourError(
+ DetourResultCode::DETOUR_PATCHER_RESERVE_FOR_MODULE_PE_ERROR);
+ return Nothing();
+ }
+
+ Maybe<Span<const uint8_t>> textSectionInfo =
+ moduleHeaders.GetTextSectionInfo();
+ if (!textSectionInfo) {
+ this->SetLastDetourError(
+ DetourResultCode::DETOUR_PATCHER_RESERVE_FOR_MODULE_TEXT_ERROR);
+ return Nothing();
+ }
+
+ const uint8_t* median = textSectionInfo.value().data() +
+ (textSectionInfo.value().LengthBytes() / 2);
+
+ Maybe<TrampPoolT> maybeTrampPool = this->mVMPolicy.Reserve(
+ reinterpret_cast<uintptr_t>(median), GetDefaultPivotDistance());
+ if (!maybeTrampPool) {
+ this->SetLastDetourError(
+ DetourResultCode::DETOUR_PATCHER_RESERVE_FOR_MODULE_RESERVE_ERROR);
+ }
+ return maybeTrampPool;
+ }
+
+ Maybe<TrampPoolT> DoReserve(HMODULE aModule = nullptr) {
+ if (aModule) {
+ return ReserveForModule(aModule);
+ }
+
+ uintptr_t pivot = 0;
+ uint32_t distance = 0;
+
+#if defined(_M_X64)
+ if (mFlags.value() & DetourFlags::eEnable10BytePatch) {
+ // We must stay below the 2GB mark because a 10-byte patch uses movsxd
+ // (ie, sign extension) to expand the pointer to 64-bits, so bit 31 of any
+ // pointers into the reserved region must be 0.
+ pivot = 0x40000000U;
+ distance = 0x40000000U;
+ }
+#endif // defined(_M_X64)
+
+ Maybe<TrampPoolT> maybeTrampPool = this->mVMPolicy.Reserve(pivot, distance);
+#if defined(NIGHTLY_BUILD)
+ if (!maybeTrampPool && this->GetLastDetourError().isNothing()) {
+ this->SetLastDetourError(
+ DetourResultCode::DETOUR_PATCHER_DO_RESERVE_ERROR);
+ }
+#endif // defined(NIGHTLY_BUILD)
+ return maybeTrampPool;
+ }
+
+ protected:
+#if !defined(_M_ARM64)
+
+ const static int kPageSize = 4096;
+
+ // rex bits
+ static const BYTE kMaskHighNibble = 0xF0;
+ static const BYTE kRexOpcode = 0x40;
+ static const BYTE kMaskRexW = 0x08;
+ static const BYTE kMaskRexR = 0x04;
+ static const BYTE kMaskRexX = 0x02;
+ static const BYTE kMaskRexB = 0x01;
+
+ // mod r/m bits
+ static const BYTE kRegFieldShift = 3;
+ static const BYTE kMaskMod = 0xC0;
+ static const BYTE kMaskReg = 0x38;
+ static const BYTE kMaskRm = 0x07;
+ static const BYTE kRmNeedSib = 0x04;
+ static const BYTE kModReg = 0xC0;
+ static const BYTE kModDisp32 = 0x80;
+ static const BYTE kModDisp8 = 0x40;
+ static const BYTE kModNoRegDisp = 0x00;
+ static const BYTE kRmNoRegDispDisp32 = 0x05;
+
+ // sib bits
+ static const BYTE kMaskSibScale = 0xC0;
+ static const BYTE kMaskSibIndex = 0x38;
+ static const BYTE kMaskSibBase = 0x07;
+ static const BYTE kSibBaseEbp = 0x05;
+
+ // Register bit IDs.
+ static const BYTE kRegAx = 0x0;
+ static const BYTE kRegCx = 0x1;
+ static const BYTE kRegDx = 0x2;
+ static const BYTE kRegBx = 0x3;
+ static const BYTE kRegSp = 0x4;
+ static const BYTE kRegBp = 0x5;
+ static const BYTE kRegSi = 0x6;
+ static const BYTE kRegDi = 0x7;
+
+ // Special ModR/M codes. These indicate operands that cannot be simply
+ // memcpy-ed.
+ // Operand is a 64-bit RIP-relative address.
+ static const int kModOperand64 = -2;
+ // Operand is not yet handled by our trampoline.
+ static const int kModUnknown = -1;
+
+ /**
+ * Returns the number of bytes taken by the ModR/M byte, SIB (if present)
+ * and the instruction's operand. In special cases, the special MODRM codes
+ * above are returned.
+ * aModRm points to the ModR/M byte of the instruction.
+ * On return, aSubOpcode (if present) is filled with the subopcode/register
+ * code found in the ModR/M byte.
+ */
+ int CountModRmSib(const ReadOnlyTargetFunction<MMPolicyT>& aModRm,
+ BYTE* aSubOpcode = nullptr) {
+ int numBytes = 1; // Start with 1 for mod r/m byte itself
+ switch (*aModRm & kMaskMod) {
+ case kModReg:
+ return numBytes;
+ case kModDisp8:
+ numBytes += 1;
+ break;
+ case kModDisp32:
+ numBytes += 4;
+ break;
+ case kModNoRegDisp:
+ if ((*aModRm & kMaskRm) == kRmNoRegDispDisp32) {
+# if defined(_M_X64)
+ if (aSubOpcode) {
+ *aSubOpcode = (*aModRm & kMaskReg) >> kRegFieldShift;
+ }
+ return kModOperand64;
+# else
+ // On IA-32, all ModR/M instruction modes address memory relative to 0
+ numBytes += 4;
+# endif
+ } else if (((*aModRm & kMaskRm) == kRmNeedSib &&
+ (*(aModRm + 1) & kMaskSibBase) == kSibBaseEbp)) {
+ numBytes += 4;
+ }
+ break;
+ default:
+ // This should not be reachable
+ MOZ_ASSERT_UNREACHABLE("Impossible value for modr/m byte mod bits");
+ return kModUnknown;
+ }
+ if ((*aModRm & kMaskRm) == kRmNeedSib) {
+ // SIB byte
+ numBytes += 1;
+ }
+ if (aSubOpcode) {
+ *aSubOpcode = (*aModRm & kMaskReg) >> kRegFieldShift;
+ }
+ return numBytes;
+ }
+
+# if defined(_M_X64)
+ enum class JumpType{Je, Jne, Jae, Jmp, Call};
+
+ static bool GenerateJump(Trampoline<MMPolicyT>& aTramp,
+ uintptr_t aAbsTargetAddress, const JumpType aType) {
+ // Near call, absolute indirect, address given in r/m32
+ if (aType == JumpType::Call) {
+ // CALL [RIP+0]
+ aTramp.WriteByte(0xff);
+ aTramp.WriteByte(0x15);
+ // The offset to jump destination -- 2 bytes after the current position.
+ aTramp.WriteInteger(2);
+ aTramp.WriteByte(0xeb); // JMP + 8 (jump over target address)
+ aTramp.WriteByte(8);
+ aTramp.WritePointer(aAbsTargetAddress);
+ return !!aTramp;
+ }
+
+ // Write an opposite conditional jump because the destination branches
+ // are swapped.
+ if (aType == JumpType::Je) {
+ // JNE RIP+14
+ aTramp.WriteByte(0x75);
+ aTramp.WriteByte(14);
+ } else if (aType == JumpType::Jne) {
+ // JE RIP+14
+ aTramp.WriteByte(0x74);
+ aTramp.WriteByte(14);
+ } else if (aType == JumpType::Jae) {
+ // JB RIP+14
+ aTramp.WriteByte(0x72);
+ aTramp.WriteByte(14);
+ }
+
+ // Near jmp, absolute indirect, address given in r/m32
+ // JMP [RIP+0]
+ aTramp.WriteByte(0xff);
+ aTramp.WriteByte(0x25);
+ // The offset to jump destination is 0
+ aTramp.WriteInteger(0);
+ aTramp.WritePointer(aAbsTargetAddress);
+
+ return !!aTramp;
+ }
+# endif
+
+ enum ePrefixGroupBits{eNoPrefixes = 0, ePrefixGroup1 = (1 << 0),
+ ePrefixGroup2 = (1 << 1), ePrefixGroup3 = (1 << 2),
+ ePrefixGroup4 = (1 << 3)};
+
+ int CountPrefixBytes(const ReadOnlyTargetFunction<MMPolicyT>& aBytes,
+ unsigned char* aOutGroupBits) {
+ unsigned char& groupBits = *aOutGroupBits;
+ groupBits = eNoPrefixes;
+ int index = 0;
+ while (true) {
+ switch (aBytes[index]) {
+ // Group 1
+ case 0xF0: // LOCK
+ case 0xF2: // REPNZ
+ case 0xF3: // REP / REPZ
+ if (groupBits & ePrefixGroup1) {
+ return -1;
+ }
+ groupBits |= ePrefixGroup1;
+ ++index;
+ break;
+
+ // Group 2
+ case 0x2E: // CS override / branch not taken
+ case 0x36: // SS override
+ case 0x3E: // DS override / branch taken
+ case 0x64: // FS override
+ case 0x65: // GS override
+ if (groupBits & ePrefixGroup2) {
+ return -1;
+ }
+ groupBits |= ePrefixGroup2;
+ ++index;
+ break;
+
+ // Group 3
+ case 0x66: // operand size override
+ if (groupBits & ePrefixGroup3) {
+ return -1;
+ }
+ groupBits |= ePrefixGroup3;
+ ++index;
+ break;
+
+ // Group 4
+ case 0x67: // Address size override
+ if (groupBits & ePrefixGroup4) {
+ return -1;
+ }
+ groupBits |= ePrefixGroup4;
+ ++index;
+ break;
+
+ default:
+ return index;
+ }
+ }
+ }
+
+ // Return a ModR/M byte made from the 2 Mod bits, the register used for the
+ // reg bits and the register used for the R/M bits.
+ BYTE BuildModRmByte(BYTE aModBits, BYTE aReg, BYTE aRm) {
+ MOZ_ASSERT((aRm & kMaskRm) == aRm);
+ MOZ_ASSERT((aModBits & kMaskMod) == aModBits);
+ MOZ_ASSERT(((aReg << kRegFieldShift) & kMaskReg) ==
+ (aReg << kRegFieldShift));
+ return aModBits | (aReg << kRegFieldShift) | aRm;
+ }
+
+#endif // !defined(_M_ARM64)
+
+ // If originalFn is a recognized trampoline then patch it to call aDest,
+ // set *aTramp and *aOutTramp to that trampoline's target and return true.
+ bool PatchIfTargetIsRecognizedTrampoline(
+ Trampoline<MMPolicyT>& aTramp,
+ ReadOnlyTargetFunction<MMPolicyT>& aOriginalFn, intptr_t aDest,
+ void** aOutTramp) {
+#if defined(_M_X64)
+ // Variation 1:
+ // 48 b8 imm64 mov rax, imm64
+ // ff e0 jmp rax
+ //
+ // Variation 2:
+ // 48 b8 imm64 mov rax, imm64
+ // 50 push rax
+ // c3 ret
+ if ((aOriginalFn[0] == 0x48) && (aOriginalFn[1] == 0xB8) &&
+ ((aOriginalFn[10] == 0xFF && aOriginalFn[11] == 0xE0) ||
+ (aOriginalFn[10] == 0x50 && aOriginalFn[11] == 0xC3))) {
+ uintptr_t originalTarget =
+ (aOriginalFn + 2).template ChasePointer<uintptr_t>();
+
+ // Skip the first two bytes (48 b8) so that we can overwrite the imm64
+ WritableTargetFunction<MMPolicyT> target(aOriginalFn.Promote(8, 2));
+ if (!target) {
+ return false;
+ }
+
+ // Write the new JMP target address.
+ target.WritePointer(aDest);
+ if (!target.Commit()) {
+ return false;
+ }
+
+ // Store the old target address so we can restore it when we're cleared
+ aTramp.WritePointer(originalTarget);
+ if (!aTramp) {
+ return false;
+ }
+
+ *aOutTramp = reinterpret_cast<void*>(originalTarget);
+ return true;
+ }
+#endif // defined(_M_X64)
+
+ return false;
+ }
+
+#if defined(_M_ARM64)
+ bool Apply4BytePatch(TrampPoolT* aTrampPool, void* aTrampPtr,
+ WritableTargetFunction<MMPolicyT>& target,
+ intptr_t aDest) {
+ MOZ_ASSERT(aTrampPool);
+ if (!aTrampPool) {
+ return false;
+ }
+
+ uintptr_t hookDest = arm64::MakeVeneer(*aTrampPool, aTrampPtr, aDest);
+ if (!hookDest) {
+ return false;
+ }
+
+ Maybe<uint32_t> branchImm = arm64::BuildUnconditionalBranchImm(
+ target.GetCurrentAddress(), hookDest);
+ if (!branchImm) {
+ return false;
+ }
+
+ target.WriteLong(branchImm.value());
+
+ return true;
+ }
+#endif // defined(_M_ARM64)
+
+#if defined(_M_X64)
+ bool Apply10BytePatch(TrampPoolT* aTrampPool, void* aTrampPtr,
+ WritableTargetFunction<MMPolicyT>& target,
+ intptr_t aDest) {
+ // Note: Even if the target function is also below 2GB, we still use an
+ // intermediary trampoline so that we consistently have a 64-bit pointer
+ // that we can use to reset the trampoline upon interceptor shutdown.
+ Maybe<Trampoline<MMPolicyT>> maybeCallTramp(
+ aTrampPool->GetNextTrampoline());
+ if (!maybeCallTramp) {
+ return false;
+ }
+
+ Trampoline<MMPolicyT> callTramp(std::move(maybeCallTramp.ref()));
+
+ // Write a null instance so that Clear() does not consider this tramp to
+ // be a normal tramp to be torn down.
+ callTramp.WriteEncodedPointer(nullptr);
+ // Use the second pointer slot to store a pointer to the primary tramp
+ callTramp.WriteEncodedPointer(aTrampPtr);
+ callTramp.StartExecutableCode();
+
+ // mov r11, address
+ callTramp.WriteByte(0x49);
+ callTramp.WriteByte(0xbb);
+ callTramp.WritePointer(aDest);
+
+ // jmp r11
+ callTramp.WriteByte(0x41);
+ callTramp.WriteByte(0xff);
+ callTramp.WriteByte(0xe3);
+
+ void* callTrampStart = callTramp.EndExecutableCode();
+ if (!callTrampStart) {
+ return false;
+ }
+
+ target.WriteByte(0xB8); // MOV EAX, IMM32
+
+ // Assert that the topmost 33 bits are 0
+ MOZ_ASSERT(
+ !(reinterpret_cast<uintptr_t>(callTrampStart) & (~0x7FFFFFFFULL)));
+
+ target.WriteLong(static_cast<uint32_t>(
+ reinterpret_cast<uintptr_t>(callTrampStart) & 0x7FFFFFFFU));
+ target.WriteByte(0x48); // REX.W
+ target.WriteByte(0x63); // MOVSXD r64, r/m32
+ // dest: rax, src: eax
+ target.WriteByte(BuildModRmByte(kModReg, kRegAx, kRegAx));
+ target.WriteByte(0xFF); // JMP /4
+ target.WriteByte(BuildModRmByte(kModReg, 4, kRegAx)); // rax
+
+ return true;
+ }
+#endif // defined(_M_X64)
+
+ void CreateTrampoline(ReadOnlyTargetFunction<MMPolicyT>& origBytes,
+ TrampPoolT* aTrampPool, Trampoline<MMPolicyT>& aTramp,
+ intptr_t aDest, void** aOutTramp) {
+ *aOutTramp = nullptr;
+
+ Trampoline<MMPolicyT>& tramp = aTramp;
+ if (!tramp) {
+ this->SetLastDetourError(
+ DetourResultCode::DETOUR_PATCHER_INVALID_TRAMPOLINE);
+ return;
+ }
+
+ // The beginning of the trampoline contains two pointer-width slots:
+ // [0]: |this|, so that we know whether the trampoline belongs to us;
+ // [1]: Pointer to original function, so that we can reset the hooked
+ // function to its original behavior upon destruction. In rare cases
+ // where the function was already a different trampoline, this is
+ // just a pointer to that trampoline's target address.
+ tramp.WriteEncodedPointer(this);
+ if (!tramp) {
+ this->SetLastDetourError(
+ DetourResultCode::DETOUR_PATCHER_WRITE_POINTER_ERROR);
+ return;
+ }
+
+ auto clearInstanceOnFailure = MakeScopeExit([this, aOutTramp, &tramp,
+ &origBytes]() -> void {
+ // *aOutTramp is not set until CreateTrampoline has completed
+ // successfully, so we can use that to check for success.
+ if (*aOutTramp) {
+ return;
+ }
+
+ // Clear the instance pointer so that we don't try to reset a
+ // nonexistent hook.
+ tramp.Rewind();
+ tramp.WriteEncodedPointer(nullptr);
+
+#if defined(NIGHTLY_BUILD)
+ origBytes.Rewind();
+ this->SetLastDetourError(
+ DetourResultCode::DETOUR_PATCHER_CREATE_TRAMPOLINE_ERROR);
+ DetourError& lastError = *this->mVMPolicy.mLastError;
+ size_t bytesToCapture = std::min(
+ ArrayLength(lastError.mOrigBytes),
+ static_cast<size_t>(PrimitiveT::GetWorstCaseRequiredBytesToPatch()));
+# if defined(_M_ARM64)
+ size_t numInstructionsToCapture = bytesToCapture / sizeof(uint32_t);
+ auto origBytesDst = reinterpret_cast<uint32_t*>(lastError.mOrigBytes);
+ for (size_t i = 0; i < numInstructionsToCapture; ++i) {
+ origBytesDst[i] = origBytes.ReadNextInstruction();
+ }
+# else
+ for (size_t i = 0; i < bytesToCapture; ++i) {
+ lastError.mOrigBytes[i] = origBytes[i];
+ }
+# endif // defined(_M_ARM64)
+#else
+ // Silence -Wunused-lambda-capture in non-Nightly.
+ Unused << this;
+ Unused << origBytes;
+#endif // defined(NIGHTLY_BUILD)
+ });
+
+ tramp.WritePointer(origBytes.AsEncodedPtr());
+ if (!tramp) {
+ return;
+ }
+
+ if (PatchIfTargetIsRecognizedTrampoline(tramp, origBytes, aDest,
+ aOutTramp)) {
+ return;
+ }
+
+ tramp.StartExecutableCode();
+
+ constexpr uint32_t kWorstCaseBytesRequired =
+ PrimitiveT::GetWorstCaseRequiredBytesToPatch();
+
+#if defined(_M_IX86)
+ int pJmp32 = -1;
+ while (origBytes.GetOffset() < kWorstCaseBytesRequired) {
+ // Understand some simple instructions that might be found in a
+ // prologue; we might need to extend this as necessary.
+ //
+ // Note! If we ever need to understand jump instructions, we'll
+ // need to rewrite the displacement argument.
+ unsigned char prefixGroups;
+ int numPrefixBytes = CountPrefixBytes(origBytes, &prefixGroups);
+ if (numPrefixBytes < 0 ||
+ (prefixGroups & (ePrefixGroup3 | ePrefixGroup4))) {
+ // Either the prefix sequence was bad, or there are prefixes that
+ // we don't currently support (groups 3 and 4)
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+
+ origBytes += numPrefixBytes;
+ if (*origBytes >= 0x88 && *origBytes <= 0x8B) {
+ // various MOVs
+ ++origBytes;
+ int len = CountModRmSib(origBytes);
+ if (len < 0) {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
+ return;
+ }
+ origBytes += len;
+ } else if (*origBytes == 0x0f &&
+ (origBytes[1] == 0x10 || origBytes[1] == 0x11)) {
+ // SSE: movups xmm, xmm/m128
+ // movups xmm/m128, xmm
+ origBytes += 2;
+ int len = CountModRmSib(origBytes);
+ if (len < 0) {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
+ return;
+ }
+ origBytes += len;
+ } else if (*origBytes == 0xA1) {
+ // MOV eax, [seg:offset]
+ origBytes += 5;
+ } else if (*origBytes == 0xB8) {
+ // MOV 0xB8: http://ref.x86asm.net/coder32.html#xB8
+ origBytes += 5;
+ } else if (*origBytes == 0x33 && (origBytes[1] & kMaskMod) == kModReg) {
+ // XOR r32, r32
+ origBytes += 2;
+ } else if ((*origBytes & 0xf8) == 0x40) {
+ // INC r32
+ origBytes += 1;
+ } else if (*origBytes == 0x83) {
+ uint8_t mod = static_cast<uint8_t>(origBytes[1]) & kMaskMod;
+ uint8_t rm = static_cast<uint8_t>(origBytes[1]) & kMaskRm;
+ if (mod == kModReg) {
+ // ADD|OR|ADC|SBB|AND|SUB|XOR|CMP r, imm8
+ origBytes += 3;
+ } else if (mod == kModDisp8 && rm != kRmNeedSib) {
+ // ADD|OR|ADC|SBB|AND|SUB|XOR|CMP [r+disp8], imm8
+ origBytes += 4;
+ } else {
+ // bail
+ MOZ_ASSERT_UNREACHABLE("Unrecognized bit opcode sequence");
+ return;
+ }
+ } else if (*origBytes == 0x68) {
+ // PUSH with 4-byte operand
+ origBytes += 5;
+ } else if ((*origBytes & 0xf0) == 0x50) {
+ // 1-byte PUSH/POP
+ ++origBytes;
+ } else if (*origBytes == 0x6A) {
+ // PUSH imm8
+ origBytes += 2;
+ } else if (*origBytes == 0xe9) {
+ pJmp32 = origBytes.GetOffset();
+ // jmp 32bit offset
+ origBytes += 5;
+ } else if (*origBytes == 0xff && origBytes[1] == 0x25) {
+ // jmp [disp32]
+ origBytes += 6;
+ } else if (*origBytes == 0xc2) {
+ // ret imm16. We can't handle this but it happens. We don't ASSERT but
+ // we do fail to hook.
+# if defined(MOZILLA_INTERNAL_API)
+ NS_WARNING("Cannot hook method -- RET opcode found");
+# endif
+ return;
+ } else {
+ // printf ("Unknown x86 instruction byte 0x%02x, aborting trampoline\n",
+ // *origBytes);
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ }
+
+ // The trampoline is a copy of the instructions that we just traced,
+ // followed by a jump that we add below.
+ tramp.CopyFrom(origBytes.GetBaseAddress(), origBytes.GetOffset());
+ if (!tramp) {
+ return;
+ }
+#elif defined(_M_X64)
+ bool foundJmp = false;
+ // |use10BytePatch| should always default to |false| in production. It is
+ // not set to true unless we detect that a 10-byte patch is necessary.
+ // OTOH, for testing purposes, if we want to force a 10-byte patch, we
+ // always initialize |use10BytePatch| to |true|.
+ bool use10BytePatch =
+ (mFlags.value() & DetourFlags::eTestOnlyForceShortPatch) ==
+ DetourFlags::eTestOnlyForceShortPatch;
+ const uint32_t bytesRequired =
+ use10BytePatch ? 10 : kWorstCaseBytesRequired;
+
+ while (origBytes.GetOffset() < bytesRequired) {
+ // If we found JMP 32bit offset, we require that the next bytes must
+ // be NOP or INT3. There is no reason to copy them.
+ // TODO: This used to trigger for Je as well. Now that I allow
+ // instructions after CALL and JE, I don't think I need that.
+ // The only real value of this condition is that if code follows a JMP
+ // then its _probably_ the target of a JMP somewhere else and we
+ // will be overwriting it, which would be tragic. This seems
+ // highly unlikely.
+ if (foundJmp) {
+ if (*origBytes == 0x90 || *origBytes == 0xcc) {
+ ++origBytes;
+ continue;
+ }
+
+ // If our trampoline space is located in the lowest 2GB, we can do a ten
+ // byte patch instead of a thirteen byte patch.
+ if (aTrampPool && aTrampPool->IsInLowest2GB() &&
+ origBytes.GetOffset() >= 10) {
+ use10BytePatch = true;
+ break;
+ }
+
+ MOZ_ASSERT_UNREACHABLE("Opcode sequence includes commands after JMP");
+ return;
+ }
+ if (*origBytes == 0x0f) {
+ COPY_CODES(1);
+ if (*origBytes == 0x1f) {
+ // nop (multibyte)
+ COPY_CODES(1);
+ if ((*origBytes & 0xc0) == 0x40 && (*origBytes & 0x7) == 0x04) {
+ COPY_CODES(3);
+ } else {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ } else if (*origBytes == 0x05) {
+ // syscall
+ COPY_CODES(1);
+ } else if (*origBytes == 0x10 || *origBytes == 0x11) {
+ // SSE: movups xmm, xmm/m128
+ // movups xmm/m128, xmm
+ COPY_CODES(1);
+ int nModRmSibBytes = CountModRmSib(origBytes);
+ if (nModRmSibBytes < 0) {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ } else {
+ COPY_CODES(nModRmSibBytes);
+ }
+ } else if (*origBytes >= 0x83 && *origBytes <= 0x85) {
+ // 0f 83 cd JAE rel32
+ // 0f 84 cd JE rel32
+ // 0f 85 cd JNE rel32
+ const JumpType kJumpTypes[] = {JumpType::Jae, JumpType::Je,
+ JumpType::Jne};
+ auto jumpType = kJumpTypes[*origBytes - 0x83];
+ ++origBytes;
+ --tramp; // overwrite the 0x0f we copied above
+
+ if (!GenerateJump(tramp, origBytes.ReadDisp32AsAbsolute(),
+ jumpType)) {
+ return;
+ }
+ } else {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ } else if (*origBytes >= 0x88 && *origBytes <= 0x8B) {
+ // various 32-bit MOVs
+ COPY_CODES(1);
+ int len = CountModRmSib(origBytes);
+ if (len < 0) {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
+ return;
+ }
+ COPY_CODES(len);
+ } else if (*origBytes == 0x40 || *origBytes == 0x41) {
+ // Plain REX or REX.B
+ COPY_CODES(1);
+ if ((*origBytes & 0xf0) == 0x50) {
+ // push/pop with Rx register
+ COPY_CODES(1);
+ } else if (*origBytes >= 0xb8 && *origBytes <= 0xbf) {
+ // mov r32, imm32
+ COPY_CODES(5);
+ } else if (*origBytes == 0x8b && (origBytes[1] & kMaskMod) == kModReg) {
+ // 8B /r: mov r32, r/m32
+ COPY_CODES(2);
+ } else if (*origBytes == 0xf7 &&
+ (origBytes[1] & (kMaskMod | kMaskReg)) ==
+ (kModReg | (0 << kRegFieldShift))) {
+ // F7 /0 id: test r/m32, imm32
+ COPY_CODES(6);
+ } else {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ } else if (*origBytes == 0x44) {
+ // REX.R
+ COPY_CODES(1);
+
+ // TODO: Combine with the "0x89" case below in the REX.W section
+ if (*origBytes == 0x89) {
+ // mov r/m32, r32
+ COPY_CODES(1);
+ int len = CountModRmSib(origBytes);
+ if (len < 0) {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ COPY_CODES(len);
+ } else {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ } else if (*origBytes == 0x45) {
+ // REX.R & REX.B
+ COPY_CODES(1);
+
+ if (*origBytes == 0x33) {
+ // xor r32, r32
+ COPY_CODES(2);
+ } else {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ } else if ((*origBytes & 0xfa) == 0x48) {
+ // REX.W | REX.WR | REX.WRB | REX.WB
+ COPY_CODES(1);
+
+ if (*origBytes == 0x81 && (origBytes[1] & 0xf8) == 0xe8) {
+ // sub r, dword
+ COPY_CODES(6);
+ } else if (*origBytes == 0x83 && (origBytes[1] & 0xf8) == 0xe8) {
+ // sub r, byte
+ COPY_CODES(3);
+ } else if (*origBytes == 0x83 &&
+ (origBytes[1] & (kMaskMod | kMaskReg)) == kModReg) {
+ // add r, byte
+ COPY_CODES(3);
+ } else if (*origBytes == 0x83 && (origBytes[1] & 0xf8) == 0x60) {
+ // and [r+d], imm8
+ COPY_CODES(5);
+ } else if (*origBytes == 0x2b && (origBytes[1] & kMaskMod) == kModReg) {
+ // sub r64, r64
+ COPY_CODES(2);
+ } else if (*origBytes == 0x85) {
+ // 85 /r => TEST r/m32, r32
+ if ((origBytes[1] & 0xc0) == 0xc0) {
+ COPY_CODES(2);
+ } else {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ } else if ((*origBytes & 0xfd) == 0x89) {
+ // MOV r/m64, r64 | MOV r64, r/m64
+ BYTE reg;
+ int len = CountModRmSib(origBytes + 1, &reg);
+ if (len < 0) {
+ MOZ_ASSERT(len == kModOperand64);
+ if (len != kModOperand64) {
+ return;
+ }
+ origBytes += 2; // skip the MOV and MOD R/M bytes
+
+ // The instruction MOVs 64-bit data from a RIP-relative memory
+ // address (determined with a 32-bit offset from RIP) into a
+ // 64-bit register.
+ uintptr_t absAddr = origBytes.ReadDisp32AsAbsolute();
+
+ if (reg == kRegAx) {
+ // Destination is RAX. Encode instruction as MOVABS with a
+ // 64-bit absolute address as its immediate operand.
+ tramp.WriteByte(0xa1);
+ tramp.WritePointer(absAddr);
+ } else {
+ // The MOV must be done in two steps. First, we MOVABS the
+ // absolute 64-bit address into our target register.
+ // Then, we MOV from that address into the register
+ // using register-indirect addressing.
+ tramp.WriteByte(0xb8 + reg);
+ tramp.WritePointer(absAddr);
+ tramp.WriteByte(0x48);
+ tramp.WriteByte(0x8b);
+ tramp.WriteByte(BuildModRmByte(kModNoRegDisp, reg, reg));
+ }
+ } else {
+ COPY_CODES(len + 1);
+ }
+ } else if ((*origBytes & 0xf8) == 0xb8) {
+ // MOV r64, imm64
+ COPY_CODES(9);
+ } else if (*origBytes == 0xc7) {
+ // MOV r/m64, imm32
+ if (origBytes[1] == 0x44) {
+ // MOV [r64+disp8], imm32
+ // ModR/W + SIB + disp8 + imm32
+ COPY_CODES(8);
+ } else {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ } else if (*origBytes == 0xff) {
+ // JMP/4 or CALL/2
+ if ((origBytes[1] & 0xc0) == 0x0 && (origBytes[1] & 0x07) == 0x5 &&
+ ((origBytes[1] & 0x38) == 0x20 ||
+ (origBytes[1] & 0x38) == 0x10)) {
+ origBytes += 2;
+ --tramp; // overwrite the REX.W/REX.RW we copied above
+
+ foundJmp = (origBytes[1] & 0x38) == 0x20;
+ if (!GenerateJump(tramp, origBytes.ChasePointerFromDisp(),
+ foundJmp ? JumpType::Jmp : JumpType::Call)) {
+ return;
+ }
+ } else {
+ // not support yet!
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ } else if (*origBytes == 0x8d) {
+ // LEA reg, addr
+ if ((origBytes[1] & kMaskMod) == 0x0 &&
+ (origBytes[1] & kMaskRm) == 0x5) {
+ // [rip+disp32]
+ // convert 32bit offset to 64bit direct and convert instruction
+ // to a simple 64-bit mov
+ BYTE reg = (origBytes[1] & kMaskReg) >> kRegFieldShift;
+ origBytes += 2;
+ uintptr_t absAddr = origBytes.ReadDisp32AsAbsolute();
+ tramp.WriteByte(0xb8 + reg); // move
+ tramp.WritePointer(absAddr);
+ } else {
+ // Above we dealt with RIP-relative instructions. Any other
+ // operand form can simply be copied.
+ int len = CountModRmSib(origBytes + 1);
+ // We handled the kModOperand64 -- ie RIP-relative -- case above
+ MOZ_ASSERT(len > 0);
+ COPY_CODES(len + 1);
+ }
+ } else if (*origBytes == 0x63 && (origBytes[1] & kMaskMod) == kModReg) {
+ // movsxd r64, r32 (move + sign extend)
+ COPY_CODES(2);
+ } else {
+ // not support yet!
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ } else if (*origBytes == 0x66) {
+ // operand override prefix
+ COPY_CODES(1);
+ // This is the same as the x86 version
+ if (*origBytes >= 0x88 && *origBytes <= 0x8B) {
+ // various MOVs
+ unsigned char b = origBytes[1];
+ if (((b & 0xc0) == 0xc0) ||
+ (((b & 0xc0) == 0x00) && ((b & 0x07) != 0x04) &&
+ ((b & 0x07) != 0x05))) {
+ // REG=r, R/M=r or REG=r, R/M=[r]
+ COPY_CODES(2);
+ } else if ((b & 0xc0) == 0x40) {
+ if ((b & 0x07) == 0x04) {
+ // REG=r, R/M=[SIB + disp8]
+ COPY_CODES(4);
+ } else {
+ // REG=r, R/M=[r + disp8]
+ COPY_CODES(3);
+ }
+ } else {
+ // complex MOV, bail
+ MOZ_ASSERT_UNREACHABLE("Unrecognized MOV opcode sequence");
+ return;
+ }
+ } else if (*origBytes == 0x44 && origBytes[1] == 0x89) {
+ // mov word ptr [reg+disp8], reg
+ COPY_CODES(2);
+ int len = CountModRmSib(origBytes);
+ if (len < 0) {
+ // no way to support this yet.
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ COPY_CODES(len);
+ }
+ } else if ((*origBytes & 0xf0) == 0x50) {
+ // 1-byte push/pop
+ COPY_CODES(1);
+ } else if (*origBytes == 0x65) {
+ // GS prefix
+ //
+ // The entry of GetKeyState on Windows 10 has the following code.
+ // 65 48 8b 04 25 30 00 00 00 mov rax,qword ptr gs:[30h]
+ // (GS prefix + REX + MOV (0x8b) ...)
+ if (origBytes[1] == 0x48 &&
+ (origBytes[2] >= 0x88 && origBytes[2] <= 0x8b)) {
+ COPY_CODES(3);
+ int len = CountModRmSib(origBytes);
+ if (len < 0) {
+ // no way to support this yet.
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ COPY_CODES(len);
+ } else {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ } else if (*origBytes == 0x80 && origBytes[1] == 0x3d) {
+ origBytes += 2;
+
+ // cmp byte ptr [rip-relative address], imm8
+ // We'll compute the absolute address and do the cmp in r11
+
+ // push r11 (to save the old value)
+ tramp.WriteByte(0x49);
+ tramp.WriteByte(0x53);
+
+ uintptr_t absAddr = origBytes.ReadDisp32AsAbsolute();
+
+ // mov r11, absolute address
+ tramp.WriteByte(0x49);
+ tramp.WriteByte(0xbb);
+ tramp.WritePointer(absAddr);
+
+ // cmp byte ptr [r11],...
+ tramp.WriteByte(0x41);
+ tramp.WriteByte(0x80);
+ tramp.WriteByte(0x3b);
+
+ // ...imm8
+ COPY_CODES(1);
+
+ // pop r11 (doesn't affect the flags from the cmp)
+ tramp.WriteByte(0x49);
+ tramp.WriteByte(0x5b);
+ } else if (*origBytes == 0x90) {
+ // nop
+ COPY_CODES(1);
+ } else if ((*origBytes & 0xf8) == 0xb8) {
+ // MOV r32, imm32
+ COPY_CODES(5);
+ } else if (*origBytes == 0x33) {
+ // xor r32, r/m32
+ COPY_CODES(2);
+ } else if (*origBytes == 0xf6) {
+ // test r/m8, imm8 (used by ntdll on Windows 10 x64)
+ // (no flags are affected by near jmp since there is no task switch,
+ // so it is ok for a jmp to be written immediately after a test)
+ BYTE subOpcode = 0;
+ int nModRmSibBytes = CountModRmSib(origBytes + 1, &subOpcode);
+ if (nModRmSibBytes < 0 || subOpcode != 0) {
+ // Unsupported
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ COPY_CODES(2 + nModRmSibBytes);
+ } else if (*origBytes == 0x85) {
+ // test r/m32, r32
+ int nModRmSibBytes = CountModRmSib(origBytes + 1);
+ if (nModRmSibBytes < 0) {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ COPY_CODES(1 + nModRmSibBytes);
+ } else if (*origBytes == 0xd1 && (origBytes[1] & kMaskMod) == kModReg) {
+ // bit shifts/rotates : (SA|SH|RO|RC)(R|L) r32
+ // (e.g. 0xd1 0xe0 is SAL, 0xd1 0xc8 is ROR)
+ COPY_CODES(2);
+ } else if (*origBytes == 0x83 && (origBytes[1] & kMaskMod) == kModReg) {
+ // ADD|OR|ADC|SBB|AND|SUB|XOR|CMP r, imm8
+ COPY_CODES(3);
+ } else if (*origBytes == 0xc3) {
+ // ret
+ COPY_CODES(1);
+ } else if (*origBytes == 0xcc) {
+ // int 3
+ COPY_CODES(1);
+ } else if (*origBytes == 0xe8 || *origBytes == 0xe9) {
+ // CALL (0xe8) or JMP (0xe9) 32bit offset
+ foundJmp = *origBytes == 0xe9;
+ ++origBytes;
+
+ if (!GenerateJump(tramp, origBytes.ReadDisp32AsAbsolute(),
+ foundJmp ? JumpType::Jmp : JumpType::Call)) {
+ return;
+ }
+ } else if (*origBytes >= 0x73 && *origBytes <= 0x75) {
+ // 73 cb JAE rel8
+ // 74 cb JE rel8
+ // 75 cb JNE rel8
+ const JumpType kJumpTypes[] = {JumpType::Jae, JumpType::Je,
+ JumpType::Jne};
+ auto jumpType = kJumpTypes[*origBytes - 0x73];
+ uint8_t offset = origBytes[1];
+
+ origBytes += 2;
+
+ if (!GenerateJump(tramp, origBytes.OffsetToAbsolute(offset),
+ jumpType)) {
+ return;
+ }
+ } else if (*origBytes == 0xff) {
+ uint8_t mod = origBytes[1] & kMaskMod;
+ uint8_t reg = (origBytes[1] & kMaskReg) >> kRegFieldShift;
+ uint8_t rm = origBytes[1] & kMaskRm;
+ if (mod == kModReg && (reg == 0 || reg == 1 || reg == 2 || reg == 6)) {
+ // INC|DEC|CALL|PUSH r64
+ COPY_CODES(2);
+ } else if (mod == kModNoRegDisp && reg == 2 &&
+ rm == kRmNoRegDispDisp32) {
+ // FF 15 CALL [disp32]
+ origBytes += 2;
+ if (!GenerateJump(tramp, origBytes.ChasePointerFromDisp(),
+ JumpType::Call)) {
+ return;
+ }
+ } else if (reg == 4) {
+ // FF /4 (Opcode=ff, REG=4): JMP r/m
+ if (mod == kModNoRegDisp && rm == kRmNoRegDispDisp32) {
+ // FF 25 JMP [disp32]
+ foundJmp = true;
+
+ origBytes += 2;
+
+ uintptr_t jmpDest = origBytes.ChasePointerFromDisp();
+
+ if (!GenerateJump(tramp, jmpDest, JumpType::Jmp)) {
+ return;
+ }
+ } else {
+ // JMP r/m except JMP [disp32]
+ int len = CountModRmSib(origBytes + 1);
+ if (len < 0) {
+ // RIP-relative not yet supported
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+
+ COPY_CODES(len + 1);
+
+ foundJmp = true;
+ }
+ } else {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ } else if (*origBytes == 0x83 && (origBytes[1] & 0xf8) == 0x60) {
+ // and [r+d], imm8
+ COPY_CODES(5);
+ } else if (*origBytes == 0xc6) {
+ // mov [r+d], imm8
+ int len = CountModRmSib(origBytes + 1);
+ if (len < 0) {
+ // RIP-relative not yet supported
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ COPY_CODES(len + 2);
+ } else {
+ MOZ_ASSERT_UNREACHABLE("Unrecognized opcode sequence");
+ return;
+ }
+ }
+#elif defined(_M_ARM64)
+
+ // The number of bytes required to facilitate a detour depends on the
+ // proximity of the hook function to the target function. In the best case,
+ // we can branch within +/- 128MB of the current location, requiring only
+ // 4 bytes. In the worst case, we need 16 bytes to load an absolute address
+ // into a register and then branch to it.
+ const uint32_t bytesRequiredFromDecode =
+ (mFlags.value() & DetourFlags::eTestOnlyForceShortPatch)
+ ? 4
+ : kWorstCaseBytesRequired;
+
+ while (origBytes.GetOffset() < bytesRequiredFromDecode) {
+ uintptr_t curPC = origBytes.GetCurrentAbsolute();
+ uint32_t curInst = origBytes.ReadNextInstruction();
+
+ Result<arm64::LoadOrBranch, arm64::PCRelCheckError> pcRelInfo =
+ arm64::CheckForPCRel(curPC, curInst);
+ if (pcRelInfo.isErr()) {
+ if (pcRelInfo.unwrapErr() ==
+ arm64::PCRelCheckError::InstructionNotPCRel) {
+ // Instruction is not PC-relative, we can just copy it verbatim
+ tramp.WriteInstruction(curInst);
+ continue;
+ }
+
+ // At this point we have determined that there is no decoder available
+ // for the current, PC-relative, instruction.
+
+ // origBytes is now pointing one instruction past the one that we
+ // need the trampoline to jump back to.
+ if (!origBytes.BackUpOneInstruction()) {
+ return;
+ }
+
+ break;
+ }
+
+ // We need to load an absolute address into a particular register
+ tramp.WriteLoadLiteral(pcRelInfo.inspect().mAbsAddress,
+ pcRelInfo.inspect().mDestReg);
+ }
+
+#else
+# error "Unknown processor type"
+#endif
+
+ if (origBytes.GetOffset() > 100) {
+ // printf ("Too big!");
+ return;
+ }
+
+#if defined(_M_IX86)
+ if (pJmp32 >= 0) {
+ // Jump directly to the original target of the jump instead of jumping to
+ // the original function. Adjust jump target displacement to jump location
+ // in the trampoline.
+ tramp.AdjustDisp32AtOffset(pJmp32 + 1, origBytes.GetBaseAddress());
+ } else {
+ tramp.WriteByte(0xe9); // jmp
+ tramp.WriteDisp32(origBytes.GetAddress());
+ }
+#elif defined(_M_X64)
+ // If we found a Jmp, we don't need to add another instruction. However,
+ // if we found a _conditional_ jump or a CALL (or no control operations
+ // at all) then we still need to run the rest of aOriginalFunction.
+ if (!foundJmp) {
+ if (!GenerateJump(tramp, origBytes.GetAddress(), JumpType::Jmp)) {
+ return;
+ }
+ }
+#elif defined(_M_ARM64)
+ // Let's find out how many bytes we have available to us for patching
+ uint32_t numBytesForPatching = tramp.GetCurrentExecutableCodeLen();
+
+ if (!numBytesForPatching) {
+ // There's nothing we can do
+ return;
+ }
+
+ if (tramp.IsNull()) {
+ // Recursive case
+ HMODULE targetModule = nullptr;
+
+ if (numBytesForPatching < kWorstCaseBytesRequired) {
+ if (!::GetModuleHandleExW(
+ GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
+ GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+ reinterpret_cast<LPCWSTR>(origBytes.GetBaseAddress()),
+ &targetModule)) {
+ return;
+ }
+ }
+
+ Maybe<TrampPoolT> maybeTrampPool = DoReserve(targetModule);
+ MOZ_ASSERT(maybeTrampPool);
+ if (!maybeTrampPool) {
+ return;
+ }
+
+ Maybe<Trampoline<MMPolicyT>> maybeRealTramp(
+ maybeTrampPool.ref().GetNextTrampoline());
+ if (!maybeRealTramp) {
+ return;
+ }
+
+ origBytes.Rewind();
+ CreateTrampoline(origBytes, maybeTrampPool.ptr(), maybeRealTramp.ref(),
+ aDest, aOutTramp);
+ return;
+ }
+
+ // Write the branch from the trampoline back to the original code
+
+ tramp.WriteLoadLiteral(origBytes.GetAddress(), 16);
+ tramp.WriteInstruction(arm64::BuildUnconditionalBranchToRegister(16));
+#else
+# error "Unsupported processor architecture"
+#endif
+
+ // The trampoline is now complete.
+ void* trampPtr = tramp.EndExecutableCode();
+ if (!trampPtr) {
+ return;
+ }
+
+#ifdef _M_X64
+ if constexpr (MMPolicyT::kSupportsUnwindInfo) {
+ DebugOnly<bool> unwindInfoAdded = tramp.AddUnwindInfo(
+ origBytes.GetBaseAddress(), origBytes.GetOffset());
+ MOZ_ASSERT(unwindInfoAdded);
+ }
+#endif // _M_X64
+
+ WritableTargetFunction<MMPolicyT> target(origBytes.Promote());
+ if (!target) {
+ return;
+ }
+
+ do {
+ // Now patch the original function.
+ // When we're instructed to apply a non-default patch, apply it and exit.
+ // If non-default patching fails, bail out, no fallback.
+ // Otherwise, we go straight to the default patch.
+
+#if defined(_M_X64)
+ if (use10BytePatch) {
+ if (!Apply10BytePatch(aTrampPool, trampPtr, target, aDest)) {
+ return;
+ }
+ break;
+ }
+#elif defined(_M_ARM64)
+ if (numBytesForPatching < kWorstCaseBytesRequired) {
+ if (!Apply4BytePatch(aTrampPool, trampPtr, target, aDest)) {
+ return;
+ }
+ break;
+ }
+#endif
+
+ PrimitiveT::ApplyDefaultPatch(target, aDest);
+ } while (false);
+
+ // Output the trampoline, thus signalling that this call was a success. This
+ // must happen before our patched function can be reached from another
+ // thread, so before we commit the target code (bug 1838286).
+ *aOutTramp = trampPtr;
+
+ if (!target.Commit()) {
+ *aOutTramp = nullptr;
+ }
+ }
+};
+
+} // namespace interceptor
+} // namespace mozilla
+
+#endif // mozilla_interceptor_PatcherDetour_h