Diffstat
49 files changed, 8326 insertions, 0 deletions
diff --git a/gfx/skia/skia/src/base/README.md b/gfx/skia/skia/src/base/README.md new file mode 100644 index 0000000000..322c671436 --- /dev/null +++ b/gfx/skia/skia/src/base/README.md @@ -0,0 +1,4 @@ +The files here are part of the base package (see also include/private/base). The distinction +is that the files here are not needed by anything in the public API. + +Files here should not depend on anything other than system headers or other files in base.
\ No newline at end of file diff --git a/gfx/skia/skia/src/base/SkASAN.h b/gfx/skia/skia/src/base/SkASAN.h new file mode 100644 index 0000000000..8da93daaa0 --- /dev/null +++ b/gfx/skia/skia/src/base/SkASAN.h @@ -0,0 +1,65 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkASAN_DEFINED +#define SkASAN_DEFINED + +#include <cstddef> + +#ifdef MOZ_SKIA + +#include "mozilla/MemoryChecking.h" + +#ifdef MOZ_HAVE_MEM_CHECKS +#define SK_SANITIZE_ADDRESS MOZ_HAVE_MEM_CHECKS +#endif + +static inline void sk_asan_poison_memory_region(void const volatile *addr, size_t size) { + MOZ_MAKE_MEM_NOACCESS(addr, size); +} + +static inline void sk_asan_unpoison_memory_region(void const volatile *addr, size_t size) { + MOZ_MAKE_MEM_DEFINED(addr, size); +} + +#else // !MOZ_SKIA + +#ifdef __SANITIZE_ADDRESS__ + #define SK_SANITIZE_ADDRESS 1 +#endif +#if !defined(SK_SANITIZE_ADDRESS) && defined(__has_feature) + #if __has_feature(address_sanitizer) + #define SK_SANITIZE_ADDRESS 1 + #endif +#endif + +// Typically declared in LLVM's asan_interface.h. +#ifdef SK_SANITIZE_ADDRESS +extern "C" { + void __asan_poison_memory_region(void const volatile *addr, size_t size); + void __asan_unpoison_memory_region(void const volatile *addr, size_t size); +} +#endif + +// Code that implements bespoke allocation arenas can poison the entire arena on creation, then +// unpoison chunks of arena memory as they are parceled out. Consider leaving gaps between blocks +// to detect buffer overrun. +static inline void sk_asan_poison_memory_region(void const volatile *addr, size_t size) { +#ifdef SK_SANITIZE_ADDRESS + __asan_poison_memory_region(addr, size); +#endif +} + +static inline void sk_asan_unpoison_memory_region(void const volatile *addr, size_t size) { +#ifdef SK_SANITIZE_ADDRESS + __asan_unpoison_memory_region(addr, size); +#endif +} + +#endif // !MOZ_SKIA + +#endif // SkASAN_DEFINED diff --git a/gfx/skia/skia/src/base/SkArenaAlloc.cpp b/gfx/skia/skia/src/base/SkArenaAlloc.cpp new file mode 100644 index 0000000000..2dc1c00226 --- /dev/null +++ b/gfx/skia/skia/src/base/SkArenaAlloc.cpp @@ -0,0 +1,173 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
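The SkASAN.h header above compiles to no-ops unless an address sanitizer is available (or, in the MOZ_SKIA build, mozilla/MemoryChecking.h supplies the hooks). Below is a minimal sketch of the arena pattern its comment describes, poisoning a whole region up front and unpoisoning chunks as they are handed out. The TinyArena type and its sizes are made-up example values, and the include assumes the Skia source tree is on the include path.

    // Illustration only: the poison/unpoison pattern described in SkASAN.h.
    // Under a non-sanitizer build both helpers are no-ops, so this compiles
    // and behaves the same everywhere.
    #include "src/base/SkASAN.h"

    #include <cstddef>

    struct TinyArena {                       // hypothetical example type
        char   fStorage[256];
        size_t fUsed = 0;

        TinyArena()  { sk_asan_poison_memory_region(fStorage, sizeof(fStorage)); }
        ~TinyArena() { sk_asan_unpoison_memory_region(fStorage, sizeof(fStorage)); }

        void* alloc(size_t n) {
            if (fUsed + n > sizeof(fStorage)) { return nullptr; }
            void* p = fStorage + fUsed;
            fUsed += n;
            // Only the bytes being parceled out become addressable again.
            sk_asan_unpoison_memory_region(p, n);
            return p;
        }
    };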
+ */ + +#include "src/base/SkArenaAlloc.h" + +#include "include/private/base/SkMalloc.h" + +#include <algorithm> +#include <cassert> +#include <cstddef> + +static char* end_chain(char*) { return nullptr; } + +SkArenaAlloc::SkArenaAlloc(char* block, size_t size, size_t firstHeapAllocation) + : fDtorCursor {block} + , fCursor {block} + , fEnd {block + SkToU32(size)} + , fFibonacciProgression{SkToU32(size), SkToU32(firstHeapAllocation)} +{ + if (size < sizeof(Footer)) { + fEnd = fCursor = fDtorCursor = nullptr; + } + + if (fCursor != nullptr) { + this->installFooter(end_chain, 0); + sk_asan_poison_memory_region(fCursor, fEnd - fCursor); + } +} + +SkArenaAlloc::~SkArenaAlloc() { + RunDtorsOnBlock(fDtorCursor); +} + +void SkArenaAlloc::installFooter(FooterAction* action, uint32_t padding) { + assert(SkTFitsIn<uint8_t>(padding)); + this->installRaw(action); + this->installRaw((uint8_t)padding); + fDtorCursor = fCursor; +} + +char* SkArenaAlloc::SkipPod(char* footerEnd) { + char* objEnd = footerEnd - (sizeof(Footer) + sizeof(uint32_t)); + uint32_t skip; + memmove(&skip, objEnd, sizeof(uint32_t)); + return objEnd - (ptrdiff_t) skip; +} + +void SkArenaAlloc::RunDtorsOnBlock(char* footerEnd) { + while (footerEnd != nullptr) { + FooterAction* action; + uint8_t padding; + + memcpy(&action, footerEnd - sizeof( Footer), sizeof( action)); + memcpy(&padding, footerEnd - sizeof(padding), sizeof(padding)); + + footerEnd = action(footerEnd) - (ptrdiff_t)padding; + } +} + +char* SkArenaAlloc::NextBlock(char* footerEnd) { + char* objEnd = footerEnd - (sizeof(char*) + sizeof(Footer)); + char* next; + memmove(&next, objEnd, sizeof(char*)); + RunDtorsOnBlock(next); + sk_free(objEnd); + return nullptr; +} + +void SkArenaAlloc::ensureSpace(uint32_t size, uint32_t alignment) { + constexpr uint32_t headerSize = sizeof(Footer) + sizeof(ptrdiff_t); + constexpr uint32_t maxSize = std::numeric_limits<uint32_t>::max(); + constexpr uint32_t overhead = headerSize + sizeof(Footer); + AssertRelease(size <= maxSize - overhead); + uint32_t objSizeAndOverhead = size + overhead; + + const uint32_t alignmentOverhead = alignment - 1; + AssertRelease(objSizeAndOverhead <= maxSize - alignmentOverhead); + objSizeAndOverhead += alignmentOverhead; + + uint32_t minAllocationSize = fFibonacciProgression.nextBlockSize(); + uint32_t allocationSize = std::max(objSizeAndOverhead, minAllocationSize); + + // Round up to a nice size. If > 32K align to 4K boundary else up to max_align_t. The > 32K + // heuristic is from the JEMalloc behavior. + { + uint32_t mask = allocationSize > (1 << 15) ? (1 << 12) - 1 : 16 - 1; + AssertRelease(allocationSize <= maxSize - mask); + allocationSize = (allocationSize + mask) & ~mask; + } + + char* newBlock = static_cast<char*>(sk_malloc_throw(allocationSize)); + + auto previousDtor = fDtorCursor; + fCursor = newBlock; + fDtorCursor = newBlock; + fEnd = fCursor + allocationSize; + + // poison the unused bytes in the block. + sk_asan_poison_memory_region(fCursor, fEnd - fCursor); + + this->installRaw(previousDtor); + this->installFooter(NextBlock, 0); +} + +char* SkArenaAlloc::allocObjectWithFooter(uint32_t sizeIncludingFooter, uint32_t alignment) { + uintptr_t mask = alignment - 1; + +restart: + uint32_t skipOverhead = 0; + const bool needsSkipFooter = fCursor != fDtorCursor; + if (needsSkipFooter) { + skipOverhead = sizeof(Footer) + sizeof(uint32_t); + } + const uint32_t totalSize = sizeIncludingFooter + skipOverhead; + + // Math on null fCursor/fEnd is undefined behavior, so explicitly check for first alloc. 
+ if (!fCursor) { + this->ensureSpace(totalSize, alignment); + goto restart; + } + + assert(fEnd); + // This test alone would be enough nullptr were defined to be 0, but it's not. + char* objStart = (char*)((uintptr_t)(fCursor + skipOverhead + mask) & ~mask); + if ((ptrdiff_t)totalSize > fEnd - objStart) { + this->ensureSpace(totalSize, alignment); + goto restart; + } + + AssertRelease((ptrdiff_t)totalSize <= fEnd - objStart); + + // Install a skip footer if needed, thus terminating a run of POD data. The calling code is + // responsible for installing the footer after the object. + if (needsSkipFooter) { + this->installRaw(SkToU32(fCursor - fDtorCursor)); + this->installFooter(SkipPod, 0); + } + + return objStart; +} + +SkArenaAllocWithReset::SkArenaAllocWithReset(char* block, + size_t size, + size_t firstHeapAllocation) + : SkArenaAlloc(block, size, firstHeapAllocation) + , fFirstBlock{block} + , fFirstSize{SkToU32(size)} + , fFirstHeapAllocationSize{SkToU32(firstHeapAllocation)} {} + +void SkArenaAllocWithReset::reset() { + char* const firstBlock = fFirstBlock; + const uint32_t firstSize = fFirstSize; + const uint32_t firstHeapAllocationSize = fFirstHeapAllocationSize; + this->~SkArenaAllocWithReset(); + new (this) SkArenaAllocWithReset{firstBlock, firstSize, firstHeapAllocationSize}; +} + +// SkFibonacci47 is the first 47 Fibonacci numbers. Fib(47) is the largest value less than 2 ^ 32. +// Used by SkFibBlockSizes. +std::array<const uint32_t, 47> SkFibonacci47 { + 1, 1, 2, 3, 5, 8, + 13, 21, 34, 55, 89, 144, + 233, 377, 610, 987, 1597, 2584, + 4181, 6765, 10946, 17711, 28657, 46368, + 75025, 121393, 196418, 317811, 514229, 832040, + 1346269, 2178309, 3524578, 5702887, 9227465, 14930352, + 24157817, 39088169, 63245986, 102334155, 165580141, 267914296, + 433494437, 701408733, 1134903170, 1836311903, 2971215073, +}; diff --git a/gfx/skia/skia/src/base/SkArenaAlloc.h b/gfx/skia/skia/src/base/SkArenaAlloc.h new file mode 100644 index 0000000000..547f2c5910 --- /dev/null +++ b/gfx/skia/skia/src/base/SkArenaAlloc.h @@ -0,0 +1,336 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkArenaAlloc_DEFINED +#define SkArenaAlloc_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkTFitsIn.h" +#include "include/private/base/SkTo.h" +#include "src/base/SkASAN.h" + +#include <algorithm> +#include <array> +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <limits> +#include <new> +#include <type_traits> +#include <utility> + +// We found allocating strictly doubling amounts of memory from the heap left too +// much unused slop, particularly on Android. Instead we'll follow a Fibonacci-like +// progression. + +// SkFibonacci47 is the first 47 Fibonacci numbers. Fib(47) is the largest value less than 2 ^ 32. +extern std::array<const uint32_t, 47> SkFibonacci47; +template<uint32_t kMaxSize> +class SkFibBlockSizes { +public: + // staticBlockSize, and firstAllocationSize are parameters describing the initial memory + // layout. staticBlockSize describes the size of the inlined memory, and firstAllocationSize + // describes the size of the first block to be allocated if the static block is exhausted. By + // convention, firstAllocationSize is the first choice for the block unit size followed by + // staticBlockSize followed by the default of 1024 bytes. 
+ SkFibBlockSizes(uint32_t staticBlockSize, uint32_t firstAllocationSize) : fIndex{0} { + fBlockUnitSize = firstAllocationSize > 0 ? firstAllocationSize : + staticBlockSize > 0 ? staticBlockSize : 1024; + + SkASSERT_RELEASE(0 < fBlockUnitSize); + SkASSERT_RELEASE(fBlockUnitSize < std::min(kMaxSize, (1u << 26) - 1)); + } + + uint32_t nextBlockSize() { + uint32_t result = SkFibonacci47[fIndex] * fBlockUnitSize; + + if (SkTo<size_t>(fIndex + 1) < SkFibonacci47.size() && + SkFibonacci47[fIndex + 1] < kMaxSize / fBlockUnitSize) + { + fIndex += 1; + } + + return result; + } + +private: + uint32_t fIndex : 6; + uint32_t fBlockUnitSize : 26; +}; + +// SkArenaAlloc allocates object and destroys the allocated objects when destroyed. It's designed +// to minimize the number of underlying block allocations. SkArenaAlloc allocates first out of an +// (optional) user-provided block of memory, and when that's exhausted it allocates on the heap, +// starting with an allocation of firstHeapAllocation bytes. If your data (plus a small overhead) +// fits in the user-provided block, SkArenaAlloc never uses the heap, and if it fits in +// firstHeapAllocation bytes, it'll use the heap only once. If 0 is specified for +// firstHeapAllocation, then blockSize is used unless that too is 0, then 1024 is used. +// +// Examples: +// +// char block[mostCasesSize]; +// SkArenaAlloc arena(block, mostCasesSize); +// +// If mostCasesSize is too large for the stack, you can use the following pattern. +// +// std::unique_ptr<char[]> block{new char[mostCasesSize]}; +// SkArenaAlloc arena(block.get(), mostCasesSize, almostAllCasesSize); +// +// If the program only sometimes allocates memory, use the following pattern. +// +// SkArenaAlloc arena(nullptr, 0, almostAllCasesSize); +// +// The storage does not necessarily need to be on the stack. Embedding the storage in a class also +// works. +// +// class Foo { +// char storage[mostCasesSize]; +// SkArenaAlloc arena (storage, mostCasesSize); +// }; +// +// In addition, the system is optimized to handle POD data including arrays of PODs (where +// POD is really data with no destructors). For POD data it has zero overhead per item, and a +// typical per block overhead of 8 bytes. For non-POD objects there is a per item overhead of 4 +// bytes. For arrays of non-POD objects there is a per array overhead of typically 8 bytes. There +// is an addition overhead when switching from POD data to non-POD data of typically 8 bytes. +// +// If additional blocks are needed they are increased exponentially. This strategy bounds the +// recursion of the RunDtorsOnBlock to be limited to O(log size-of-memory). Block size grow using +// the Fibonacci sequence which means that for 2^32 memory there are 48 allocations, and for 2^48 +// there are 71 allocations. 
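To make the Fibonacci-like growth described above concrete, here is a standalone sketch of the progression SkFibBlockSizes produces. It reimplements only the arithmetic, not the Skia class, and the 1024-byte unit is an arbitrary example: each new heap block is the next Fibonacci number times the block unit size, so blocks grow by 1, 1, 2, 3, 5, 8, ... units rather than doubling.

    // Standalone illustration of the Fibonacci-like block-size progression
    // used by SkArenaAlloc; values are examples, clamping near 2^32 is omitted.
    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint32_t blockUnitSize = 1024;   // e.g. firstHeapAllocation
        uint32_t fib0 = 1, fib1 = 1;           // consecutive Fibonacci numbers

        for (int block = 0; block < 8; ++block) {
            std::printf("heap block %d: %u bytes\n",
                        block, (unsigned)(fib0 * blockUnitSize));
            uint32_t next = fib0 + fib1;       // advance the sequence
            fib0 = fib1;
            fib1 = next;
        }
        return 0;
    }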
+class SkArenaAlloc { +public: + SkArenaAlloc(char* block, size_t blockSize, size_t firstHeapAllocation); + + explicit SkArenaAlloc(size_t firstHeapAllocation) + : SkArenaAlloc(nullptr, 0, firstHeapAllocation) {} + + SkArenaAlloc(const SkArenaAlloc&) = delete; + SkArenaAlloc& operator=(const SkArenaAlloc&) = delete; + SkArenaAlloc(SkArenaAlloc&&) = delete; + SkArenaAlloc& operator=(SkArenaAlloc&&) = delete; + + ~SkArenaAlloc(); + + template <typename Ctor> + auto make(Ctor&& ctor) -> decltype(ctor(nullptr)) { + using T = std::remove_pointer_t<decltype(ctor(nullptr))>; + + uint32_t size = SkToU32(sizeof(T)); + uint32_t alignment = SkToU32(alignof(T)); + char* objStart; + if (std::is_trivially_destructible<T>::value) { + objStart = this->allocObject(size, alignment); + fCursor = objStart + size; + sk_asan_unpoison_memory_region(objStart, size); + } else { + objStart = this->allocObjectWithFooter(size + sizeof(Footer), alignment); + // Can never be UB because max value is alignof(T). + uint32_t padding = SkToU32(objStart - fCursor); + + // Advance to end of object to install footer. + fCursor = objStart + size; + sk_asan_unpoison_memory_region(objStart, size); + FooterAction* releaser = [](char* objEnd) { + char* objStart = objEnd - (sizeof(T) + sizeof(Footer)); + ((T*)objStart)->~T(); + return objStart; + }; + this->installFooter(releaser, padding); + } + + // This must be last to make objects with nested use of this allocator work. + return ctor(objStart); + } + + template <typename T, typename... Args> + T* make(Args&&... args) { + return this->make([&](void* objStart) { + return new(objStart) T(std::forward<Args>(args)...); + }); + } + + template <typename T> + T* makeArrayDefault(size_t count) { + T* array = this->allocUninitializedArray<T>(count); + for (size_t i = 0; i < count; i++) { + // Default initialization: if T is primitive then the value is left uninitialized. + new (&array[i]) T; + } + return array; + } + + template <typename T> + T* makeArray(size_t count) { + T* array = this->allocUninitializedArray<T>(count); + for (size_t i = 0; i < count; i++) { + // Value initialization: if T is primitive then the value is zero-initialized. + new (&array[i]) T(); + } + return array; + } + + template <typename T, typename Initializer> + T* makeInitializedArray(size_t count, Initializer initializer) { + T* array = this->allocUninitializedArray<T>(count); + for (size_t i = 0; i < count; i++) { + new (&array[i]) T(initializer(i)); + } + return array; + } + + // Only use makeBytesAlignedTo if none of the typed variants are impractical to use. 
+ void* makeBytesAlignedTo(size_t size, size_t align) { + AssertRelease(SkTFitsIn<uint32_t>(size)); + auto objStart = this->allocObject(SkToU32(size), SkToU32(align)); + fCursor = objStart + size; + sk_asan_unpoison_memory_region(objStart, size); + return objStart; + } + +private: + static void AssertRelease(bool cond) { if (!cond) { ::abort(); } } + + using FooterAction = char* (char*); + struct Footer { + uint8_t unaligned_action[sizeof(FooterAction*)]; + uint8_t padding; + }; + + static char* SkipPod(char* footerEnd); + static void RunDtorsOnBlock(char* footerEnd); + static char* NextBlock(char* footerEnd); + + template <typename T> + void installRaw(const T& val) { + sk_asan_unpoison_memory_region(fCursor, sizeof(val)); + memcpy(fCursor, &val, sizeof(val)); + fCursor += sizeof(val); + } + void installFooter(FooterAction* releaser, uint32_t padding); + + void ensureSpace(uint32_t size, uint32_t alignment); + + char* allocObject(uint32_t size, uint32_t alignment) { + uintptr_t mask = alignment - 1; + uintptr_t alignedOffset = (~reinterpret_cast<uintptr_t>(fCursor) + 1) & mask; + uintptr_t totalSize = size + alignedOffset; + AssertRelease(totalSize >= size); + if (totalSize > static_cast<uintptr_t>(fEnd - fCursor)) { + this->ensureSpace(size, alignment); + alignedOffset = (~reinterpret_cast<uintptr_t>(fCursor) + 1) & mask; + } + + char* object = fCursor + alignedOffset; + + SkASSERT((reinterpret_cast<uintptr_t>(object) & (alignment - 1)) == 0); + SkASSERT(object + size <= fEnd); + + return object; + } + + char* allocObjectWithFooter(uint32_t sizeIncludingFooter, uint32_t alignment); + + template <typename T> + T* allocUninitializedArray(size_t countZ) { + AssertRelease(SkTFitsIn<uint32_t>(countZ)); + uint32_t count = SkToU32(countZ); + + char* objStart; + AssertRelease(count <= std::numeric_limits<uint32_t>::max() / sizeof(T)); + uint32_t arraySize = SkToU32(count * sizeof(T)); + uint32_t alignment = SkToU32(alignof(T)); + + if (std::is_trivially_destructible<T>::value) { + objStart = this->allocObject(arraySize, alignment); + fCursor = objStart + arraySize; + sk_asan_unpoison_memory_region(objStart, arraySize); + } else { + constexpr uint32_t overhead = sizeof(Footer) + sizeof(uint32_t); + AssertRelease(arraySize <= std::numeric_limits<uint32_t>::max() - overhead); + uint32_t totalSize = arraySize + overhead; + objStart = this->allocObjectWithFooter(totalSize, alignment); + + // Can never be UB because max value is alignof(T). + uint32_t padding = SkToU32(objStart - fCursor); + + // Advance to end of array to install footer. 
+ fCursor = objStart + arraySize; + sk_asan_unpoison_memory_region(objStart, arraySize); + this->installRaw(SkToU32(count)); + this->installFooter( + [](char* footerEnd) { + char* objEnd = footerEnd - (sizeof(Footer) + sizeof(uint32_t)); + uint32_t count; + memmove(&count, objEnd, sizeof(uint32_t)); + char* objStart = objEnd - count * sizeof(T); + T* array = (T*) objStart; + for (uint32_t i = 0; i < count; i++) { + array[i].~T(); + } + return objStart; + }, + padding); + } + + return (T*)objStart; + } + + char* fDtorCursor; + char* fCursor; + char* fEnd; + + SkFibBlockSizes<std::numeric_limits<uint32_t>::max()> fFibonacciProgression; +}; + +class SkArenaAllocWithReset : public SkArenaAlloc { +public: + SkArenaAllocWithReset(char* block, size_t blockSize, size_t firstHeapAllocation); + + explicit SkArenaAllocWithReset(size_t firstHeapAllocation) + : SkArenaAllocWithReset(nullptr, 0, firstHeapAllocation) {} + + // Destroy all allocated objects, free any heap allocations. + void reset(); + +private: + char* const fFirstBlock; + const uint32_t fFirstSize; + const uint32_t fFirstHeapAllocationSize; +}; + +// Helper for defining allocators with inline/reserved storage. +// For argument declarations, stick to the base type (SkArenaAlloc). +// Note: Inheriting from the storage first means the storage will outlive the +// SkArenaAlloc, letting ~SkArenaAlloc read it as it calls destructors. +// (This is mostly only relevant for strict tools like MSAN.) +template <size_t InlineStorageSize> +class SkSTArenaAlloc : private std::array<char, InlineStorageSize>, public SkArenaAlloc { +public: + explicit SkSTArenaAlloc(size_t firstHeapAllocation = InlineStorageSize) + : SkArenaAlloc{this->data(), this->size(), firstHeapAllocation} {} + + ~SkSTArenaAlloc() { + // Be sure to unpoison the memory that is probably on the stack. + sk_asan_unpoison_memory_region(this->data(), this->size()); + } +}; + +template <size_t InlineStorageSize> +class SkSTArenaAllocWithReset + : private std::array<char, InlineStorageSize>, public SkArenaAllocWithReset { +public: + explicit SkSTArenaAllocWithReset(size_t firstHeapAllocation = InlineStorageSize) + : SkArenaAllocWithReset{this->data(), this->size(), firstHeapAllocation} {} + + ~SkSTArenaAllocWithReset() { + // Be sure to unpoison the memory that is probably on the stack. + sk_asan_unpoison_memory_region(this->data(), this->size()); + } +}; + +#endif // SkArenaAlloc_DEFINED diff --git a/gfx/skia/skia/src/base/SkArenaAllocList.h b/gfx/skia/skia/src/base/SkArenaAllocList.h new file mode 100644 index 0000000000..57bce52023 --- /dev/null +++ b/gfx/skia/skia/src/base/SkArenaAllocList.h @@ -0,0 +1,82 @@ +/* + * Copyright 2017 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkArenaAllocList_DEFINED +#define SkArenaAllocList_DEFINED + +#include "include/private/base/SkAssert.h" +#include "src/base/SkArenaAlloc.h" // IWYU pragma: keep + +#include <utility> + +/** + * A singly linked list of Ts stored in a SkArenaAlloc. The arena rather than the list owns + * the elements. This supports forward iteration and range based for loops. + */ +template <typename T> +class SkArenaAllocList { +private: + struct Node; + +public: + SkArenaAllocList() = default; + + void reset() { fHead = fTail = nullptr; } + + template <typename... Args> + inline T& append(SkArenaAlloc* arena, Args... 
args); + + class Iter { + public: + Iter() = default; + inline Iter& operator++(); + T& operator*() const { return fCurr->fT; } + T* operator->() const { return &fCurr->fT; } + bool operator==(const Iter& that) const { return fCurr == that.fCurr; } + bool operator!=(const Iter& that) const { return !(*this == that); } + + private: + friend class SkArenaAllocList; + explicit Iter(Node* node) : fCurr(node) {} + Node* fCurr = nullptr; + }; + + Iter begin() { return Iter(fHead); } + Iter end() { return Iter(); } + Iter tail() { return Iter(fTail); } + +private: + struct Node { + template <typename... Args> + Node(Args... args) : fT(std::forward<Args>(args)...) {} + T fT; + Node* fNext = nullptr; + }; + Node* fHead = nullptr; + Node* fTail = nullptr; +}; + +template <typename T> +template <typename... Args> +T& SkArenaAllocList<T>::append(SkArenaAlloc* arena, Args... args) { + SkASSERT(!fHead == !fTail); + auto* n = arena->make<Node>(std::forward<Args>(args)...); + if (!fTail) { + fHead = fTail = n; + } else { + fTail = fTail->fNext = n; + } + return fTail->fT; +} + +template <typename T> +typename SkArenaAllocList<T>::Iter& SkArenaAllocList<T>::Iter::operator++() { + fCurr = fCurr->fNext; + return *this; +} + +#endif diff --git a/gfx/skia/skia/src/base/SkAutoMalloc.h b/gfx/skia/skia/src/base/SkAutoMalloc.h new file mode 100644 index 0000000000..6520cc0582 --- /dev/null +++ b/gfx/skia/skia/src/base/SkAutoMalloc.h @@ -0,0 +1,178 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkAutoMalloc_DEFINED +#define SkAutoMalloc_DEFINED + +#include "include/private/base/SkAlign.h" +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkMalloc.h" +#include "include/private/base/SkNoncopyable.h" + +#include <cstddef> +#include <cstdint> +#include <memory> + +/** + * Manage an allocated block of heap memory. This object is the sole manager of + * the lifetime of the block, so the caller must not call sk_free() or delete + * on the block, unless release() was called. + */ +class SkAutoMalloc : SkNoncopyable { +public: + explicit SkAutoMalloc(size_t size = 0) + : fPtr(size ? sk_malloc_throw(size) : nullptr), fSize(size) {} + + /** + * Passed to reset to specify what happens if the requested size is smaller + * than the current size (and the current block was dynamically allocated). + */ + enum OnShrink { + /** + * If the requested size is smaller than the current size, and the + * current block is dynamically allocated, free the old block and + * malloc a new block of the smaller size. + */ + kAlloc_OnShrink, + + /** + * If the requested size is smaller than the current size, and the + * current block is dynamically allocated, just return the old + * block. + */ + kReuse_OnShrink + }; + + /** + * Reallocates the block to a new size. The ptr may or may not change. + */ + void* reset(size_t size = 0, OnShrink shrink = kAlloc_OnShrink) { + if (size != fSize && (size > fSize || kReuse_OnShrink != shrink)) { + fPtr.reset(size ? sk_malloc_throw(size) : nullptr); + fSize = size; + } + return fPtr.get(); + } + + /** + * Return the allocated block. + */ + void* get() { return fPtr.get(); } + const void* get() const { return fPtr.get(); } + + /** Transfer ownership of the current ptr to the caller, setting the + internal reference to null. Note the caller is reponsible for calling + sk_free on the returned address. 
+ */ + void* release() { + fSize = 0; + return fPtr.release(); + } + +private: + struct WrapFree { + void operator()(void* p) { sk_free(p); } + }; + std::unique_ptr<void, WrapFree> fPtr; + size_t fSize; // can be larger than the requested size (see kReuse) +}; + +/** + * Manage an allocated block of memory. If the requested size is <= kSizeRequested (or slightly + * more), then the allocation will come from the stack rather than the heap. This object is the + * sole manager of the lifetime of the block, so the caller must not call sk_free() or delete on + * the block. + */ +template <size_t kSizeRequested> class SkAutoSMalloc : SkNoncopyable { +public: + /** + * Creates initially empty storage. get() returns a ptr, but it is to a zero-byte allocation. + * Must call reset(size) to return an allocated block. + */ + SkAutoSMalloc() { + fPtr = fStorage; + fSize = kSize; + } + + /** + * Allocate a block of the specified size. If size <= kSizeRequested (or slightly more), then + * the allocation will come from the stack, otherwise it will be dynamically allocated. + */ + explicit SkAutoSMalloc(size_t size) { + fPtr = fStorage; + fSize = kSize; + this->reset(size); + } + + /** + * Free the allocated block (if any). If the block was small enough to have been allocated on + * the stack, then this does nothing. + */ + ~SkAutoSMalloc() { + if (fPtr != (void*)fStorage) { + sk_free(fPtr); + } + } + + /** + * Return the allocated block. May return non-null even if the block is of zero size. Since + * this may be on the stack or dynamically allocated, the caller must not call sk_free() on it, + * but must rely on SkAutoSMalloc to manage it. + */ + void* get() const { return fPtr; } + + /** + * Return a new block of the requested size, freeing (as necessary) any previously allocated + * block. As with the constructor, if size <= kSizeRequested (or slightly more) then the return + * block may be allocated locally, rather than from the heap. + */ + void* reset(size_t size, + SkAutoMalloc::OnShrink shrink = SkAutoMalloc::kAlloc_OnShrink, + bool* didChangeAlloc = nullptr) { + size = (size < kSize) ? kSize : size; + bool alloc = size != fSize && (SkAutoMalloc::kAlloc_OnShrink == shrink || size > fSize); + if (didChangeAlloc) { + *didChangeAlloc = alloc; + } + if (alloc) { + if (fPtr != (void*)fStorage) { + sk_free(fPtr); + } + + if (size == kSize) { + SkASSERT(fPtr != fStorage); // otherwise we lied when setting didChangeAlloc. + fPtr = fStorage; + } else { + fPtr = sk_malloc_throw(size); + } + + fSize = size; + } + SkASSERT(fSize >= size && fSize >= kSize); + SkASSERT((fPtr == fStorage) || fSize > kSize); + return fPtr; + } + +private: + // Align up to 32 bits. + static const size_t kSizeAlign4 = SkAlign4(kSizeRequested); +#if defined(SK_BUILD_FOR_GOOGLE3) + // Stack frame size is limited for SK_BUILD_FOR_GOOGLE3. 4k is less than the actual max, but some functions + // have multiple large stack allocations. + static const size_t kMaxBytes = 4 * 1024; + static const size_t kSize = kSizeRequested > kMaxBytes ? kMaxBytes : kSizeAlign4; +#else + static const size_t kSize = kSizeAlign4; +#endif + + void* fPtr; + size_t fSize; // can be larger than the requested size (see kReuse) + uint32_t fStorage[kSize >> 2]; +}; +// Can't guard the constructor because it's a template class. 
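A short usage sketch for the two helpers above (the include assumes the Skia source tree is on the include path, and the sizes are arbitrary example values): SkAutoSMalloc serves requests up to its template size from inline storage, falls back to the heap for larger ones, and kReuse_OnShrink lets reset() keep the larger block when shrinking.

    // Hedged usage sketch for SkAutoMalloc / SkAutoSMalloc.
    #include "src/base/SkAutoMalloc.h"

    #include <cstring>

    void autoMallocExample() {               // hypothetical example function
        // Heap block, freed automatically when 'heapBuf' goes out of scope.
        SkAutoMalloc heapBuf(4096);
        std::memset(heapBuf.get(), 0, 4096);

        // 64 bytes of inline storage: this request never touches the heap.
        SkAutoSMalloc<64> smallBuf(32);

        // A larger request falls back to a heap allocation...
        void* big = smallBuf.reset(1024);
        std::memset(big, 0, 1024);

        // ...and kReuse_OnShrink keeps that block when shrinking again.
        smallBuf.reset(16, SkAutoMalloc::kReuse_OnShrink);
    }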
+ +#endif diff --git a/gfx/skia/skia/src/base/SkBezierCurves.cpp b/gfx/skia/skia/src/base/SkBezierCurves.cpp new file mode 100644 index 0000000000..a79129ff7d --- /dev/null +++ b/gfx/skia/skia/src/base/SkBezierCurves.cpp @@ -0,0 +1,111 @@ +/* + * Copyright 2012 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "src/base/SkBezierCurves.h" + +#include "include/private/base/SkAssert.h" + +#include <cstddef> + +static inline double interpolate(double A, double B, double t) { + return A + (B - A) * t; +} + +std::array<double, 2> SkBezierCubic::EvalAt(const double curve[8], double t) { + const auto in_X = [&curve](size_t n) { return curve[2*n]; }; + const auto in_Y = [&curve](size_t n) { return curve[2*n + 1]; }; + + // Two semi-common fast paths + if (t == 0) { + return {in_X(0), in_Y(0)}; + } + if (t == 1) { + return {in_X(3), in_Y(3)}; + } + // X(t) = X_0*(1-t)^3 + 3*X_1*t(1-t)^2 + 3*X_2*t^2(1-t) + X_3*t^3 + // Y(t) = Y_0*(1-t)^3 + 3*Y_1*t(1-t)^2 + 3*Y_2*t^2(1-t) + Y_3*t^3 + // Some compilers are smart enough and have sufficient registers/intrinsics to write optimal + // code from + // double one_minus_t = 1 - t; + // double a = one_minus_t * one_minus_t * one_minus_t; + // double b = 3 * one_minus_t * one_minus_t * t; + // double c = 3 * one_minus_t * t * t; + // double d = t * t * t; + // However, some (e.g. when compiling for ARM) fail to do so, so we use this form + // to help more compilers generate smaller/faster ASM. https://godbolt.org/z/M6jG9x45c + double one_minus_t = 1 - t; + double one_minus_t_squared = one_minus_t * one_minus_t; + double a = (one_minus_t_squared * one_minus_t); + double b = 3 * one_minus_t_squared * t; + double t_squared = t * t; + double c = 3 * one_minus_t * t_squared; + double d = t_squared * t; + + return {a * in_X(0) + b * in_X(1) + c * in_X(2) + d * in_X(3), + a * in_Y(0) + b * in_Y(1) + c * in_Y(2) + d * in_Y(3)}; +} + +// Perform subdivision using De Casteljau's algorithm, that is, repeated linear +// interpolation between adjacent points. 
+void SkBezierCubic::Subdivide(const double curve[8], double t, + double twoCurves[14]) { + SkASSERT(0.0 <= t && t <= 1.0); + // We split the curve "in" into two curves "alpha" and "beta" + const auto in_X = [&curve](size_t n) { return curve[2*n]; }; + const auto in_Y = [&curve](size_t n) { return curve[2*n + 1]; }; + const auto alpha_X = [&twoCurves](size_t n) -> double& { return twoCurves[2*n]; }; + const auto alpha_Y = [&twoCurves](size_t n) -> double& { return twoCurves[2*n + 1]; }; + const auto beta_X = [&twoCurves](size_t n) -> double& { return twoCurves[2*n + 6]; }; + const auto beta_Y = [&twoCurves](size_t n) -> double& { return twoCurves[2*n + 7]; }; + + alpha_X(0) = in_X(0); + alpha_Y(0) = in_Y(0); + + beta_X(3) = in_X(3); + beta_Y(3) = in_Y(3); + + double x01 = interpolate(in_X(0), in_X(1), t); + double y01 = interpolate(in_Y(0), in_Y(1), t); + double x12 = interpolate(in_X(1), in_X(2), t); + double y12 = interpolate(in_Y(1), in_Y(2), t); + double x23 = interpolate(in_X(2), in_X(3), t); + double y23 = interpolate(in_Y(2), in_Y(3), t); + + alpha_X(1) = x01; + alpha_Y(1) = y01; + + beta_X(2) = x23; + beta_Y(2) = y23; + + alpha_X(2) = interpolate(x01, x12, t); + alpha_Y(2) = interpolate(y01, y12, t); + + beta_X(1) = interpolate(x12, x23, t); + beta_Y(1) = interpolate(y12, y23, t); + + alpha_X(3) /*= beta_X(0) */ = interpolate(alpha_X(2), beta_X(1), t); + alpha_Y(3) /*= beta_Y(0) */ = interpolate(alpha_Y(2), beta_Y(1), t); +} + +std::array<double, 4> SkBezierCubic::ConvertToPolynomial(const double curve[8], bool yValues) { + const double* offset_curve = yValues ? curve + 1 : curve; + const auto P = [&offset_curve](size_t n) { return offset_curve[2*n]; }; + // A cubic Bézier curve is interpolated as follows: + // c(t) = (1 - t)^3 P_0 + 3t(1 - t)^2 P_1 + 3t^2 (1 - t) P_2 + t^3 P_3 + // = (-P_0 + 3P_1 + -3P_2 + P_3) t^3 + (3P_0 - 6P_1 + 3P_2) t^2 + + // (-3P_0 + 3P_1) t + P_0 + // Where P_N is the Nth point. The second step expands the polynomial and groups + // by powers of t. The desired output is a cubic formula, so we just need to + // combine the appropriate points to make the coefficients. + std::array<double, 4> results; + results[0] = -P(0) + 3*P(1) - 3*P(2) + P(3); + results[1] = 3*P(0) - 6*P(1) + 3*P(2); + results[2] = -3*P(0) + 3*P(1); + results[3] = P(0); + return results; +} + diff --git a/gfx/skia/skia/src/base/SkBezierCurves.h b/gfx/skia/skia/src/base/SkBezierCurves.h new file mode 100644 index 0000000000..772fee4bf7 --- /dev/null +++ b/gfx/skia/skia/src/base/SkBezierCurves.h @@ -0,0 +1,63 @@ +/* + * Copyright 2023 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ +#ifndef SkBezierCurves_DEFINED +#define SkBezierCurves_DEFINED + +#include <array> + +/** + * Utilities for dealing with cubic Bézier curves. These have a start XY + * point, an end XY point, and two control XY points in between. They take + * a parameter t which is between 0 and 1 (inclusive) which is used to + * interpolate between the start and end points, via a route dictated by + * the control points, and return a new XY point. + * + * We store a Bézier curve as an array of 8 floats or doubles, where + * the even indices are the X coordinates, and the odd indices are the Y + * coordinates. + */ +class SkBezierCubic { +public: + + /** + * Evaluates the cubic Bézier curve for a given t. It returns an X and Y coordinate + * following the formula, which does the interpolation mentioned above. 
+ * X(t) = X_0*(1-t)^3 + 3*X_1*t(1-t)^2 + 3*X_2*t^2(1-t) + X_3*t^3 + * Y(t) = Y_0*(1-t)^3 + 3*Y_1*t(1-t)^2 + 3*Y_2*t^2(1-t) + Y_3*t^3 + * + * t is typically in the range [0, 1], but this function will not assert that, + * as Bézier curves are well-defined for any real number input. + */ + static std::array<double, 2> EvalAt(const double curve[8], double t); + + /** + * Splits the provided Bézier curve at the location t, resulting in two + * Bézier curves that share a point (the end point from curve 1 + * and the start point from curve 2 are the same). + * + * t must be in the interval [0, 1]. + * + * The provided twoCurves array will be filled such that indices + * 0-7 are the first curve (representing the interval [0, t]), and + * indices 6-13 are the second curve (representing [t, 1]). + */ + static void Subdivide(const double curve[8], double t, + double twoCurves[14]); + + /** + * Converts the provided Bézier curve into the the equivalent cubic + * f(t) = A*t^3 + B*t^2 + C*t + D + * where f(t) will represent Y coordinates over time if yValues is + * true and the X coordinates if yValues is false. + * + * In effect, this turns the control points into an actual line, representing + * the x or y values. + */ + static std::array<double, 4> ConvertToPolynomial(const double curve[8], bool yValues); +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkBlockAllocator.cpp b/gfx/skia/skia/src/base/SkBlockAllocator.cpp new file mode 100644 index 0000000000..e62fc2078d --- /dev/null +++ b/gfx/skia/skia/src/base/SkBlockAllocator.cpp @@ -0,0 +1,302 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "src/base/SkBlockAllocator.h" + +#include "include/private/base/SkDebug.h" +#include "include/private/base/SkTo.h" + +#ifdef SK_DEBUG +#include <vector> +#endif + +SkBlockAllocator::SkBlockAllocator(GrowthPolicy policy, size_t blockIncrementBytes, + size_t additionalPreallocBytes) + : fTail(&fHead) + // Round up to the nearest max-aligned value, and then divide so that fBlockSizeIncrement + // can effectively fit higher byte counts in its 16 bits of storage + , fBlockIncrement(SkTo<uint16_t>( + std::min(SkAlignTo(blockIncrementBytes, kAddressAlign) / kAddressAlign, + (size_t) std::numeric_limits<uint16_t>::max()))) + , fGrowthPolicy(static_cast<uint64_t>(policy)) + , fN0((policy == GrowthPolicy::kLinear || policy == GrowthPolicy::kExponential) ? 1 : 0) + , fN1(1) + // The head block always fills remaining space from SkBlockAllocator's size, because it's + // inline, but can take over the specified number of bytes immediately after it. 
+ , fHead(/*prev=*/nullptr, additionalPreallocBytes + BaseHeadBlockSize()) { + SkASSERT(fBlockIncrement >= 1); + SkASSERT(additionalPreallocBytes <= kMaxAllocationSize); +} + +SkBlockAllocator::Block::Block(Block* prev, int allocationSize) + : fNext(nullptr) + , fPrev(prev) + , fSize(allocationSize) + , fCursor(kDataStart) + , fMetadata(0) + , fAllocatorMetadata(0) { + SkASSERT(allocationSize >= (int) sizeof(Block)); + SkDEBUGCODE(fSentinel = kAssignedMarker;) + + this->poisonRange(kDataStart, fSize); +} + +SkBlockAllocator::Block::~Block() { + this->unpoisonRange(kDataStart, fSize); + + SkASSERT(fSentinel == kAssignedMarker); + SkDEBUGCODE(fSentinel = kFreedMarker;) // FWIW +} + +size_t SkBlockAllocator::totalSize() const { + // Use size_t since the sum across all blocks could exceed 'int', even though each block won't + size_t size = offsetof(SkBlockAllocator, fHead) + this->scratchBlockSize(); + for (const Block* b : this->blocks()) { + size += b->fSize; + } + SkASSERT(size >= this->preallocSize()); + return size; +} + +size_t SkBlockAllocator::totalUsableSpace() const { + size_t size = this->scratchBlockSize(); + if (size > 0) { + size -= kDataStart; // scratchBlockSize reports total block size, not usable size + } + for (const Block* b : this->blocks()) { + size += (b->fSize - kDataStart); + } + SkASSERT(size >= this->preallocUsableSpace()); + return size; +} + +size_t SkBlockAllocator::totalSpaceInUse() const { + size_t size = 0; + for (const Block* b : this->blocks()) { + size += (b->fCursor - kDataStart); + } + SkASSERT(size <= this->totalUsableSpace()); + return size; +} + +SkBlockAllocator::Block* SkBlockAllocator::findOwningBlock(const void* p) { + // When in doubt, search in reverse to find an overlapping block. + uintptr_t ptr = reinterpret_cast<uintptr_t>(p); + for (Block* b : this->rblocks()) { + uintptr_t lowerBound = reinterpret_cast<uintptr_t>(b) + kDataStart; + uintptr_t upperBound = reinterpret_cast<uintptr_t>(b) + b->fSize; + if (lowerBound <= ptr && ptr < upperBound) { + SkASSERT(b->fSentinel == kAssignedMarker); + return b; + } + } + return nullptr; +} + +void SkBlockAllocator::releaseBlock(Block* block) { + if (block == &fHead) { + // Reset the cursor of the head block so that it can be reused if it becomes the new tail + block->fCursor = kDataStart; + block->fMetadata = 0; + block->poisonRange(kDataStart, block->fSize); + // Unlike in reset(), we don't set the head's next block to null because there are + // potentially heap-allocated blocks that are still connected to it. 
+ } else { + SkASSERT(block->fPrev); + block->fPrev->fNext = block->fNext; + if (block->fNext) { + SkASSERT(fTail != block); + block->fNext->fPrev = block->fPrev; + } else { + SkASSERT(fTail == block); + fTail = block->fPrev; + } + + // The released block becomes the new scratch block (if it's bigger), or delete it + if (this->scratchBlockSize() < block->fSize) { + SkASSERT(block != fHead.fPrev); // shouldn't already be the scratch block + if (fHead.fPrev) { + delete fHead.fPrev; + } + block->markAsScratch(); + fHead.fPrev = block; + } else { + delete block; + } + } + + // Decrement growth policy (opposite of addBlock()'s increment operations) + GrowthPolicy gp = static_cast<GrowthPolicy>(fGrowthPolicy); + if (fN0 > 0 && (fN1 > 1 || gp == GrowthPolicy::kFibonacci)) { + SkASSERT(gp != GrowthPolicy::kFixed); // fixed never needs undoing, fN0 always is 0 + if (gp == GrowthPolicy::kLinear) { + fN1 = fN1 - fN0; + } else if (gp == GrowthPolicy::kFibonacci) { + // Subtract n0 from n1 to get the prior 2 terms in the fibonacci sequence + int temp = fN1 - fN0; // yields prior fN0 + fN1 = fN1 - temp; // yields prior fN1 + fN0 = temp; + } else { + SkASSERT(gp == GrowthPolicy::kExponential); + // Divide by 2 to undo the 2N update from addBlock + fN1 = fN1 >> 1; + fN0 = fN1; + } + } + + SkASSERT(fN1 >= 1 && fN0 >= 0); +} + +void SkBlockAllocator::stealHeapBlocks(SkBlockAllocator* other) { + Block* toSteal = other->fHead.fNext; + if (toSteal) { + // The other's next block connects back to this allocator's current tail, and its new tail + // becomes the end of other's block linked list. + SkASSERT(other->fTail != &other->fHead); + toSteal->fPrev = fTail; + fTail->fNext = toSteal; + fTail = other->fTail; + // The other allocator becomes just its inline head block + other->fTail = &other->fHead; + other->fHead.fNext = nullptr; + } // else no block to steal +} + +void SkBlockAllocator::reset() { + for (Block* b : this->rblocks()) { + if (b == &fHead) { + // Reset metadata and cursor, tail points to the head block again + fTail = b; + b->fNext = nullptr; + b->fCursor = kDataStart; + b->fMetadata = 0; + // For reset(), but NOT releaseBlock(), the head allocatorMetadata and scratch block + // are reset/destroyed. + b->fAllocatorMetadata = 0; + b->poisonRange(kDataStart, b->fSize); + this->resetScratchSpace(); + } else { + delete b; + } + } + SkASSERT(fTail == &fHead && fHead.fNext == nullptr && fHead.fPrev == nullptr && + fHead.metadata() == 0 && fHead.fCursor == kDataStart); + + GrowthPolicy gp = static_cast<GrowthPolicy>(fGrowthPolicy); + fN0 = (gp == GrowthPolicy::kLinear || gp == GrowthPolicy::kExponential) ? 1 : 0; + fN1 = 1; +} + +void SkBlockAllocator::resetScratchSpace() { + if (fHead.fPrev) { + delete fHead.fPrev; + fHead.fPrev = nullptr; + } +} + +void SkBlockAllocator::addBlock(int minSize, int maxSize) { + SkASSERT(minSize > (int) sizeof(Block) && minSize <= maxSize); + + // Max positive value for uint:23 storage (decltype(fN0) picks up uint64_t, not uint:23). + static constexpr int kMaxN = (1 << 23) - 1; + static_assert(2 * kMaxN <= std::numeric_limits<int32_t>::max()); // Growth policy won't overflow + + auto alignAllocSize = [](int size) { + // Round to a nice boundary since the block isn't maxing out: + // if allocSize > 32K, aligns on 4K boundary otherwise aligns on max_align_t, to play + // nicely with jeMalloc (from SkArenaAlloc). + int mask = size > (1 << 15) ? 
((1 << 12) - 1) : (kAddressAlign - 1); + return (size + mask) & ~mask; + }; + + int allocSize; + void* mem = nullptr; + if (this->scratchBlockSize() >= minSize) { + // Activate the scratch block instead of making a new block + SkASSERT(fHead.fPrev->isScratch()); + allocSize = fHead.fPrev->fSize; + mem = fHead.fPrev; + fHead.fPrev = nullptr; + } else if (minSize < maxSize) { + // Calculate the 'next' size per growth policy sequence + GrowthPolicy gp = static_cast<GrowthPolicy>(fGrowthPolicy); + int nextN1 = fN0 + fN1; + int nextN0; + if (gp == GrowthPolicy::kFixed || gp == GrowthPolicy::kLinear) { + nextN0 = fN0; + } else if (gp == GrowthPolicy::kFibonacci) { + nextN0 = fN1; + } else { + SkASSERT(gp == GrowthPolicy::kExponential); + nextN0 = nextN1; + } + fN0 = std::min(kMaxN, nextN0); + fN1 = std::min(kMaxN, nextN1); + + // However, must guard against overflow here, since all the size-based asserts prevented + // alignment/addition overflows, while multiplication requires 2x bits instead of x+1. + int sizeIncrement = fBlockIncrement * kAddressAlign; + if (maxSize / sizeIncrement < nextN1) { + // The growth policy would overflow, so use the max. We've already confirmed that + // maxSize will be sufficient for the requested minimumSize + allocSize = maxSize; + } else { + allocSize = std::min(alignAllocSize(std::max(minSize, sizeIncrement * nextN1)), + maxSize); + } + } else { + SkASSERT(minSize == maxSize); + // Still align on a nice boundary, no max clamping since that would just undo the alignment + allocSize = alignAllocSize(minSize); + } + + // Create new block and append to the linked list of blocks in this allocator + if (!mem) { + mem = operator new(allocSize); + } + fTail->fNext = new (mem) Block(fTail, allocSize); + fTail = fTail->fNext; +} + +#ifdef SK_DEBUG +void SkBlockAllocator::validate() const { + std::vector<const Block*> blocks; + const Block* prev = nullptr; + for (const Block* block : this->blocks()) { + blocks.push_back(block); + + SkASSERT(kAssignedMarker == block->fSentinel); + if (block == &fHead) { + // The head blocks' fPrev may be non-null if it holds a scratch block, but that's not + // considered part of the linked list + SkASSERT(!prev && (!fHead.fPrev || fHead.fPrev->isScratch())); + } else { + SkASSERT(prev == block->fPrev); + } + if (prev) { + SkASSERT(prev->fNext == block); + } + + SkASSERT(block->fSize >= (int) sizeof(Block)); + SkASSERT(block->fCursor >= kDataStart); + SkASSERT(block->fCursor <= block->fSize); + + prev = block; + } + SkASSERT(prev == fTail); + SkASSERT(!blocks.empty()); + SkASSERT(blocks[0] == &fHead); + + // Confirm reverse iteration matches forward iteration + size_t j = blocks.size(); + for (const Block* b : this->rblocks()) { + SkASSERT(b == blocks[j - 1]); + j--; + } + SkASSERT(j == 0); +} +#endif diff --git a/gfx/skia/skia/src/base/SkBlockAllocator.h b/gfx/skia/skia/src/base/SkBlockAllocator.h new file mode 100644 index 0000000000..02201c17d4 --- /dev/null +++ b/gfx/skia/skia/src/base/SkBlockAllocator.h @@ -0,0 +1,754 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SkBlockAllocator_DEFINED +#define SkBlockAllocator_DEFINED + +#include "include/private/base/SkAlign.h" +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkDebug.h" +#include "include/private/base/SkMacros.h" +#include "include/private/base/SkMath.h" +#include "include/private/base/SkNoncopyable.h" +#include "src/base/SkASAN.h" + +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <limits> +#include <new> +#include <type_traits> + +/** + * SkBlockAllocator provides low-level support for a block allocated arena with a dynamic tail that + * tracks space reservations within each block. Its APIs provide the ability to reserve space, + * resize reservations, and release reservations. It will automatically create new blocks if needed + * and destroy all remaining blocks when it is destructed. It assumes that anything allocated within + * its blocks has its destructors called externally. It is recommended that SkBlockAllocator is + * wrapped by a higher-level allocator that uses the low-level APIs to implement a simpler, + * purpose-focused API w/o having to worry as much about byte-level concerns. + * + * SkBlockAllocator has no limit to its total size, but each allocation is limited to 512MB (which + * should be sufficient for Skia's use cases). This upper allocation limit allows all internal + * operations to be performed using 'int' and avoid many overflow checks. Static asserts are used + * to ensure that those operations would not overflow when using the largest possible values. + * + * Possible use modes: + * 1. No upfront allocation, either on the stack or as a field + * SkBlockAllocator allocator(policy, heapAllocSize); + * + * 2. In-place new'd + * void* mem = operator new(totalSize); + * SkBlockAllocator* allocator = new (mem) SkBlockAllocator(policy, heapAllocSize, + * totalSize- sizeof(SkBlockAllocator)); + * delete allocator; + * + * 3. Use SkSBlockAllocator to increase the preallocation size + * SkSBlockAllocator<1024> allocator(policy, heapAllocSize); + * sizeof(allocator) == 1024; + */ +// TODO(michaelludwig) - While API is different, this shares similarities to SkArenaAlloc and +// SkFibBlockSizes, so we should work to integrate them. +class SkBlockAllocator final : SkNoncopyable { +public: + // Largest size that can be requested from allocate(), chosen because it's the largest pow-2 + // that is less than int32_t::max()/2. + inline static constexpr int kMaxAllocationSize = 1 << 29; + + enum class GrowthPolicy : int { + kFixed, // Next block size = N + kLinear, // = #blocks * N + kFibonacci, // = fibonacci(#blocks) * N + kExponential, // = 2^#blocks * N + kLast = kExponential + }; + inline static constexpr int kGrowthPolicyCount = static_cast<int>(GrowthPolicy::kLast) + 1; + + class Block final { + public: + ~Block(); + void operator delete(void* p) { ::operator delete(p); } + + // Return the maximum allocation size with the given alignment that can fit in this block. + template <size_t Align = 1, size_t Padding = 0> + int avail() const { return std::max(0, fSize - this->cursor<Align, Padding>()); } + + // Return the aligned offset of the first allocation, assuming it was made with the + // specified Align, and Padding. The returned offset does not mean a valid allocation + // starts at that offset, this is a utility function for classes built on top to manage + // indexing into a block effectively. 
+ template <size_t Align = 1, size_t Padding = 0> + int firstAlignedOffset() const { return this->alignedOffset<Align, Padding>(kDataStart); } + + // Convert an offset into this block's storage into a usable pointer. + void* ptr(int offset) { + SkASSERT(offset >= kDataStart && offset < fSize); + return reinterpret_cast<char*>(this) + offset; + } + const void* ptr(int offset) const { return const_cast<Block*>(this)->ptr(offset); } + + // Every block has an extra 'int' for clients to use however they want. It will start + // at 0 when a new block is made, or when the head block is reset. + int metadata() const { return fMetadata; } + void setMetadata(int value) { fMetadata = value; } + + /** + * Release the byte range between offset 'start' (inclusive) and 'end' (exclusive). This + * will return true if those bytes were successfully reclaimed, i.e. a subsequent allocation + * request could occupy the space. Regardless of return value, the provided byte range that + * [start, end) represents should not be used until it's re-allocated with allocate<...>(). + */ + inline bool release(int start, int end); + + /** + * Resize a previously reserved byte range of offset 'start' (inclusive) to 'end' + * (exclusive). 'deltaBytes' is the SIGNED change to length of the reservation. + * + * When negative this means the reservation is shrunk and the new length is (end - start - + * |deltaBytes|). If this new length would be 0, the byte range can no longer be used (as if + * it were released instead). Asserts that it would not shrink the reservation below 0. + * + * If 'deltaBytes' is positive, the allocator attempts to increase the length of the + * reservation. If 'deltaBytes' is less than or equal to avail() and it was the last + * allocation in the block, it can be resized. If there is not enough available bytes to + * accommodate the increase in size, or another allocation is blocking the increase in size, + * then false will be returned and the reserved byte range is unmodified. + */ + inline bool resize(int start, int end, int deltaBytes); + + private: + friend class SkBlockAllocator; + + Block(Block* prev, int allocationSize); + + // We poison the unallocated space in a Block to allow ASAN to catch invalid writes. + void poisonRange(int start, int end) { + sk_asan_poison_memory_region(reinterpret_cast<char*>(this) + start, end - start); + } + void unpoisonRange(int start, int end) { + sk_asan_unpoison_memory_region(reinterpret_cast<char*>(this) + start, end - start); + } + + // Get fCursor, but aligned such that ptr(rval) satisfies Align. + template <size_t Align, size_t Padding> + int cursor() const { return this->alignedOffset<Align, Padding>(fCursor); } + + template <size_t Align, size_t Padding> + int alignedOffset(int offset) const; + + bool isScratch() const { return fCursor < 0; } + void markAsScratch() { + fCursor = -1; + this->poisonRange(kDataStart, fSize); + } + + SkDEBUGCODE(uint32_t fSentinel;) // known value to check for bad back pointers to blocks + + Block* fNext; // doubly-linked list of blocks + Block* fPrev; + + // Each block tracks its own cursor because as later blocks are released, an older block + // may become the active tail again. + int fSize; // includes the size of the BlockHeader and requested metadata + int fCursor; // (this + fCursor) points to next available allocation + int fMetadata; + + // On release builds, a Block's other 2 pointers and 3 int fields leaves 4 bytes of padding + // for 8 and 16 aligned systems. 
Currently this is only manipulated in the head block for + // an allocator-level metadata and is explicitly not reset when the head block is "released" + // Down the road we could instead choose to offer multiple metadata slots per block. + int fAllocatorMetadata; + }; + + // Tuple representing a range of bytes, marking the unaligned start, the first aligned point + // after any padding, and the upper limit depending on requested size. + struct ByteRange { + Block* fBlock; // Owning block + int fStart; // Inclusive byte lower limit of byte range + int fAlignedOffset; // >= start, matching alignment requirement (i.e. first real byte) + int fEnd; // Exclusive upper limit of byte range + }; + + // The size of the head block is determined by 'additionalPreallocBytes'. Subsequent heap blocks + // are determined by 'policy' and 'blockIncrementBytes', although 'blockIncrementBytes' will be + // aligned to std::max_align_t. + // + // When 'additionalPreallocBytes' > 0, the allocator assumes that many extra bytes immediately + // after the allocator can be used by its inline head block. This is useful when the allocator + // is in-place new'ed into a larger block of memory, but it should remain set to 0 if stack + // allocated or if the class layout does not guarantee that space is present. + SkBlockAllocator(GrowthPolicy policy, size_t blockIncrementBytes, + size_t additionalPreallocBytes = 0); + + ~SkBlockAllocator() { this->reset(); } + void operator delete(void* p) { ::operator delete(p); } + + /** + * Helper to calculate the minimum number of bytes needed for heap block size, under the + * assumption that Align will be the requested alignment of the first call to allocate(). + * Ex. To store N instances of T in a heap block, the 'blockIncrementBytes' should be set to + * BlockOverhead<alignof(T)>() + N * sizeof(T) when making the SkBlockAllocator. + */ + template<size_t Align = 1, size_t Padding = 0> + static constexpr size_t BlockOverhead(); + + /** + * Helper to calculate the minimum number of bytes needed for a preallocation, under the + * assumption that Align will be the requested alignment of the first call to allocate(). + * Ex. To preallocate a SkSBlockAllocator to hold N instances of T, its arge should be + * Overhead<alignof(T)>() + N * sizeof(T) + */ + template<size_t Align = 1, size_t Padding = 0> + static constexpr size_t Overhead(); + + /** + * Return the total number of bytes of the allocator, including its instance overhead, per-block + * overhead and space used for allocations. + */ + size_t totalSize() const; + /** + * Return the total number of bytes usable for allocations. This includes bytes that have + * been reserved already by a call to allocate() and bytes that are still available. It is + * totalSize() minus all allocator and block-level overhead. + */ + size_t totalUsableSpace() const; + /** + * Return the total number of usable bytes that have been reserved by allocations. This will + * be less than or equal to totalUsableSpace(). + */ + size_t totalSpaceInUse() const; + + /** + * Return the total number of bytes that were pre-allocated for the SkBlockAllocator. This will + * include 'additionalPreallocBytes' passed to the constructor, and represents what the total + * size would become after a call to reset(). + */ + size_t preallocSize() const { + // Don't double count fHead's Block overhead in both sizeof(SkBlockAllocator) and fSize. 
+ return sizeof(SkBlockAllocator) + fHead.fSize - BaseHeadBlockSize(); + } + /** + * Return the usable size of the inline head block; this will be equal to + * 'additionalPreallocBytes' plus any alignment padding that the system had to add to Block. + * The returned value represents what could be allocated before a heap block is be created. + */ + size_t preallocUsableSpace() const { + return fHead.fSize - kDataStart; + } + + /** + * Get the current value of the allocator-level metadata (a user-oriented slot). This is + * separate from any block-level metadata, but can serve a similar purpose to compactly support + * data collections on top of SkBlockAllocator. + */ + int metadata() const { return fHead.fAllocatorMetadata; } + + /** + * Set the current value of the allocator-level metadata. + */ + void setMetadata(int value) { fHead.fAllocatorMetadata = value; } + + /** + * Reserve space that will hold 'size' bytes. This will automatically allocate a new block if + * there is not enough available space in the current block to provide 'size' bytes. The + * returned ByteRange tuple specifies the Block owning the reserved memory, the full byte range, + * and the aligned offset within that range to use for the user-facing pointer. The following + * invariants hold: + * + * 1. block->ptr(alignedOffset) is aligned to Align + * 2. end - alignedOffset == size + * 3. Padding <= alignedOffset - start <= Padding + Align - 1 + * + * Invariant #3, when Padding > 0, allows intermediate allocators to embed metadata along with + * the allocations. If the Padding bytes are used for some 'struct Meta', then + * ptr(alignedOffset - sizeof(Meta)) can be safely used as a Meta* if Meta's alignment + * requirements are less than or equal to the alignment specified in allocate<>. This can be + * easily guaranteed by using the pattern: + * + * allocate<max(UserAlign, alignof(Meta)), sizeof(Meta)>(userSize); + * + * This ensures that ptr(alignedOffset) will always satisfy UserAlign and + * ptr(alignedOffset - sizeof(Meta)) will always satisfy alignof(Meta). Alternatively, memcpy + * can be used to read and write values between start and alignedOffset without worrying about + * alignment requirements of the metadata. + * + * For over-aligned allocations, the alignedOffset (as an int) may not be a multiple of Align, + * but the result of ptr(alignedOffset) will be a multiple of Align. + */ + template <size_t Align, size_t Padding = 0> + ByteRange allocate(size_t size); + + enum ReserveFlags : unsigned { + // If provided to reserve(), the input 'size' will be rounded up to the next size determined + // by the growth policy of the SkBlockAllocator. If not, 'size' will be aligned to max_align + kIgnoreGrowthPolicy_Flag = 0b01, + // If provided to reserve(), the number of available bytes of the current block will not + // be used to satisfy the reservation (assuming the contiguous range was long enough to + // begin with). + kIgnoreExistingBytes_Flag = 0b10, + + kNo_ReserveFlags = 0b00 + }; + + /** + * Ensure the block allocator has 'size' contiguous available bytes. After calling this + * function, currentBlock()->avail<Align, Padding>() may still report less than 'size' if the + * reserved space was added as a scratch block. This is done so that anything remaining in + * the current block can still be used if a smaller-than-size allocation is requested. If 'size' + * is requested by a subsequent allocation, the scratch block will automatically be activated + * and the request will not itself trigger any malloc. 
+ * + * The optional 'flags' controls how the input size is allocated; by default it will attempt + * to use available contiguous bytes in the current block and will respect the growth policy + * of the allocator. + */ + template <size_t Align = 1, size_t Padding = 0> + void reserve(size_t size, ReserveFlags flags = kNo_ReserveFlags); + + /** + * Return a pointer to the start of the current block. This will never be null. + */ + const Block* currentBlock() const { return fTail; } + Block* currentBlock() { return fTail; } + + const Block* headBlock() const { return &fHead; } + Block* headBlock() { return &fHead; } + + /** + * Return the block that owns the allocated 'ptr'. Assuming that earlier, an allocation was + * returned as {b, start, alignedOffset, end}, and 'p = b->ptr(alignedOffset)', then a call + * to 'owningBlock<Align, Padding>(p, start) == b'. + * + * If calling code has already made a pointer to their metadata, i.e. 'm = p - Padding', then + * 'owningBlock<Align, 0>(m, start)' will also return b, allowing you to recover the block from + * the metadata pointer. + * + * If calling code has access to the original alignedOffset, this function should not be used + * since the owning block is just 'p - alignedOffset', regardless of original Align or Padding. + */ + template <size_t Align, size_t Padding = 0> + Block* owningBlock(const void* ptr, int start); + + template <size_t Align, size_t Padding = 0> + const Block* owningBlock(const void* ptr, int start) const { + return const_cast<SkBlockAllocator*>(this)->owningBlock<Align, Padding>(ptr, start); + } + + /** + * Find the owning block of the allocated pointer, 'p'. Without any additional information this + * is O(N) on the number of allocated blocks. + */ + Block* findOwningBlock(const void* ptr); + const Block* findOwningBlock(const void* ptr) const { + return const_cast<SkBlockAllocator*>(this)->findOwningBlock(ptr); + } + + /** + * Explicitly free an entire block, invalidating any remaining allocations from the block. + * SkBlockAllocator will release all alive blocks automatically when it is destroyed, but this + * function can be used to reclaim memory over the lifetime of the allocator. The provided + * 'block' pointer must have previously come from a call to currentBlock() or allocate(). + * + * If 'block' represents the inline-allocated head block, its cursor and metadata are instead + * reset to their defaults. + * + * If the block is not the head block, it may be kept as a scratch block to be reused for + * subsequent allocation requests, instead of making an entirely new block. A scratch block is + * not visible when iterating over blocks but is reported in the total size of the allocator. + */ + void releaseBlock(Block* block); + + /** + * Detach every heap-allocated block owned by 'other' and concatenate them to this allocator's + * list of blocks. This memory is now managed by this allocator. Since this only transfers + * ownership of a Block, and a Block itself does not move, any previous allocations remain + * valid and associated with their original Block instances. SkBlockAllocator-level functions + * that accept allocated pointers (e.g. findOwningBlock), must now use this allocator and not + * 'other' for these allocations. + * + * The head block of 'other' cannot be stolen, so higher-level allocators and memory structures + * must handle that data differently. 
+ */ + void stealHeapBlocks(SkBlockAllocator* other); + + /** + * Explicitly free all blocks (invalidating all allocations), and resets the head block to its + * default state. The allocator-level metadata is reset to 0 as well. + */ + void reset(); + + /** + * Remove any reserved scratch space, either from calling reserve() or releaseBlock(). + */ + void resetScratchSpace(); + + template <bool Forward, bool Const> class BlockIter; + + /** + * Clients can iterate over all active Blocks in the SkBlockAllocator using for loops: + * + * Forward iteration from head to tail block (or non-const variant): + * for (const Block* b : this->blocks()) { } + * Reverse iteration from tail to head block: + * for (const Block* b : this->rblocks()) { } + * + * It is safe to call releaseBlock() on the active block while looping. + */ + inline BlockIter<true, false> blocks(); + inline BlockIter<true, true> blocks() const; + inline BlockIter<false, false> rblocks(); + inline BlockIter<false, true> rblocks() const; + +#ifdef SK_DEBUG + inline static constexpr uint32_t kAssignedMarker = 0xBEEFFACE; + inline static constexpr uint32_t kFreedMarker = 0xCAFEBABE; + + void validate() const; +#endif + +private: + friend class BlockAllocatorTestAccess; + friend class TBlockListTestAccess; + + inline static constexpr int kDataStart = sizeof(Block); + #ifdef SK_FORCE_8_BYTE_ALIGNMENT + // This is an issue for WASM builds using emscripten, which had std::max_align_t = 16, but + // was returning pointers only aligned to 8 bytes. + // https://github.com/emscripten-core/emscripten/issues/10072 + // + // Setting this to 8 will let SkBlockAllocator properly correct for the pointer address if + // a 16-byte aligned allocation is requested in wasm (unlikely since we don't use long + // doubles). + inline static constexpr size_t kAddressAlign = 8; + #else + // The alignment Block addresses will be at when created using operator new + // (spec-compliant is pointers are aligned to max_align_t). + inline static constexpr size_t kAddressAlign = alignof(std::max_align_t); + #endif + + // Calculates the size of a new Block required to store a kMaxAllocationSize request for the + // given alignment and padding bytes. Also represents maximum valid fCursor value in a Block. + template<size_t Align, size_t Padding> + static constexpr size_t MaxBlockSize(); + + static constexpr int BaseHeadBlockSize() { + return sizeof(SkBlockAllocator) - offsetof(SkBlockAllocator, fHead); + } + + // Append a new block to the end of the block linked list, updating fTail. 'minSize' must + // have enough room for sizeof(Block). 'maxSize' is the upper limit of fSize for the new block + // that will preserve the static guarantees SkBlockAllocator makes. + void addBlock(int minSize, int maxSize); + + int scratchBlockSize() const { return fHead.fPrev ? fHead.fPrev->fSize : 0; } + + Block* fTail; // All non-head blocks are heap allocated; tail will never be null. + + // All remaining state is packed into 64 bits to keep SkBlockAllocator at 16 bytes + head block + // (on a 64-bit system). + + // Growth of the block size is controlled by four factors: BlockIncrement, N0 and N1, and a + // policy defining how N0 is updated. When a new block is needed, we calculate N1' = N0 + N1. + // Depending on the policy, N0' = N0 (no growth or linear growth), or N0' = N1 (Fibonacci), or + // N0' = N1' (exponential). The size of the new block is N1' * BlockIncrement * MaxAlign, + // after which fN0 and fN1 store N0' and N1' clamped into 23 bits. 
With current bit allocations, + // N1' is limited to 2^24, and assuming MaxAlign=16, then BlockIncrement must be '2' in order to + // eventually reach the hard 2^29 size limit of SkBlockAllocator. + + // Next heap block size = (fBlockIncrement * alignof(std::max_align_t) * (fN0 + fN1)) + uint64_t fBlockIncrement : 16; + uint64_t fGrowthPolicy : 2; // GrowthPolicy + uint64_t fN0 : 23; // = 1 for linear/exp.; = 0 for fixed/fibonacci, initially + uint64_t fN1 : 23; // = 1 initially + + // Inline head block, must be at the end so that it can utilize any additional reserved space + // from the initial allocation. + // The head block's prev pointer may be non-null, which signifies a scratch block that may be + // reused instead of allocating an entirely new block (this helps when allocate+release calls + // bounce back and forth across the capacity of a block). + alignas(kAddressAlign) Block fHead; + + static_assert(kGrowthPolicyCount <= 4); +}; + +// A wrapper around SkBlockAllocator that includes preallocated storage for the head block. +// N will be the preallocSize() reported by the allocator. +template<size_t N> +class SkSBlockAllocator : SkNoncopyable { +public: + using GrowthPolicy = SkBlockAllocator::GrowthPolicy; + + SkSBlockAllocator() { + new (fStorage) SkBlockAllocator(GrowthPolicy::kFixed, N, N - sizeof(SkBlockAllocator)); + } + explicit SkSBlockAllocator(GrowthPolicy policy) { + new (fStorage) SkBlockAllocator(policy, N, N - sizeof(SkBlockAllocator)); + } + + SkSBlockAllocator(GrowthPolicy policy, size_t blockIncrementBytes) { + new (fStorage) SkBlockAllocator(policy, blockIncrementBytes, N - sizeof(SkBlockAllocator)); + } + + ~SkSBlockAllocator() { + this->allocator()->~SkBlockAllocator(); + } + + SkBlockAllocator* operator->() { return this->allocator(); } + const SkBlockAllocator* operator->() const { return this->allocator(); } + + SkBlockAllocator* allocator() { return reinterpret_cast<SkBlockAllocator*>(fStorage); } + const SkBlockAllocator* allocator() const { + return reinterpret_cast<const SkBlockAllocator*>(fStorage); + } + +private: + static_assert(N >= sizeof(SkBlockAllocator)); + + // Will be used to placement new the allocator + alignas(SkBlockAllocator) char fStorage[N]; +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Template and inline implementations + +SK_MAKE_BITFIELD_OPS(SkBlockAllocator::ReserveFlags) + +template<size_t Align, size_t Padding> +constexpr size_t SkBlockAllocator::BlockOverhead() { + static_assert(SkAlignTo(kDataStart + Padding, Align) >= sizeof(Block)); + return SkAlignTo(kDataStart + Padding, Align); +} + +template<size_t Align, size_t Padding> +constexpr size_t SkBlockAllocator::Overhead() { + // NOTE: On most platforms, SkBlockAllocator is packed; this is not the case on debug builds + // due to extra fields, or on WASM due to 4byte pointers but 16byte max align. + return std::max(sizeof(SkBlockAllocator), + offsetof(SkBlockAllocator, fHead) + BlockOverhead<Align, Padding>()); +} + +template<size_t Align, size_t Padding> +constexpr size_t SkBlockAllocator::MaxBlockSize() { + // Without loss of generality, assumes 'align' will be the largest encountered alignment for the + // allocator (if it's not, the largest align will be encountered by the compiler and pass/fail + // the same set of static asserts). 
+ return BlockOverhead<Align, Padding>() + kMaxAllocationSize; +} + +template<size_t Align, size_t Padding> +void SkBlockAllocator::reserve(size_t size, ReserveFlags flags) { + if (size > kMaxAllocationSize) { + SK_ABORT("Allocation too large (%zu bytes requested)", size); + } + int iSize = (int) size; + if ((flags & kIgnoreExistingBytes_Flag) || + this->currentBlock()->avail<Align, Padding>() < iSize) { + + int blockSize = BlockOverhead<Align, Padding>() + iSize; + int maxSize = (flags & kIgnoreGrowthPolicy_Flag) ? blockSize + : MaxBlockSize<Align, Padding>(); + SkASSERT((size_t) maxSize <= (MaxBlockSize<Align, Padding>())); + + SkDEBUGCODE(auto oldTail = fTail;) + this->addBlock(blockSize, maxSize); + SkASSERT(fTail != oldTail); + // Releasing the just added block will move it into scratch space, allowing the original + // tail's bytes to be used first before the scratch block is activated. + this->releaseBlock(fTail); + } +} + +template <size_t Align, size_t Padding> +SkBlockAllocator::ByteRange SkBlockAllocator::allocate(size_t size) { + // Amount of extra space for a new block to make sure the allocation can succeed. + static constexpr int kBlockOverhead = (int) BlockOverhead<Align, Padding>(); + + // Ensures 'offset' and 'end' calculations will be valid + static_assert((kMaxAllocationSize + SkAlignTo(MaxBlockSize<Align, Padding>(), Align)) + <= (size_t) std::numeric_limits<int32_t>::max()); + // Ensures size + blockOverhead + addBlock's alignment operations will be valid + static_assert(kMaxAllocationSize + kBlockOverhead + ((1 << 12) - 1) // 4K align for large blocks + <= std::numeric_limits<int32_t>::max()); + + if (size > kMaxAllocationSize) { + SK_ABORT("Allocation too large (%zu bytes requested)", size); + } + + int iSize = (int) size; + int offset = fTail->cursor<Align, Padding>(); + int end = offset + iSize; + if (end > fTail->fSize) { + this->addBlock(iSize + kBlockOverhead, MaxBlockSize<Align, Padding>()); + offset = fTail->cursor<Align, Padding>(); + end = offset + iSize; + } + + // Check invariants + SkASSERT(end <= fTail->fSize); + SkASSERT(end - offset == iSize); + SkASSERT(offset - fTail->fCursor >= (int) Padding && + offset - fTail->fCursor <= (int) (Padding + Align - 1)); + SkASSERT(reinterpret_cast<uintptr_t>(fTail->ptr(offset)) % Align == 0); + + int start = fTail->fCursor; + fTail->fCursor = end; + + fTail->unpoisonRange(offset - Padding, end); + + return {fTail, start, offset, end}; +} + +template <size_t Align, size_t Padding> +SkBlockAllocator::Block* SkBlockAllocator::owningBlock(const void* p, int start) { + // 'p' was originally formed by aligning 'block + start + Padding', producing the inequality: + // block + start + Padding <= p <= block + start + Padding + Align-1 + // Rearranging this yields: + // block <= p - start - Padding <= block + Align-1 + // Masking these terms by ~(Align-1) reconstructs 'block' if the alignment of the block is + // greater than or equal to Align (since block & ~(Align-1) == (block + Align-1) & ~(Align-1) + // in that case). Overalignment does not reduce to inequality unfortunately. + if /* constexpr */ (Align <= kAddressAlign) { + Block* block = reinterpret_cast<Block*>( + (reinterpret_cast<uintptr_t>(p) - start - Padding) & ~(Align - 1)); + SkASSERT(block->fSentinel == kAssignedMarker); + return block; + } else { + // There's not a constant-time expression available to reconstruct the block from 'p', + // but this is unlikely to happen frequently. 
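+        // (Editorial illustration: with Align=32 but kAddressAlign=16, the block may start at
+        // an address that is 16- but not 32-byte aligned, so masking 'p - start - Padding' by
+        // ~31 could land 16 bytes below the true block start; hence the linear-search fallback.)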
+ return this->findOwningBlock(p); + } +} + +template <size_t Align, size_t Padding> +int SkBlockAllocator::Block::alignedOffset(int offset) const { + static_assert(SkIsPow2(Align)); + // Aligning adds (Padding + Align - 1) as an intermediate step, so ensure that can't overflow + static_assert(MaxBlockSize<Align, Padding>() + Padding + Align - 1 + <= (size_t) std::numeric_limits<int32_t>::max()); + + if /* constexpr */ (Align <= kAddressAlign) { + // Same as SkAlignTo, but operates on ints instead of size_t + return (offset + Padding + Align - 1) & ~(Align - 1); + } else { + // Must take into account that 'this' may be starting at a pointer that doesn't satisfy the + // larger alignment request, so must align the entire pointer, not just offset + uintptr_t blockPtr = reinterpret_cast<uintptr_t>(this); + uintptr_t alignedPtr = (blockPtr + offset + Padding + Align - 1) & ~(Align - 1); + SkASSERT(alignedPtr - blockPtr <= (uintptr_t) std::numeric_limits<int32_t>::max()); + return (int) (alignedPtr - blockPtr); + } +} + +bool SkBlockAllocator::Block::resize(int start, int end, int deltaBytes) { + SkASSERT(fSentinel == kAssignedMarker); + SkASSERT(start >= kDataStart && end <= fSize && start < end); + + if (deltaBytes > kMaxAllocationSize || deltaBytes < -kMaxAllocationSize) { + // Cannot possibly satisfy the resize and could overflow subsequent math + return false; + } + if (fCursor == end) { + int nextCursor = end + deltaBytes; + SkASSERT(nextCursor >= start); + // We still check nextCursor >= start for release builds that wouldn't assert. + if (nextCursor <= fSize && nextCursor >= start) { + if (nextCursor < fCursor) { + // The allocation got smaller; poison the space that can no longer be used. + this->poisonRange(nextCursor + 1, end); + } else { + // The allocation got larger; unpoison the space that can now be used. + this->unpoisonRange(end, nextCursor); + } + + fCursor = nextCursor; + return true; + } + } + return false; +} + +// NOTE: release is equivalent to resize(start, end, start - end), and the compiler can optimize +// most of the operations away, but it wasn't able to remove the unnecessary branch comparing the +// new cursor to the block size or old start, so release() gets a specialization. +bool SkBlockAllocator::Block::release(int start, int end) { + SkASSERT(fSentinel == kAssignedMarker); + SkASSERT(start >= kDataStart && end <= fSize && start < end); + + this->poisonRange(start, end); + + if (fCursor == end) { + fCursor = start; + return true; + } else { + return false; + } +} + +///////// Block iteration +template <bool Forward, bool Const> +class SkBlockAllocator::BlockIter { +private: + using BlockT = typename std::conditional<Const, const Block, Block>::type; + using AllocatorT = + typename std::conditional<Const, const SkBlockAllocator, SkBlockAllocator>::type; + +public: + BlockIter(AllocatorT* allocator) : fAllocator(allocator) {} + + class Item { + public: + bool operator!=(const Item& other) const { return fBlock != other.fBlock; } + + BlockT* operator*() const { return fBlock; } + + Item& operator++() { + this->advance(fNext); + return *this; + } + + private: + friend BlockIter; + + Item(BlockT* block) { this->advance(block); } + + void advance(BlockT* block) { + fBlock = block; + fNext = block ? (Forward ? block->fNext : block->fPrev) : nullptr; + if (!Forward && fNext && fNext->isScratch()) { + // For reverse-iteration only, we need to stop at the head, not the scratch block + // possibly stashed in head->prev. 
+ fNext = nullptr; + } + SkASSERT(!fNext || !fNext->isScratch()); + } + + BlockT* fBlock; + // Cache this before operator++ so that fBlock can be released during iteration + BlockT* fNext; + }; + + Item begin() const { return Item(Forward ? &fAllocator->fHead : fAllocator->fTail); } + Item end() const { return Item(nullptr); } + +private: + AllocatorT* fAllocator; +}; + +SkBlockAllocator::BlockIter<true, false> SkBlockAllocator::blocks() { + return BlockIter<true, false>(this); +} +SkBlockAllocator::BlockIter<true, true> SkBlockAllocator::blocks() const { + return BlockIter<true, true>(this); +} +SkBlockAllocator::BlockIter<false, false> SkBlockAllocator::rblocks() { + return BlockIter<false, false>(this); +} +SkBlockAllocator::BlockIter<false, true> SkBlockAllocator::rblocks() const { + return BlockIter<false, true>(this); +} + +#endif // SkBlockAllocator_DEFINED diff --git a/gfx/skia/skia/src/base/SkBuffer.cpp b/gfx/skia/skia/src/base/SkBuffer.cpp new file mode 100644 index 0000000000..bb39782215 --- /dev/null +++ b/gfx/skia/skia/src/base/SkBuffer.cpp @@ -0,0 +1,90 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "src/base/SkBuffer.h" + +#include "include/private/base/SkAlign.h" +#include "include/private/base/SkMalloc.h" + +#include <cstdint> + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +const void* SkRBuffer::skip(size_t size) { + if (fValid && size <= this->available()) { + const void* pos = fPos; + fPos += size; + return pos; + } + fValid = false; + return nullptr; +} + +bool SkRBuffer::read(void* buffer, size_t size) { + if (const void* src = this->skip(size)) { + sk_careful_memcpy(buffer, src, size); + return true; + } + return false; +} + +bool SkRBuffer::skipToAlign4() { + intptr_t pos = reinterpret_cast<intptr_t>(fPos); + size_t n = SkAlign4(pos) - pos; + if (fValid && n <= this->available()) { + fPos += n; + return true; + } else { + fValid = false; + return false; + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +void* SkWBuffer::skip(size_t size) { + void* result = fPos; + writeNoSizeCheck(nullptr, size); + return fData == nullptr ? nullptr : result; +} + +void SkWBuffer::writeNoSizeCheck(const void* buffer, size_t size) { + SkASSERT(fData == nullptr || fStop == nullptr || fPos + size <= fStop); + if (fData && buffer) { + sk_careful_memcpy(fPos, buffer, size); + } + fPos += size; +} + +size_t SkWBuffer::padToAlign4() { + size_t pos = this->pos(); + size_t n = SkAlign4(pos) - pos; + + if (n && fData) + { + char* p = fPos; + char* stop = p + n; + do { + *p++ = 0; + } while (p < stop); + } + fPos += n; + return n; +} + +#if 0 +#ifdef SK_DEBUG + static void AssertBuffer32(const void* buffer) + { + SkASSERT(buffer); + SkASSERT(((size_t)buffer & 3) == 0); + } +#else + #define AssertBuffer32(buffer) +#endif + +#endif diff --git a/gfx/skia/skia/src/base/SkBuffer.h b/gfx/skia/skia/src/base/SkBuffer.h new file mode 100644 index 0000000000..b30fda499d --- /dev/null +++ b/gfx/skia/skia/src/base/SkBuffer.h @@ -0,0 +1,134 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SkBuffer_DEFINED +#define SkBuffer_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkNoncopyable.h" +#include "src/base/SkSafeMath.h" + +#include <cstddef> +#include <cstdint> + +typedef float SkScalar; + +/** \class SkRBuffer + + Light weight class for reading data from a memory block. + The RBuffer is given the buffer to read from, with either a specified size + or no size (in which case no range checking is performed). It is iillegal + to attempt to read a value from an empty RBuffer (data == null). +*/ +class SkRBuffer : SkNoncopyable { +public: + SkRBuffer() : fData(nullptr), fPos(nullptr), fStop(nullptr) {} + + /** Initialize RBuffer with a data point and length. + */ + SkRBuffer(const void* data, size_t size) { + SkASSERT(data != nullptr || size == 0); + fData = (const char*)data; + fPos = (const char*)data; + fStop = (const char*)data + size; + } + + /** Return the number of bytes that have been read from the beginning + of the data pointer. + */ + size_t pos() const { return fPos - fData; } + /** Return the total size of the data pointer. Only defined if the length was + specified in the constructor or in a call to reset(). + */ + size_t size() const { return fStop - fData; } + /** Return true if the buffer has read to the end of the data pointer. + Only defined if the length was specified in the constructor or in a call + to reset(). Always returns true if the length was not specified. + */ + bool eof() const { return fPos >= fStop; } + + size_t available() const { return fStop - fPos; } + + bool isValid() const { return fValid; } + + /** Read the specified number of bytes from the data pointer. If buffer is not + null, copy those bytes into buffer. + */ + bool read(void* buffer, size_t size); + bool skipToAlign4(); + + bool readU8(uint8_t* x) { return this->read(x, 1); } + bool readS32(int32_t* x) { return this->read(x, 4); } + bool readU32(uint32_t* x) { return this->read(x, 4); } + + // returns nullptr on failure + const void* skip(size_t bytes); + template <typename T> const T* skipCount(size_t count) { + return static_cast<const T*>(this->skip(SkSafeMath::Mul(count, sizeof(T)))); + } + +private: + const char* fData; + const char* fPos; + const char* fStop; + bool fValid = true; +}; + +/** \class SkWBuffer + + Light weight class for writing data to a memory block. + The WBuffer is given the buffer to write into, with either a specified size + or no size, in which case no range checking is performed. An empty WBuffer + is legal, in which case no data is ever written, but the relative pos() + is updated. 
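+
+    Illustrative two-pass sketch (editorial, not part of the original header): measure with
+    a storage-less SkWBuffer, then write for real once storage of that size exists.
+
+        SkWBuffer sizer;                       // null storage: only advances pos()
+        sizer.write32(17);
+        sizer.writeBool(true);
+        sizer.padToAlign4();
+        size_t needed = sizer.pos();           // bytes required for the real write
+
+        SkWBuffer writer(realStorage, needed); // 'realStorage' is any buffer of 'needed' bytes
+        writer.write32(17);
+        writer.writeBool(true);
+        writer.padToAlign4();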
+*/ +class SkWBuffer : SkNoncopyable { +public: + SkWBuffer() : fData(nullptr), fPos(nullptr), fStop(nullptr) {} + SkWBuffer(void* data) { reset(data); } + SkWBuffer(void* data, size_t size) { reset(data, size); } + + void reset(void* data) { + fData = (char*)data; + fPos = (char*)data; + fStop = nullptr; // no bounds checking + } + + void reset(void* data, size_t size) { + SkASSERT(data != nullptr || size == 0); + fData = (char*)data; + fPos = (char*)data; + fStop = (char*)data + size; + } + + size_t pos() const { return fPos - fData; } + void* skip(size_t size); // return start of skipped data + + void write(const void* buffer, size_t size) { + if (size) { + this->writeNoSizeCheck(buffer, size); + } + } + + size_t padToAlign4(); + + void writePtr(const void* x) { this->writeNoSizeCheck(&x, sizeof(x)); } + void writeScalar(SkScalar x) { this->writeNoSizeCheck(&x, 4); } + void write32(int32_t x) { this->writeNoSizeCheck(&x, 4); } + void write16(int16_t x) { this->writeNoSizeCheck(&x, 2); } + void write8(int8_t x) { this->writeNoSizeCheck(&x, 1); } + void writeBool(bool x) { this->write8(x); } + +private: + void writeNoSizeCheck(const void* buffer, size_t size); + + char* fData; + char* fPos; + char* fStop; +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkContainers.cpp b/gfx/skia/skia/src/base/SkContainers.cpp new file mode 100644 index 0000000000..1e36a76ec4 --- /dev/null +++ b/gfx/skia/skia/src/base/SkContainers.cpp @@ -0,0 +1,107 @@ +// Copyright 2019 Google LLC. +// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. + +#include "include/private/base/SkContainers.h" + +#include "include/private/base/SkAlign.h" +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkFeatures.h" +#include "include/private/base/SkMalloc.h" +#include "include/private/base/SkTo.h" + +#include <algorithm> +#include <cstddef> + +#if defined(SK_BUILD_FOR_MAC) || defined(SK_BUILD_FOR_IOS) +#include <malloc/malloc.h> +#elif defined(SK_BUILD_FOR_ANDROID) || (defined(SK_BUILD_FOR_UNIX) && !defined(__OpenBSD__)) +#include <malloc.h> +#elif defined(SK_BUILD_FOR_WIN) +#include <malloc.h> +#endif + +namespace { +// Return at least as many bytes to keep malloc aligned. +constexpr size_t kMinBytes = alignof(max_align_t); + +SkSpan<std::byte> complete_size(void* ptr, size_t size) { + if (ptr == nullptr) { + return {}; + } + + size_t completeSize = size; + + // Use the OS specific calls to find the actual capacity. + #if defined(SK_BUILD_FOR_MAC) || defined(SK_BUILD_FOR_IOS) + // TODO: remove the max, when the chrome implementation of malloc_size doesn't return 0. 
+ completeSize = std::max(malloc_size(ptr), size); + #elif defined(SK_BUILD_FOR_ANDROID) && __ANDROID_API__ >= 17 + completeSize = malloc_usable_size(ptr); + SkASSERT(completeSize >= size); + #elif defined(SK_BUILD_FOR_UNIX) && !defined(__OpenBSD__) + completeSize = malloc_usable_size(ptr); + SkASSERT(completeSize >= size); + #elif defined(SK_BUILD_FOR_WIN) + completeSize = _msize(ptr); + SkASSERT(completeSize >= size); + #endif + + return {static_cast<std::byte*>(ptr), completeSize}; +} +} // namespace + +SkSpan<std::byte> SkContainerAllocator::allocate(int capacity, double growthFactor) { + SkASSERT(capacity >= 0); + SkASSERT(growthFactor >= 1.0); + SkASSERT_RELEASE(capacity <= fMaxCapacity); + + if (growthFactor > 1.0 && capacity > 0) { + capacity = this->growthFactorCapacity(capacity, growthFactor); + } + + return sk_allocate_throw(capacity * fSizeOfT); +} + +size_t SkContainerAllocator::roundUpCapacity(int64_t capacity) const { + SkASSERT(capacity >= 0); + + // If round will not go above fMaxCapacity return rounded capacity. + if (capacity < fMaxCapacity - kCapacityMultiple) { + return SkAlignTo(capacity, kCapacityMultiple); + } + + return SkToSizeT(fMaxCapacity); +} + +size_t SkContainerAllocator::growthFactorCapacity(int capacity, double growthFactor) const { + SkASSERT(capacity >= 0); + SkASSERT(growthFactor >= 1.0); + // Multiply by the growthFactor. Remember this must be done in 64-bit ints and not + // size_t because size_t changes. + const int64_t capacityGrowth = static_cast<int64_t>(capacity * growthFactor); + + // Notice that for small values of capacity, rounding up will provide most of the growth. + return this->roundUpCapacity(capacityGrowth); +} + + +SkSpan<std::byte> sk_allocate_canfail(size_t size) { + // Make sure to ask for at least the minimum number of bytes. + const size_t adjustedSize = std::max(size, kMinBytes); + void* ptr = sk_malloc_canfail(adjustedSize); + return complete_size(ptr, adjustedSize); +} + +SkSpan<std::byte> sk_allocate_throw(size_t size) { + if (size == 0) { + return {}; + } + // Make sure to ask for at least the minimum number of bytes. + const size_t adjustedSize = std::max(size, kMinBytes); + void* ptr = sk_malloc_throw(adjustedSize); + return complete_size(ptr, adjustedSize); +} + +void sk_report_container_overflow_and_die() { + SK_ABORT("Requested capacity is too large."); +} diff --git a/gfx/skia/skia/src/base/SkCubics.cpp b/gfx/skia/skia/src/base/SkCubics.cpp new file mode 100644 index 0000000000..64a4beb007 --- /dev/null +++ b/gfx/skia/skia/src/base/SkCubics.cpp @@ -0,0 +1,241 @@ +/* + * Copyright 2023 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "src/base/SkCubics.h" + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkFloatingPoint.h" +#include "include/private/base/SkTPin.h" +#include "src/base/SkQuads.h" + +#include <algorithm> +#include <cmath> + +static constexpr double PI = 3.141592653589793; + +static bool nearly_equal(double x, double y) { + if (sk_double_nearly_zero(x)) { + return sk_double_nearly_zero(y); + } + return sk_doubles_nearly_equal_ulps(x, y); +} + +// When the A coefficient of a cubic is close to 0, there can be floating point error +// that arises from computing a very large root. In those cases, we would rather be +// precise about the smaller 2 roots, so we have this arbitrary cutoff for when A is +// really small or small compared to B. 
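+// (Editorial illustration: for A = 1e-9, B = 1, the cubic has one root near -B/A = -1e9;
+// dividing through by A to run the cubic formula would amplify rounding error in the two
+// roots near the origin, so treating the polynomial as the quadratic B*t^2 + C*t + D is
+// the better-conditioned choice.)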
+static bool close_to_a_quadratic(double A, double B) { + if (sk_double_nearly_zero(B)) { + return sk_double_nearly_zero(A); + } + return std::abs(A / B) < 1.0e-7; +} + +int SkCubics::RootsReal(double A, double B, double C, double D, double solution[3]) { + if (close_to_a_quadratic(A, B)) { + return SkQuads::RootsReal(B, C, D, solution); + } + if (sk_double_nearly_zero(D)) { // 0 is one root + int num = SkQuads::RootsReal(A, B, C, solution); + for (int i = 0; i < num; ++i) { + if (sk_double_nearly_zero(solution[i])) { + return num; + } + } + solution[num++] = 0; + return num; + } + if (sk_double_nearly_zero(A + B + C + D)) { // 1 is one root + int num = SkQuads::RootsReal(A, A + B, -D, solution); + for (int i = 0; i < num; ++i) { + if (sk_doubles_nearly_equal_ulps(solution[i], 1)) { + return num; + } + } + solution[num++] = 1; + return num; + } + double a, b, c; + { + // If A is zero (e.g. B was nan and thus close_to_a_quadratic was false), we will + // temporarily have infinities rolling about, but will catch that when checking + // R2MinusQ3. + double invA = sk_ieee_double_divide(1, A); + a = B * invA; + b = C * invA; + c = D * invA; + } + double a2 = a * a; + double Q = (a2 - b * 3) / 9; + double R = (2 * a2 * a - 9 * a * b + 27 * c) / 54; + double R2 = R * R; + double Q3 = Q * Q * Q; + double R2MinusQ3 = R2 - Q3; + // If one of R2 Q3 is infinite or nan, subtracting them will also be infinite/nan. + // If both are infinite or nan, the subtraction will be nan. + // In either case, we have no finite roots. + if (!std::isfinite(R2MinusQ3)) { + return 0; + } + double adiv3 = a / 3; + double r; + double* roots = solution; + if (R2MinusQ3 < 0) { // we have 3 real roots + // the divide/root can, due to finite precisions, be slightly outside of -1...1 + const double theta = acos(SkTPin(R / std::sqrt(Q3), -1., 1.)); + const double neg2RootQ = -2 * std::sqrt(Q); + + r = neg2RootQ * cos(theta / 3) - adiv3; + *roots++ = r; + + r = neg2RootQ * cos((theta + 2 * PI) / 3) - adiv3; + if (!nearly_equal(solution[0], r)) { + *roots++ = r; + } + r = neg2RootQ * cos((theta - 2 * PI) / 3) - adiv3; + if (!nearly_equal(solution[0], r) && + (roots - solution == 1 || !nearly_equal(solution[1], r))) { + *roots++ = r; + } + } else { // we have 1 real root + const double sqrtR2MinusQ3 = std::sqrt(R2MinusQ3); + A = fabs(R) + sqrtR2MinusQ3; + A = std::cbrt(A); // cube root + if (R > 0) { + A = -A; + } + if (!sk_double_nearly_zero(A)) { + A += Q / A; + } + r = A - adiv3; + *roots++ = r; + if (!sk_double_nearly_zero(R2) && + sk_doubles_nearly_equal_ulps(R2, Q3)) { + r = -A / 2 - adiv3; + if (!nearly_equal(solution[0], r)) { + *roots++ = r; + } + } + } + return static_cast<int>(roots - solution); +} + +int SkCubics::RootsValidT(double A, double B, double C, double D, + double solution[3]) { + double allRoots[3] = {0, 0, 0}; + int realRoots = SkCubics::RootsReal(A, B, C, D, allRoots); + int foundRoots = 0; + for (int index = 0; index < realRoots; ++index) { + double tValue = allRoots[index]; + if (tValue >= 1.0 && tValue <= 1.00005) { + // Make sure we do not already have 1 (or something very close) in the list of roots. + if ((foundRoots < 1 || !sk_doubles_nearly_equal_ulps(solution[0], 1)) && + (foundRoots < 2 || !sk_doubles_nearly_equal_ulps(solution[1], 1))) { + solution[foundRoots++] = 1; + } + } else if (tValue >= -0.00005 && (tValue <= 0.0 || sk_double_nearly_zero(tValue))) { + // Make sure we do not already have 0 (or something very close) in the list of roots. 
+ if ((foundRoots < 1 || !sk_double_nearly_zero(solution[0])) && + (foundRoots < 2 || !sk_double_nearly_zero(solution[1]))) { + solution[foundRoots++] = 0; + } + } else if (tValue > 0.0 && tValue < 1.0) { + solution[foundRoots++] = tValue; + } + } + return foundRoots; +} + +static bool approximately_zero(double x) { + // This cutoff for our binary search hopefully strikes a good balance between + // performance and accuracy. + return std::abs(x) < 0.00000001; +} + +static int find_extrema_valid_t(double A, double B, double C, + double t[2]) { + // To find the local min and max of a cubic, we take the derivative and + // solve when that is equal to 0. + // d/dt (A*t^3 + B*t^2 + C*t + D) = 3A*t^2 + 2B*t + C + double roots[2] = {0, 0}; + int numRoots = SkQuads::RootsReal(3*A, 2*B, C, roots); + int validRoots = 0; + for (int i = 0; i < numRoots; i++) { + double tValue = roots[i]; + if (tValue >= 0 && tValue <= 1.0) { + t[validRoots++] = tValue; + } + } + return validRoots; +} + +static double binary_search(double A, double B, double C, double D, double start, double stop) { + SkASSERT(start <= stop); + double left = SkCubics::EvalAt(A, B, C, D, start); + if (approximately_zero(left)) { + return start; + } + double right = SkCubics::EvalAt(A, B, C, D, stop); + if (!std::isfinite(left) || !std::isfinite(right)) { + return -1; // Not going to deal with one or more endpoints being non-finite. + } + if ((left > 0 && right > 0) || (left < 0 && right < 0)) { + return -1; // We can only have a root if one is above 0 and the other is below 0. + } + + constexpr int maxIterations = 1000; // prevent infinite loop + for (int i = 0; i < maxIterations; i++) { + double step = (start + stop) / 2; + double curr = SkCubics::EvalAt(A, B, C, D, step); + if (approximately_zero(curr)) { + return step; + } + if ((curr < 0 && left < 0) || (curr > 0 && left > 0)) { + // go right + start = step; + } else { + // go left + stop = step; + } + } + return -1; +} + +int SkCubics::BinarySearchRootsValidT(double A, double B, double C, double D, + double solution[3]) { + if (!std::isfinite(A) || !std::isfinite(B) || !std::isfinite(C) || !std::isfinite(D)) { + return 0; + } + double regions[4] = {0, 0, 0, 1}; + // Find local minima and maxima + double minMax[2] = {0, 0}; + int extremaCount = find_extrema_valid_t(A, B, C, minMax); + int startIndex = 2 - extremaCount; + if (extremaCount == 1) { + regions[startIndex + 1] = minMax[0]; + } + if (extremaCount == 2) { + // While the roots will be in the range 0 to 1 inclusive, they might not be sorted. + regions[startIndex + 1] = std::min(minMax[0], minMax[1]); + regions[startIndex + 2] = std::max(minMax[0], minMax[1]); + } + // Starting at regions[startIndex] and going up through regions[3], we have + // an ascending list of numbers in the range 0 to 1.0, between which are the possible + // locations of a root. 
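+    // (Editorial illustration: two extrema at t=0.3 and t=0.7 give regions {0, 0.3, 0.7, 1}
+    // with startIndex=0; a single extremum at t=0.5 gives regions {0, 0, 0.5, 1} with
+    // startIndex=1, so only [0, 0.5] and [0.5, 1] are searched; no extrema leaves just [0, 1].)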
+ int foundRoots = 0; + for (;startIndex < 3; startIndex++) { + double root = binary_search(A, B, C, D, regions[startIndex], regions[startIndex + 1]); + if (root >= 0) { + // Check for duplicates + if ((foundRoots < 1 || !approximately_zero(solution[0] - root)) && + (foundRoots < 2 || !approximately_zero(solution[1] - root))) { + solution[foundRoots++] = root; + } + } + } + return foundRoots; +} diff --git a/gfx/skia/skia/src/base/SkCubics.h b/gfx/skia/skia/src/base/SkCubics.h new file mode 100644 index 0000000000..7e3cbbb567 --- /dev/null +++ b/gfx/skia/skia/src/base/SkCubics.h @@ -0,0 +1,61 @@ +/* + * Copyright 2023 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ +#ifndef SkCubics_DEFINED +#define SkCubics_DEFINED + +/** + * Utilities for dealing with cubic formulas with one variable: + * f(t) = A*t^3 + B*t^2 + C*t + d + */ +class SkCubics { +public: + /** + * Puts up to 3 real solutions to the equation + * A*t^3 + B*t^2 + C*t + d = 0 + * in the provided array and returns how many roots that was. + */ + static int RootsReal(double A, double B, double C, double D, + double solution[3]); + + /** + * Puts up to 3 real solutions to the equation + * A*t^3 + B*t^2 + C*t + D = 0 + * in the provided array, with the constraint that t is in the range [0.0, 1.0], + * and returns how many roots that was. + */ + static int RootsValidT(double A, double B, double C, double D, + double solution[3]); + + + /** + * Puts up to 3 real solutions to the equation + * A*t^3 + B*t^2 + C*t + D = 0 + * in the provided array, with the constraint that t is in the range [0.0, 1.0], + * and returns how many roots that was. + * This is a slower method than RootsValidT, but more accurate in circumstances + * where floating point error gets too big. + */ + static int BinarySearchRootsValidT(double A, double B, double C, double D, + double solution[3]); + + /** + * Evaluates the cubic function with the 4 provided coefficients and the + * provided variable. + */ + static double EvalAt(double A, double B, double C, double D, double t) { + return A * t * t * t + + B * t * t + + C * t + + D; + } + + static double EvalAt(double coefficients[4], double t) { + return EvalAt(coefficients[0], coefficients[1], coefficients[2], coefficients[3], t); + } +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkDeque.cpp b/gfx/skia/skia/src/base/SkDeque.cpp new file mode 100644 index 0000000000..ffff336f90 --- /dev/null +++ b/gfx/skia/skia/src/base/SkDeque.cpp @@ -0,0 +1,310 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkDeque.h" +#include "include/private/base/SkMalloc.h" + +#include <cstddef> + +struct SkDeque::Block { + Block* fNext; + Block* fPrev; + char* fBegin; // start of used section in this chunk + char* fEnd; // end of used section in this chunk + char* fStop; // end of the allocated chunk + + char* start() { return (char*)(this + 1); } + const char* start() const { return (const char*)(this + 1); } + + void init(size_t size) { + fNext = fPrev = nullptr; + fBegin = fEnd = nullptr; + fStop = (char*)this + size; + } +}; + +SkDeque::SkDeque(size_t elemSize, int allocCount) + : fElemSize(elemSize) + , fInitialStorage(nullptr) + , fCount(0) + , fAllocCount(allocCount) { + SkASSERT(allocCount >= 1); + fFrontBlock = fBackBlock = nullptr; + fFront = fBack = nullptr; +} + +SkDeque::SkDeque(size_t elemSize, void* storage, size_t storageSize, int allocCount) + : fElemSize(elemSize) + , fInitialStorage(storage) + , fCount(0) + , fAllocCount(allocCount) { + SkASSERT(storageSize == 0 || storage != nullptr); + SkASSERT(allocCount >= 1); + + if (storageSize >= sizeof(Block) + elemSize) { + fFrontBlock = (Block*)storage; + fFrontBlock->init(storageSize); + } else { + fFrontBlock = nullptr; + } + fBackBlock = fFrontBlock; + fFront = fBack = nullptr; +} + +SkDeque::~SkDeque() { + Block* head = fFrontBlock; + Block* initialHead = (Block*)fInitialStorage; + + while (head) { + Block* next = head->fNext; + if (head != initialHead) { + this->freeBlock(head); + } + head = next; + } +} + +void* SkDeque::push_front() { + fCount += 1; + + if (nullptr == fFrontBlock) { + fFrontBlock = this->allocateBlock(fAllocCount); + fBackBlock = fFrontBlock; // update our linklist + } + + Block* first = fFrontBlock; + char* begin; + + if (nullptr == first->fBegin) { + INIT_CHUNK: + first->fEnd = first->fStop; + begin = first->fStop - fElemSize; + } else { + begin = first->fBegin - fElemSize; + if (begin < first->start()) { // no more room in this chunk + // should we alloc more as we accumulate more elements? + first = this->allocateBlock(fAllocCount); + first->fNext = fFrontBlock; + fFrontBlock->fPrev = first; + fFrontBlock = first; + goto INIT_CHUNK; + } + } + + first->fBegin = begin; + + if (nullptr == fFront) { + SkASSERT(nullptr == fBack); + fFront = fBack = begin; + } else { + SkASSERT(fBack); + fFront = begin; + } + + return begin; +} + +void* SkDeque::push_back() { + fCount += 1; + + if (nullptr == fBackBlock) { + fBackBlock = this->allocateBlock(fAllocCount); + fFrontBlock = fBackBlock; // update our linklist + } + + Block* last = fBackBlock; + char* end; + + if (nullptr == last->fBegin) { + INIT_CHUNK: + last->fBegin = last->start(); + end = last->fBegin + fElemSize; + } else { + end = last->fEnd + fElemSize; + if (end > last->fStop) { // no more room in this chunk + // should we alloc more as we accumulate more elements? 
+ last = this->allocateBlock(fAllocCount); + last->fPrev = fBackBlock; + fBackBlock->fNext = last; + fBackBlock = last; + goto INIT_CHUNK; + } + } + + last->fEnd = end; + end -= fElemSize; + + if (nullptr == fBack) { + SkASSERT(nullptr == fFront); + fFront = fBack = end; + } else { + SkASSERT(fFront); + fBack = end; + } + + return end; +} + +void SkDeque::pop_front() { + SkASSERT(fCount > 0); + fCount -= 1; + + Block* first = fFrontBlock; + + SkASSERT(first != nullptr); + + if (first->fBegin == nullptr) { // we were marked empty from before + first = first->fNext; + SkASSERT(first != nullptr); // else we popped too far + first->fPrev = nullptr; + this->freeBlock(fFrontBlock); + fFrontBlock = first; + } + + char* begin = first->fBegin + fElemSize; + SkASSERT(begin <= first->fEnd); + + if (begin < fFrontBlock->fEnd) { + first->fBegin = begin; + SkASSERT(first->fBegin); + fFront = first->fBegin; + } else { + first->fBegin = first->fEnd = nullptr; // mark as empty + if (nullptr == first->fNext) { + fFront = fBack = nullptr; + } else { + SkASSERT(first->fNext->fBegin); + fFront = first->fNext->fBegin; + } + } +} + +void SkDeque::pop_back() { + SkASSERT(fCount > 0); + fCount -= 1; + + Block* last = fBackBlock; + + SkASSERT(last != nullptr); + + if (last->fEnd == nullptr) { // we were marked empty from before + last = last->fPrev; + SkASSERT(last != nullptr); // else we popped too far + last->fNext = nullptr; + this->freeBlock(fBackBlock); + fBackBlock = last; + } + + char* end = last->fEnd - fElemSize; + SkASSERT(end >= last->fBegin); + + if (end > last->fBegin) { + last->fEnd = end; + SkASSERT(last->fEnd); + fBack = last->fEnd - fElemSize; + } else { + last->fBegin = last->fEnd = nullptr; // mark as empty + if (nullptr == last->fPrev) { + fFront = fBack = nullptr; + } else { + SkASSERT(last->fPrev->fEnd); + fBack = last->fPrev->fEnd - fElemSize; + } + } +} + +int SkDeque::numBlocksAllocated() const { + int numBlocks = 0; + + for (const Block* temp = fFrontBlock; temp; temp = temp->fNext) { + ++numBlocks; + } + + return numBlocks; +} + +SkDeque::Block* SkDeque::allocateBlock(int allocCount) { + Block* newBlock = (Block*)sk_malloc_throw(sizeof(Block) + allocCount * fElemSize); + newBlock->init(sizeof(Block) + allocCount * fElemSize); + return newBlock; +} + +void SkDeque::freeBlock(Block* block) { + sk_free(block); +} + +/////////////////////////////////////////////////////////////////////////////// + +SkDeque::Iter::Iter() : fCurBlock(nullptr), fPos(nullptr), fElemSize(0) {} + +SkDeque::Iter::Iter(const SkDeque& d, IterStart startLoc) { + this->reset(d, startLoc); +} + +// Due to how reset and next work, next actually returns the current element +// pointed to by fPos and then updates fPos to point to the next one. +void* SkDeque::Iter::next() { + char* pos = fPos; + + if (pos) { // if we were valid, try to move to the next setting + char* next = pos + fElemSize; + SkASSERT(next <= fCurBlock->fEnd); + if (next == fCurBlock->fEnd) { // exhausted this chunk, move to next + do { + fCurBlock = fCurBlock->fNext; + } while (fCurBlock != nullptr && fCurBlock->fBegin == nullptr); + next = fCurBlock ? fCurBlock->fBegin : nullptr; + } + fPos = next; + } + return pos; +} + +// Like next, prev actually returns the current element pointed to by fPos and +// then makes fPos point to the previous element. 
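+//
+// (Editorial aside: a typical forward traversal with next() looks like the sketch below;
+// a backward traversal is symmetric, constructing the Iter with kBack_IterStart and
+// calling prev().)
+//
+//     SkDeque::Iter iter(deque, SkDeque::kFront_IterStart);
+//     while (void* elem = iter.next()) {
+//         // 'elem' points at one element of fElemSize bytes
+//     }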
+void* SkDeque::Iter::prev() { + char* pos = fPos; + + if (pos) { // if we were valid, try to move to the prior setting + char* prev = pos - fElemSize; + SkASSERT(prev >= fCurBlock->fBegin - fElemSize); + if (prev < fCurBlock->fBegin) { // exhausted this chunk, move to prior + do { + fCurBlock = fCurBlock->fPrev; + } while (fCurBlock != nullptr && fCurBlock->fEnd == nullptr); + prev = fCurBlock ? fCurBlock->fEnd - fElemSize : nullptr; + } + fPos = prev; + } + return pos; +} + +// reset works by skipping through the spare blocks at the start (or end) +// of the doubly linked list until a non-empty one is found. The fPos +// member is then set to the first (or last) element in the block. If +// there are no elements in the deque both fCurBlock and fPos will come +// out of this routine nullptr. +void SkDeque::Iter::reset(const SkDeque& d, IterStart startLoc) { + fElemSize = d.fElemSize; + + if (kFront_IterStart == startLoc) { + // initialize the iterator to start at the front + fCurBlock = d.fFrontBlock; + while (fCurBlock && nullptr == fCurBlock->fBegin) { + fCurBlock = fCurBlock->fNext; + } + fPos = fCurBlock ? fCurBlock->fBegin : nullptr; + } else { + // initialize the iterator to start at the back + fCurBlock = d.fBackBlock; + while (fCurBlock && nullptr == fCurBlock->fEnd) { + fCurBlock = fCurBlock->fPrev; + } + fPos = fCurBlock ? fCurBlock->fEnd - fElemSize : nullptr; + } +} diff --git a/gfx/skia/skia/src/base/SkEndian.h b/gfx/skia/skia/src/base/SkEndian.h new file mode 100644 index 0000000000..732c248802 --- /dev/null +++ b/gfx/skia/skia/src/base/SkEndian.h @@ -0,0 +1,197 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkEndian_DEFINED +#define SkEndian_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkFeatures.h" + +#include <cstdint> + +/** \file SkEndian.h + + Macros and helper functions for handling 16 and 32 bit values in + big and little endian formats. +*/ + +#if defined(SK_CPU_LENDIAN) && defined(SK_CPU_BENDIAN) + #error "can't have both LENDIAN and BENDIAN defined" +#endif + +#if !defined(SK_CPU_LENDIAN) && !defined(SK_CPU_BENDIAN) + #error "need either LENDIAN or BENDIAN defined" +#endif + +/** Swap the two bytes in the low 16bits of the parameters. + e.g. 0x1234 -> 0x3412 +*/ +static inline uint16_t SkEndianSwap16(uint16_t value) { + return static_cast<uint16_t>((value >> 8) | ((value & 0xFF) << 8)); +} + +template<uint16_t N> struct SkTEndianSwap16 { + static const uint16_t value = static_cast<uint16_t>((N >> 8) | ((N & 0xFF) << 8)); +}; + +/** Vector version of SkEndianSwap16(), which swaps the + low two bytes of each value in the array. +*/ +static inline void SkEndianSwap16s(uint16_t array[], int count) { + SkASSERT(count == 0 || array != nullptr); + + while (--count >= 0) { + *array = SkEndianSwap16(*array); + array += 1; + } +} + +/** Reverse all 4 bytes in a 32bit value. + e.g. 0x12345678 -> 0x78563412 +*/ +static constexpr uint32_t SkEndianSwap32(uint32_t value) { + return ((value & 0xFF) << 24) | + ((value & 0xFF00) << 8) | + ((value & 0xFF0000) >> 8) | + (value >> 24); +} + +template<uint32_t N> struct SkTEndianSwap32 { + static const uint32_t value = ((N & 0xFF) << 24) | + ((N & 0xFF00) << 8) | + ((N & 0xFF0000) >> 8) | + (N >> 24); +}; + +/** Vector version of SkEndianSwap32(), which swaps the + bytes of each value in the array. 
+*/ +static inline void SkEndianSwap32s(uint32_t array[], int count) { + SkASSERT(count == 0 || array != nullptr); + + while (--count >= 0) { + *array = SkEndianSwap32(*array); + array += 1; + } +} + +/** Reverse all 8 bytes in a 64bit value. + e.g. 0x1122334455667788 -> 0x8877665544332211 +*/ +static inline uint64_t SkEndianSwap64(uint64_t value) { + return (((value & 0x00000000000000FFULL) << (8*7)) | + ((value & 0x000000000000FF00ULL) << (8*5)) | + ((value & 0x0000000000FF0000ULL) << (8*3)) | + ((value & 0x00000000FF000000ULL) << (8*1)) | + ((value & 0x000000FF00000000ULL) >> (8*1)) | + ((value & 0x0000FF0000000000ULL) >> (8*3)) | + ((value & 0x00FF000000000000ULL) >> (8*5)) | + ((value) >> (8*7))); +} +template<uint64_t N> struct SkTEndianSwap64 { + static const uint64_t value = (((N & 0x00000000000000FFULL) << (8*7)) | + ((N & 0x000000000000FF00ULL) << (8*5)) | + ((N & 0x0000000000FF0000ULL) << (8*3)) | + ((N & 0x00000000FF000000ULL) << (8*1)) | + ((N & 0x000000FF00000000ULL) >> (8*1)) | + ((N & 0x0000FF0000000000ULL) >> (8*3)) | + ((N & 0x00FF000000000000ULL) >> (8*5)) | + ((N) >> (8*7))); +}; + +/** Vector version of SkEndianSwap64(), which swaps the + bytes of each value in the array. +*/ +static inline void SkEndianSwap64s(uint64_t array[], int count) { + SkASSERT(count == 0 || array != nullptr); + + while (--count >= 0) { + *array = SkEndianSwap64(*array); + array += 1; + } +} + +#ifdef SK_CPU_LENDIAN + #define SkEndian_SwapBE16(n) SkEndianSwap16(n) + #define SkEndian_SwapBE32(n) SkEndianSwap32(n) + #define SkEndian_SwapBE64(n) SkEndianSwap64(n) + #define SkEndian_SwapLE16(n) (n) + #define SkEndian_SwapLE32(n) (n) + #define SkEndian_SwapLE64(n) (n) + + #define SkTEndian_SwapBE16(n) SkTEndianSwap16<n>::value + #define SkTEndian_SwapBE32(n) SkTEndianSwap32<n>::value + #define SkTEndian_SwapBE64(n) SkTEndianSwap64<n>::value + #define SkTEndian_SwapLE16(n) (n) + #define SkTEndian_SwapLE32(n) (n) + #define SkTEndian_SwapLE64(n) (n) +#else // SK_CPU_BENDIAN + #define SkEndian_SwapBE16(n) (n) + #define SkEndian_SwapBE32(n) (n) + #define SkEndian_SwapBE64(n) (n) + #define SkEndian_SwapLE16(n) SkEndianSwap16(n) + #define SkEndian_SwapLE32(n) SkEndianSwap32(n) + #define SkEndian_SwapLE64(n) SkEndianSwap64(n) + + #define SkTEndian_SwapBE16(n) (n) + #define SkTEndian_SwapBE32(n) (n) + #define SkTEndian_SwapBE64(n) (n) + #define SkTEndian_SwapLE16(n) SkTEndianSwap16<n>::value + #define SkTEndian_SwapLE32(n) SkTEndianSwap32<n>::value + #define SkTEndian_SwapLE64(n) SkTEndianSwap64<n>::value +#endif + +// When a bytestream is embedded in a 32-bit word, how far we need to +// shift the word to extract each byte from the low 8 bits by anding with 0xff. 
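+//
+// For example (editorial note): (word >> SkEndian_Byte1Shift) & 0xff yields byte 1 of the
+// embedded bytestream on either endianness, because the shift amounts below are reversed to
+// match the CPU's byte order.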
+#ifdef SK_CPU_LENDIAN + #define SkEndian_Byte0Shift 0 + #define SkEndian_Byte1Shift 8 + #define SkEndian_Byte2Shift 16 + #define SkEndian_Byte3Shift 24 +#else // SK_CPU_BENDIAN + #define SkEndian_Byte0Shift 24 + #define SkEndian_Byte1Shift 16 + #define SkEndian_Byte2Shift 8 + #define SkEndian_Byte3Shift 0 +#endif + + +#if defined(SK_UINT8_BITFIELD_LENDIAN) && defined(SK_UINT8_BITFIELD_BENDIAN) + #error "can't have both bitfield LENDIAN and BENDIAN defined" +#endif + +#if !defined(SK_UINT8_BITFIELD_LENDIAN) && !defined(SK_UINT8_BITFIELD_BENDIAN) + #ifdef SK_CPU_LENDIAN + #define SK_UINT8_BITFIELD_LENDIAN + #else + #define SK_UINT8_BITFIELD_BENDIAN + #endif +#endif + +#ifdef SK_UINT8_BITFIELD_LENDIAN + #define SK_UINT8_BITFIELD(f0, f1, f2, f3, f4, f5, f6, f7) \ + SK_OT_BYTE f0 : 1; \ + SK_OT_BYTE f1 : 1; \ + SK_OT_BYTE f2 : 1; \ + SK_OT_BYTE f3 : 1; \ + SK_OT_BYTE f4 : 1; \ + SK_OT_BYTE f5 : 1; \ + SK_OT_BYTE f6 : 1; \ + SK_OT_BYTE f7 : 1; +#else + #define SK_UINT8_BITFIELD(f0, f1, f2, f3, f4, f5, f6, f7) \ + SK_OT_BYTE f7 : 1; \ + SK_OT_BYTE f6 : 1; \ + SK_OT_BYTE f5 : 1; \ + SK_OT_BYTE f4 : 1; \ + SK_OT_BYTE f3 : 1; \ + SK_OT_BYTE f2 : 1; \ + SK_OT_BYTE f1 : 1; \ + SK_OT_BYTE f0 : 1; +#endif + +#endif diff --git a/gfx/skia/skia/src/base/SkFloatingPoint.cpp b/gfx/skia/skia/src/base/SkFloatingPoint.cpp new file mode 100644 index 0000000000..3e3d91d6e5 --- /dev/null +++ b/gfx/skia/skia/src/base/SkFloatingPoint.cpp @@ -0,0 +1,51 @@ +/* + * Copyright 2023 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "include/private/base/SkFloatingPoint.h" + +#include "include/private/base/SkAssert.h" + +#include <cmath> + +static inline int64_t double_to_twos_complement_bits(double x) { + // Convert a double to its bit pattern + int64_t bits = 0; + static_assert(sizeof(x) == sizeof(bits)); + std::memcpy(&bits, &x, sizeof(bits)); + // Convert a sign-bit int (i.e. double interpreted as int) into a 2s complement + // int. This also converts -0 (0x8000000000000000) to 0. Doing this to a double allows + // it to be compared using normal C operators (<, <=, etc.) + if (bits < 0) { + bits &= 0x7FFFFFFFFFFFFFFF; + bits = -bits; + } + return bits; +} + +// Arbitrarily chosen. +constexpr static double sk_double_epsilon = 0.0000000001; + +bool sk_doubles_nearly_equal_ulps(double a, double b, uint8_t max_ulps_diff) { + // If both of these are zero (or very close), then using Units of Least Precision + // will not be accurate and we should use sk_double_nearly_zero instead. + SkASSERT(!(fabs(a) < sk_double_epsilon && fabs(b) < sk_double_epsilon)); + // This algorithm does not work if both inputs are NaN. + SkASSERT(!(std::isnan(a) && std::isnan(b))); + // If both inputs are infinity (or actually equal), this catches it. + if (a == b) { + return true; + } + int64_t aBits = double_to_twos_complement_bits(a); + int64_t bBits = double_to_twos_complement_bits(b); + + // Find the difference in Units of Least Precision (ULPs). + return aBits < bBits + max_ulps_diff && bBits < aBits + max_ulps_diff; +} + +bool sk_double_nearly_zero(double a) { + return a == 0 || fabs(a) < sk_double_epsilon; +} diff --git a/gfx/skia/skia/src/base/SkHalf.cpp b/gfx/skia/skia/src/base/SkHalf.cpp new file mode 100644 index 0000000000..024daa29b8 --- /dev/null +++ b/gfx/skia/skia/src/base/SkHalf.cpp @@ -0,0 +1,97 @@ +/* + * Copyright 2014 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#include "include/private/base/SkFloatBits.h" +#include "src/base/SkHalf.h" + +uint16_t halfMantissa(SkHalf h) { + return h & 0x03ff; +} + +uint16_t halfExponent(SkHalf h) { + return (h >> 10) & 0x001f; +} + +uint16_t halfSign(SkHalf h) { + return h >> 15; +} + +union FloatUIntUnion { + uint32_t fUInt; // this must come first for the initializations below to work + float fFloat; +}; + +// based on Fabien Giesen's float_to_half_fast3() +// see https://gist.github.com/rygorous/2156668 +SkHalf SkFloatToHalf(float f) { + static const uint32_t f32infty = { 255 << 23 }; + static const uint32_t f16infty = { 31 << 23 }; + static const FloatUIntUnion magic = { 15 << 23 }; + static const uint32_t sign_mask = 0x80000000u; + static const uint32_t round_mask = ~0xfffu; + SkHalf o = 0; + + FloatUIntUnion floatUnion; + floatUnion.fFloat = f; + + uint32_t sign = floatUnion.fUInt & sign_mask; + floatUnion.fUInt ^= sign; + + // NOTE all the integer compares in this function can be safely + // compiled into signed compares since all operands are below + // 0x80000000. Important if you want fast straight SSE2 code + // (since there's no unsigned PCMPGTD). + + // Inf or NaN (all exponent bits set) + if (floatUnion.fUInt >= f32infty) + // NaN->qNaN and Inf->Inf + o = (floatUnion.fUInt > f32infty) ? 0x7e00 : 0x7c00; + // (De)normalized number or zero + else { + floatUnion.fUInt &= round_mask; + floatUnion.fFloat *= magic.fFloat; + floatUnion.fUInt -= round_mask; + // Clamp to signed infinity if overflowed + if (floatUnion.fUInt > f16infty) { + floatUnion.fUInt = f16infty; + } + + o = floatUnion.fUInt >> 13; // Take the bits! + } + + o |= sign >> 16; + return o; +} + +// based on Fabien Giesen's half_to_float_fast2() +// see https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ +float SkHalfToFloat(SkHalf h) { + static const FloatUIntUnion magic = { 126 << 23 }; + FloatUIntUnion o; + + if (halfExponent(h) == 0) + { + // Zero / Denormal + o.fUInt = magic.fUInt + halfMantissa(h); + o.fFloat -= magic.fFloat; + } + else + { + // Set mantissa + o.fUInt = halfMantissa(h) << 13; + // Set exponent + if (halfExponent(h) == 0x1f) + // Inf/NaN + o.fUInt |= (255 << 23); + else + o.fUInt |= ((127 - 15 + halfExponent(h)) << 23); + } + + // Set sign + o.fUInt |= (halfSign(h) << 31); + return o.fFloat; +} diff --git a/gfx/skia/skia/src/base/SkHalf.h b/gfx/skia/skia/src/base/SkHalf.h new file mode 100644 index 0000000000..d88c80d9db --- /dev/null +++ b/gfx/skia/skia/src/base/SkHalf.h @@ -0,0 +1,37 @@ +/* + * Copyright 2014 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkHalf_DEFINED +#define SkHalf_DEFINED + +#include "src/base/SkVx.h" + +// 16-bit floating point value +// format is 1 bit sign, 5 bits exponent, 10 bits mantissa +// only used for storage +typedef uint16_t SkHalf; + +static constexpr uint16_t SK_HalfMin = 0x0400; // 2^-14 (minimum positive normal value) +static constexpr uint16_t SK_HalfMax = 0x7bff; // 65504 +static constexpr uint16_t SK_HalfEpsilon = 0x1400; // 2^-10 +static constexpr uint16_t SK_Half1 = 0x3C00; // 1 + +// convert between half and single precision floating point +float SkHalfToFloat(SkHalf h); +SkHalf SkFloatToHalf(float f); + +// Convert between half and single precision floating point, +// assuming inputs and outputs are both finite, and may +// flush values which would be denormal half floats to zero. 
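+//
+// (Editorial worked example of the layout above: SK_Half1 == 0x3C00 is sign 0, biased
+// exponent 0b01111 == 15, mantissa 0, i.e. 1.0 * 2^(15-15) == 1.0f; SK_HalfMax == 0x7bff
+// decodes to 65504, the largest finite half.)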
+static inline skvx::float4 SkHalfToFloat_finite_ftz(uint64_t rgba) { + return skvx::from_half(skvx::half4::Load(&rgba)); +} +static inline skvx::half4 SkFloatToHalf_finite_ftz(const skvx::float4& c) { + return skvx::to_half(c); +} + +#endif diff --git a/gfx/skia/skia/src/base/SkLeanWindows.h b/gfx/skia/skia/src/base/SkLeanWindows.h new file mode 100644 index 0000000000..d43150db76 --- /dev/null +++ b/gfx/skia/skia/src/base/SkLeanWindows.h @@ -0,0 +1,35 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ +#ifndef SkLeanWindows_DEFINED +#define SkLeanWindows_DEFINED + +#include "include/private/base/SkFeatures.h" // IWYU pragma: keep + +#ifdef SK_BUILD_FOR_WIN +// https://devblogs.microsoft.com/oldnewthing/20091130-00/?p=15863 +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# define WIN32_IS_MEAN_WAS_LOCALLY_DEFINED +# endif +# ifndef NOMINMAX +# define NOMINMAX +# define NOMINMAX_WAS_LOCALLY_DEFINED +# endif +# +# include <windows.h> +# +# ifdef WIN32_IS_MEAN_WAS_LOCALLY_DEFINED +# undef WIN32_IS_MEAN_WAS_LOCALLY_DEFINED +# undef WIN32_LEAN_AND_MEAN +# endif +# ifdef NOMINMAX_WAS_LOCALLY_DEFINED +# undef NOMINMAX_WAS_LOCALLY_DEFINED +# undef NOMINMAX +# endif +#endif + +#endif // SkLeanWindows_DEFINED diff --git a/gfx/skia/skia/src/base/SkMSAN.h b/gfx/skia/skia/src/base/SkMSAN.h new file mode 100644 index 0000000000..85fa2fce4b --- /dev/null +++ b/gfx/skia/skia/src/base/SkMSAN.h @@ -0,0 +1,43 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkMSAN_DEFINED +#define SkMSAN_DEFINED + +#include "include/private/base/SkAssert.h" + +#include <cstddef> +#include <string.h> + +// Typically declared in LLVM's msan_interface.h. Easier for us to just re-declare. +extern "C" { + void __msan_check_mem_is_initialized(const volatile void*, size_t); + void __msan_unpoison (const volatile void*, size_t); +} + +// Code that requires initialized inputs can call this to make it clear that +// the blame for use of uninitialized data belongs further up the call stack. +static inline void sk_msan_assert_initialized(const void* begin, const void* end) { +#if defined(__has_feature) + #if __has_feature(memory_sanitizer) + __msan_check_mem_is_initialized(begin, (const char*)end - (const char*)begin); + #endif +#endif +} + +// Lie to MSAN that this range of memory is initialized. +// This can hide serious problems if overused. Every use of this should refer to a bug. +static inline void sk_msan_mark_initialized(const void* begin, const void* end, const char* skbug) { + SkASSERT(skbug && 0 != strcmp(skbug, "")); +#if defined(__has_feature) + #if __has_feature(memory_sanitizer) + __msan_unpoison(begin, (const char*)end - (const char*)begin); + #endif +#endif +} + +#endif//SkMSAN_DEFINED diff --git a/gfx/skia/skia/src/base/SkMalloc.cpp b/gfx/skia/skia/src/base/SkMalloc.cpp new file mode 100644 index 0000000000..944b4847b7 --- /dev/null +++ b/gfx/skia/skia/src/base/SkMalloc.cpp @@ -0,0 +1,22 @@ +// Copyright 2019 Google LLC. +// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. 
+ +#include "include/private/base/SkMalloc.h" + +#include "src/base/SkSafeMath.h" + +void* sk_calloc_throw(size_t count, size_t elemSize) { + return sk_calloc_throw(SkSafeMath::Mul(count, elemSize)); +} + +void* sk_malloc_throw(size_t count, size_t elemSize) { + return sk_malloc_throw(SkSafeMath::Mul(count, elemSize)); +} + +void* sk_realloc_throw(void* buffer, size_t count, size_t elemSize) { + return sk_realloc_throw(buffer, SkSafeMath::Mul(count, elemSize)); +} + +void* sk_malloc_canfail(size_t count, size_t elemSize) { + return sk_malloc_canfail(SkSafeMath::Mul(count, elemSize)); +} diff --git a/gfx/skia/skia/src/base/SkMathPriv.cpp b/gfx/skia/skia/src/base/SkMathPriv.cpp new file mode 100644 index 0000000000..2674e69886 --- /dev/null +++ b/gfx/skia/skia/src/base/SkMathPriv.cpp @@ -0,0 +1,73 @@ +/* + * Copyright 2008 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "src/base/SkMathPriv.h" + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkFloatingPoint.h" + +#include <cstddef> +#include <cstdint> + +/////////////////////////////////////////////////////////////////////////////// + +/* www.worldserver.com/turk/computergraphics/FixedSqrt.pdf +*/ +int32_t SkSqrtBits(int32_t x, int count) { + SkASSERT(x >= 0 && count > 0 && (unsigned)count <= 30); + + uint32_t root = 0; + uint32_t remHi = 0; + uint32_t remLo = x; + + do { + root <<= 1; + + remHi = (remHi<<2) | (remLo>>30); + remLo <<= 2; + + uint32_t testDiv = (root << 1) + 1; + if (remHi >= testDiv) { + remHi -= testDiv; + root++; + } + } while (--count >= 0); + + return root; +} + +// Kernighan's method +int SkPopCount_portable(uint32_t n) { + int count = 0; + + while (n) { + n &= (n - 1); // Remove the lowest bit in the integer. + count++; + } + return count; +} + +// Here we strip off the unwanted bits and then return the number of trailing zero bits +int SkNthSet(uint32_t target, int n) { + SkASSERT(n < SkPopCount(target)); + + for (int i = 0; i < n; ++i) { + target &= (target - 1); // Remove the lowest bit in the integer. + } + + return SkCTZ(target); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +bool sk_floats_are_unit(const float array[], size_t count) { + bool is_unit = true; + for (size_t i = 0; i < count; ++i) { + is_unit &= (array[i] >= 0) & (array[i] <= 1); + } + return is_unit; +} diff --git a/gfx/skia/skia/src/base/SkMathPriv.h b/gfx/skia/skia/src/base/SkMathPriv.h new file mode 100644 index 0000000000..0bcb113b6d --- /dev/null +++ b/gfx/skia/skia/src/base/SkMathPriv.h @@ -0,0 +1,346 @@ +/* + * Copyright 2012 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkMathPriv_DEFINED +#define SkMathPriv_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkCPUTypes.h" +#include "include/private/base/SkTemplates.h" + +#include <cstddef> +#include <cstdint> + +/** + * Return the integer square root of value, with a bias of bitBias + */ +int32_t SkSqrtBits(int32_t value, int bitBias); + +/** Return the integer square root of n, treated as a SkFixed (16.16) + */ +static inline int32_t SkSqrt32(int32_t n) { return SkSqrtBits(n, 15); } + +/** + * Returns (value < 0 ? 0 : value) efficiently (i.e. 
no compares or branches) + */ +static inline int SkClampPos(int value) { + return value & ~(value >> 31); +} + +/** + * Stores numer/denom and numer%denom into div and mod respectively. + */ +template <typename In, typename Out> +inline void SkTDivMod(In numer, In denom, Out* div, Out* mod) { +#ifdef SK_CPU_ARM32 + // If we wrote this as in the else branch, GCC won't fuse the two into one + // divmod call, but rather a div call followed by a divmod. Silly! This + // version is just as fast as calling __aeabi_[u]idivmod manually, but with + // prettier code. + // + // This benches as around 2x faster than the code in the else branch. + const In d = numer/denom; + *div = static_cast<Out>(d); + *mod = static_cast<Out>(numer-d*denom); +#else + // On x86 this will just be a single idiv. + *div = static_cast<Out>(numer/denom); + *mod = static_cast<Out>(numer%denom); +#endif +} + +/** Returns -1 if n < 0, else returns 0 + */ +#define SkExtractSign(n) ((int32_t)(n) >> 31) + +/** If sign == -1, returns -n, else sign must be 0, and returns n. + Typically used in conjunction with SkExtractSign(). + */ +static inline int32_t SkApplySign(int32_t n, int32_t sign) { + SkASSERT(sign == 0 || sign == -1); + return (n ^ sign) - sign; +} + +/** Return x with the sign of y */ +static inline int32_t SkCopySign32(int32_t x, int32_t y) { + return SkApplySign(x, SkExtractSign(x ^ y)); +} + +/** Given a positive value and a positive max, return the value + pinned against max. + Note: only works as long as max - value doesn't wrap around + @return max if value >= max, else value + */ +static inline unsigned SkClampUMax(unsigned value, unsigned max) { + if (value > max) { + value = max; + } + return value; +} + +// If a signed int holds min_int (e.g. 0x80000000) it is undefined what happens when +// we negate it (even though we *know* we're 2's complement and we'll get the same +// value back). So we create this helper function that casts to size_t (unsigned) first, +// to avoid the complaint. +static inline size_t sk_negate_to_size_t(int32_t value) { +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned +#endif + return -static_cast<size_t>(value); +#if defined(_MSC_VER) +#pragma warning(pop) +#endif +} + +/////////////////////////////////////////////////////////////////////////////// + +/** Return a*b/255, truncating away any fractional bits. Only valid if both + a and b are 0..255 + */ +static inline U8CPU SkMulDiv255Trunc(U8CPU a, U8CPU b) { + SkASSERT((uint8_t)a == a); + SkASSERT((uint8_t)b == b); + unsigned prod = a*b + 1; + return (prod + (prod >> 8)) >> 8; +} + +/** Return (a*b)/255, taking the ceiling of any fractional bits. Only valid if + both a and b are 0..255. The expected result equals (a * b + 254) / 255. + */ +static inline U8CPU SkMulDiv255Ceiling(U8CPU a, U8CPU b) { + SkASSERT((uint8_t)a == a); + SkASSERT((uint8_t)b == b); + unsigned prod = a*b + 255; + return (prod + (prod >> 8)) >> 8; +} + +/** Just the rounding step in SkDiv255Round: round(value / 255) + */ +static inline unsigned SkDiv255Round(unsigned prod) { + prod += 128; + return (prod + (prod >> 8)) >> 8; +} + +/** + * Swap byte order of a 4-byte value, e.g. 0xaarrggbb -> 0xbbggrraa. 
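+ * For example, SkBSwap32(0x11223344) returns 0x44332211.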
+ */ +#if defined(_MSC_VER) + #include <stdlib.h> + static inline uint32_t SkBSwap32(uint32_t v) { return _byteswap_ulong(v); } +#else + static inline uint32_t SkBSwap32(uint32_t v) { return __builtin_bswap32(v); } +#endif + +/* + * Return the number of set bits (i.e., the population count) in the provided uint32_t. + */ +int SkPopCount_portable(uint32_t n); + +#if defined(__GNUC__) || defined(__clang__) + static inline int SkPopCount(uint32_t n) { + return __builtin_popcount(n); + } +#else + static inline int SkPopCount(uint32_t n) { + return SkPopCount_portable(n); + } +#endif + +/* + * Return the 0-based index of the nth bit set in target + * Returns 32 if there is no nth bit set. + */ +int SkNthSet(uint32_t target, int n); + +//! Returns the number of leading zero bits (0...32) +// From Hacker's Delight 2nd Edition +constexpr int SkCLZ_portable(uint32_t x) { + int n = 32; + uint32_t y = x >> 16; if (y != 0) {n -= 16; x = y;} + y = x >> 8; if (y != 0) {n -= 8; x = y;} + y = x >> 4; if (y != 0) {n -= 4; x = y;} + y = x >> 2; if (y != 0) {n -= 2; x = y;} + y = x >> 1; if (y != 0) {return n - 2;} + return n - static_cast<int>(x); +} + +static_assert(32 == SkCLZ_portable(0)); +static_assert(31 == SkCLZ_portable(1)); +static_assert( 1 == SkCLZ_portable(1 << 30)); +static_assert( 1 == SkCLZ_portable((1 << 30) | (1 << 24) | 1)); +static_assert( 0 == SkCLZ_portable(~0U)); + +#if defined(SK_BUILD_FOR_WIN) + #include <intrin.h> + + static inline int SkCLZ(uint32_t mask) { + if (mask) { + unsigned long index = 0; + _BitScanReverse(&index, mask); + // Suppress this bogus /analyze warning. The check for non-zero + // guarantees that _BitScanReverse will succeed. + #pragma warning(suppress : 6102) // Using 'index' from failed function call + return index ^ 0x1F; + } else { + return 32; + } + } +#elif defined(SK_CPU_ARM32) || defined(__GNUC__) || defined(__clang__) + static inline int SkCLZ(uint32_t mask) { + // __builtin_clz(0) is undefined, so we have to detect that case. + return mask ? __builtin_clz(mask) : 32; + } +#else + static inline int SkCLZ(uint32_t mask) { + return SkCLZ_portable(mask); + } +#endif + +//! Returns the number of trailing zero bits (0...32) +// From Hacker's Delight 2nd Edition +constexpr int SkCTZ_portable(uint32_t x) { + return 32 - SkCLZ_portable(~x & (x - 1)); +} + +static_assert(32 == SkCTZ_portable(0)); +static_assert( 0 == SkCTZ_portable(1)); +static_assert(30 == SkCTZ_portable(1 << 30)); +static_assert( 2 == SkCTZ_portable((1 << 30) | (1 << 24) | (1 << 2))); +static_assert( 0 == SkCTZ_portable(~0U)); + +#if defined(SK_BUILD_FOR_WIN) + #include <intrin.h> + + static inline int SkCTZ(uint32_t mask) { + if (mask) { + unsigned long index = 0; + _BitScanForward(&index, mask); + // Suppress this bogus /analyze warning. The check for non-zero + // guarantees that _BitScanReverse will succeed. + #pragma warning(suppress : 6102) // Using 'index' from failed function call + return index; + } else { + return 32; + } + } +#elif defined(SK_CPU_ARM32) || defined(__GNUC__) || defined(__clang__) + static inline int SkCTZ(uint32_t mask) { + // __builtin_ctz(0) is undefined, so we have to detect that case. + return mask ? __builtin_ctz(mask) : 32; + } +#else + static inline int SkCTZ(uint32_t mask) { + return SkCTZ_portable(mask); + } +#endif + +/** + * Returns the log2 of the specified value, were that value to be rounded up + * to the next power of 2. It is undefined to pass 0. 
Examples: + * SkNextLog2(1) -> 0 + * SkNextLog2(2) -> 1 + * SkNextLog2(3) -> 2 + * SkNextLog2(4) -> 2 + * SkNextLog2(5) -> 3 + */ +static inline int SkNextLog2(uint32_t value) { + SkASSERT(value != 0); + return 32 - SkCLZ(value - 1); +} + +constexpr int SkNextLog2_portable(uint32_t value) { + SkASSERT(value != 0); + return 32 - SkCLZ_portable(value - 1); +} + +/** +* Returns the log2 of the specified value, were that value to be rounded down +* to the previous power of 2. It is undefined to pass 0. Examples: +* SkPrevLog2(1) -> 0 +* SkPrevLog2(2) -> 1 +* SkPrevLog2(3) -> 1 +* SkPrevLog2(4) -> 2 +* SkPrevLog2(5) -> 2 +*/ +static inline int SkPrevLog2(uint32_t value) { + SkASSERT(value != 0); + return 32 - SkCLZ(value >> 1); +} + +constexpr int SkPrevLog2_portable(uint32_t value) { + SkASSERT(value != 0); + return 32 - SkCLZ_portable(value >> 1); +} + +/** + * Returns the smallest power-of-2 that is >= the specified value. If value + * is already a power of 2, then it is returned unchanged. It is undefined + * if value is <= 0. + */ +static inline int SkNextPow2(int value) { + SkASSERT(value > 0); + return 1 << SkNextLog2(static_cast<uint32_t>(value)); +} + +constexpr int SkNextPow2_portable(int value) { + SkASSERT(value > 0); + return 1 << SkNextLog2_portable(static_cast<uint32_t>(value)); +} + +/** +* Returns the largest power-of-2 that is <= the specified value. If value +* is already a power of 2, then it is returned unchanged. It is undefined +* if value is <= 0. +*/ +static inline int SkPrevPow2(int value) { + SkASSERT(value > 0); + return 1 << SkPrevLog2(static_cast<uint32_t>(value)); +} + +constexpr int SkPrevPow2_portable(int value) { + SkASSERT(value > 0); + return 1 << SkPrevLog2_portable(static_cast<uint32_t>(value)); +} + +/////////////////////////////////////////////////////////////////////////////// + +/** + * Return the smallest power-of-2 >= n. + */ +static inline uint32_t GrNextPow2(uint32_t n) { + return n ? (1 << (32 - SkCLZ(n - 1))) : 1; +} + +/** + * Returns the next power of 2 >= n or n if the next power of 2 can't be represented by size_t. + */ +static inline size_t GrNextSizePow2(size_t n) { + constexpr int kNumSizeTBits = 8 * sizeof(size_t); + constexpr size_t kHighBitSet = size_t(1) << (kNumSizeTBits - 1); + + if (!n) { + return 1; + } else if (n >= kHighBitSet) { + return n; + } + + n--; + uint32_t shift = 1; + while (shift < kNumSizeTBits) { + n |= n >> shift; + shift <<= 1; + } + return n + 1; +} + +// conservative check. will return false for very large values that "could" fit +template <typename T> static inline bool SkFitsInFixed(T x) { + return SkTAbs(x) <= 32767.0f; +} + +#endif diff --git a/gfx/skia/skia/src/base/SkQuads.cpp b/gfx/skia/skia/src/base/SkQuads.cpp new file mode 100644 index 0000000000..a77837932c --- /dev/null +++ b/gfx/skia/skia/src/base/SkQuads.cpp @@ -0,0 +1,69 @@ +/* + * Copyright 2023 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ +#include "src/base/SkQuads.h" + +#include "include/private/base/SkFloatingPoint.h" + +#include <cmath> + +// Solve 0 = M * x + B. If M is 0, there are no solutions, unless B is also 0, +// in which case there are infinite solutions, so we just return 1 of them. 
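+// For example, M = 2, B = 6 yields the single root x = -3.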
+static int solve_linear(const double M, const double B, double solution[2]) { + if (sk_double_nearly_zero(M)) { + solution[0] = 0; + if (sk_double_nearly_zero(B)) { + return 1; + } + return 0; + } + solution[0] = -B / M; + if (!std::isfinite(solution[0])) { + return 0; + } + return 1; +} + +// When the A coefficient of a quadratic is close to 0, there can be floating point error +// that arises from computing a very large root. In those cases, we would rather be +// precise about the one smaller root, so we have this arbitrary cutoff for when A is +// really small or small compared to B. +static bool close_to_linear(double A, double B) { + if (sk_double_nearly_zero(B)) { + return sk_double_nearly_zero(A); + } + // This is a different threshold (tighter) than the close_to_a_quadratic in SkCubics.cpp + // because the SkQuads::RootsReal gives better answers for longer as A/B -> 0. + return std::abs(A / B) < 1.0e-16; +} + +int SkQuads::RootsReal(const double A, const double B, const double C, double solution[2]) { + if (close_to_linear(A, B)) { + return solve_linear(B, C, solution); + } + // If A is zero (e.g. B was nan and thus close_to_linear was false), we will + // temporarily have infinities rolling about, but will catch that when checking + // p2 - q. + const double p = sk_ieee_double_divide(B, 2 * A); + const double q = sk_ieee_double_divide(C, A); + /* normal form: x^2 + px + q = 0 */ + const double p2 = p * p; + if (!std::isfinite(p2 - q) || + (!sk_double_nearly_zero(p2 - q) && p2 < q)) { + return 0; + } + double sqrt_D = 0; + if (p2 > q) { + sqrt_D = sqrt(p2 - q); + } + solution[0] = sqrt_D - p; + solution[1] = -sqrt_D - p; + if (sk_double_nearly_zero(sqrt_D) || + sk_doubles_nearly_equal_ulps(solution[0], solution[1])) { + return 1; + } + return 2; +} diff --git a/gfx/skia/skia/src/base/SkQuads.h b/gfx/skia/skia/src/base/SkQuads.h new file mode 100644 index 0000000000..645d43bcd4 --- /dev/null +++ b/gfx/skia/skia/src/base/SkQuads.h @@ -0,0 +1,36 @@ +/* + * Copyright 2023 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkQuads_DEFINED +#define SkQuads_DEFINED + +/** + * Utilities for dealing with quadratic formulas with one variable: + * f(t) = A*t^2 + B*t + C + */ +class SkQuads { +public: + /** + * Puts up to 2 real solutions to the equation + * A*t^2 + B*t + C = 0 + * in the provided array. + */ + static int RootsReal(double A, double B, double C, + double solution[2]); + + /** + * Evaluates the quadratic function with the 3 provided coefficients and the + * provided variable. + */ + static double EvalAt(double A, double B, double C, double t) { + return A * t * t + + B * t + + C; + } +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkRandom.h b/gfx/skia/skia/src/base/SkRandom.h new file mode 100644 index 0000000000..96b3824896 --- /dev/null +++ b/gfx/skia/skia/src/base/SkRandom.h @@ -0,0 +1,173 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkRandom_DEFINED +#define SkRandom_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkFixed.h" +#include "include/private/base/SkFloatBits.h" + +#include <cstdint> + +typedef float SkScalar; + +/** \class SkRandom + + Utility class that implements pseudo random 32bit numbers using Marsaglia's + multiply-with-carry "mother of all" algorithm. 
Unlike rand(), this class holds + its own state, so that multiple instances can be used with no side-effects. + + Has a large period and all bits are well-randomized. + */ +class SkRandom { +public: + SkRandom() { init(0); } + SkRandom(uint32_t seed) { init(seed); } + SkRandom(const SkRandom& rand) : fK(rand.fK), fJ(rand.fJ) {} + + SkRandom& operator=(const SkRandom& rand) { + fK = rand.fK; + fJ = rand.fJ; + + return *this; + } + + /** Return the next pseudo random number as an unsigned 32bit value. + */ + uint32_t nextU() { + fK = kKMul*(fK & 0xffff) + (fK >> 16); + fJ = kJMul*(fJ & 0xffff) + (fJ >> 16); + return (((fK << 16) | (fK >> 16)) + fJ); + } + + /** Return the next pseudo random number as a signed 32bit value. + */ + int32_t nextS() { return (int32_t)this->nextU(); } + + /** + * Returns value [0...1) as an IEEE float + */ + float nextF() { + int floatint = 0x3f800000 | (int)(this->nextU() >> 9); + float f = SkBits2Float(floatint) - 1.0f; + return f; + } + + /** + * Returns value [min...max) as a float + */ + float nextRangeF(float min, float max) { + return min + this->nextF() * (max - min); + } + + /** Return the next pseudo random number, as an unsigned value of + at most bitCount bits. + @param bitCount The maximum number of bits to be returned + */ + uint32_t nextBits(unsigned bitCount) { + SkASSERT(bitCount > 0 && bitCount <= 32); + return this->nextU() >> (32 - bitCount); + } + + /** Return the next pseudo random unsigned number, mapped to lie within + [min, max] inclusive. + */ + uint32_t nextRangeU(uint32_t min, uint32_t max) { + SkASSERT(min <= max); + uint32_t range = max - min + 1; + if (0 == range) { + return this->nextU(); + } else { + return min + this->nextU() % range; + } + } + + /** Return the next pseudo random unsigned number, mapped to lie within + [0, count). + */ + uint32_t nextULessThan(uint32_t count) { + SkASSERT(count > 0); + return this->nextRangeU(0, count - 1); + } + + /** Return the next pseudo random number expressed as a SkScalar + in the range [0..SK_Scalar1). + */ + SkScalar nextUScalar1() { return SkFixedToScalar(this->nextUFixed1()); } + + /** Return the next pseudo random number expressed as a SkScalar + in the range [min..max). + */ + SkScalar nextRangeScalar(SkScalar min, SkScalar max) { + return this->nextUScalar1() * (max - min) + min; + } + + /** Return the next pseudo random number expressed as a SkScalar + in the range [-SK_Scalar1..SK_Scalar1). + */ + SkScalar nextSScalar1() { return SkFixedToScalar(this->nextSFixed1()); } + + /** Return the next pseudo random number as a bool. + */ + bool nextBool() { return this->nextU() >= 0x80000000; } + + /** A biased version of nextBool(). + */ + bool nextBiasedBool(SkScalar fractionTrue) { + SkASSERT(fractionTrue >= 0 && fractionTrue <= 1); + return this->nextUScalar1() <= fractionTrue; + } + + /** Reset the random object. + */ + void setSeed(uint32_t seed) { init(seed); } + +private: + // Initialize state variables with LCG. + // We must ensure that both J and K are non-zero, otherwise the + // multiply-with-carry step will forevermore return zero. + void init(uint32_t seed) { + fK = NextLCG(seed); + if (0 == fK) { + fK = NextLCG(fK); + } + fJ = NextLCG(fK); + if (0 == fJ) { + fJ = NextLCG(fJ); + } + SkASSERT(0 != fK && 0 != fJ); + } + static uint32_t NextLCG(uint32_t seed) { return kMul*seed + kAdd; } + + /** Return the next pseudo random number expressed as an unsigned SkFixed + in the range [0..SK_Fixed1). 
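+ (SK_Fixed1 is 1 << 16, so the top 16 bits of nextU() fall in this range.)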
+ */ + SkFixed nextUFixed1() { return this->nextU() >> 16; } + + /** Return the next pseudo random number expressed as a signed SkFixed + in the range [-SK_Fixed1..SK_Fixed1). + */ + SkFixed nextSFixed1() { return this->nextS() >> 15; } + + // See "Numerical Recipes in C", 1992 page 284 for these constants + // For the LCG that sets the initial state from a seed + enum { + kMul = 1664525, + kAdd = 1013904223 + }; + // Constants for the multiply-with-carry steps + enum { + kKMul = 30345, + kJMul = 18000, + }; + + uint32_t fK; + uint32_t fJ; +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkRectMemcpy.h b/gfx/skia/skia/src/base/SkRectMemcpy.h new file mode 100644 index 0000000000..07ba0f0c65 --- /dev/null +++ b/gfx/skia/skia/src/base/SkRectMemcpy.h @@ -0,0 +1,32 @@ +/* + * Copyright 2023 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkRectMemcpy_DEFINED +#define SkRectMemcpy_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkTemplates.h" + +#include <cstring> + +static inline void SkRectMemcpy(void* dst, size_t dstRB, const void* src, size_t srcRB, + size_t trimRowBytes, int rowCount) { + SkASSERT(trimRowBytes <= dstRB); + SkASSERT(trimRowBytes <= srcRB); + if (trimRowBytes == dstRB && trimRowBytes == srcRB) { + memcpy(dst, src, trimRowBytes * rowCount); + return; + } + + for (int i = 0; i < rowCount; ++i) { + memcpy(dst, src, trimRowBytes); + dst = SkTAddOffset<void>(dst, dstRB); + src = SkTAddOffset<const void>(src, srcRB); + } +} + +#endif diff --git a/gfx/skia/skia/src/base/SkSafeMath.cpp b/gfx/skia/skia/src/base/SkSafeMath.cpp new file mode 100644 index 0000000000..cb69125edb --- /dev/null +++ b/gfx/skia/skia/src/base/SkSafeMath.cpp @@ -0,0 +1,20 @@ +/* + * Copyright 2023 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "src/base/SkSafeMath.h" + +size_t SkSafeMath::Add(size_t x, size_t y) { + SkSafeMath tmp; + size_t sum = tmp.add(x, y); + return tmp.ok() ? sum : SIZE_MAX; +} + +size_t SkSafeMath::Mul(size_t x, size_t y) { + SkSafeMath tmp; + size_t prod = tmp.mul(x, y); + return tmp.ok() ? prod : SIZE_MAX; +} diff --git a/gfx/skia/skia/src/base/SkSafeMath.h b/gfx/skia/skia/src/base/SkSafeMath.h new file mode 100644 index 0000000000..8ca44749f4 --- /dev/null +++ b/gfx/skia/skia/src/base/SkSafeMath.h @@ -0,0 +1,113 @@ +/* + * Copyright 2017 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkSafeMath_DEFINED +#define SkSafeMath_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkDebug.h" // IWYU pragma: keep +#include "include/private/base/SkTFitsIn.h" + +#include <cstddef> +#include <cstdint> +#include <limits> + +// SkSafeMath always check that a series of operations do not overflow. +// This must be correct for all platforms, because this is a check for safety at runtime. + +class SkSafeMath { +public: + SkSafeMath() = default; + + bool ok() const { return fOK; } + explicit operator bool() const { return fOK; } + + size_t mul(size_t x, size_t y) { + return sizeof(size_t) == sizeof(uint64_t) ? mul64(x, y) : mul32(x, y); + } + + size_t add(size_t x, size_t y) { + size_t result = x + y; + fOK &= result >= x; + return result; + } + + /** + * Return a + b, unless this result is an overflow/underflow. 
In those cases, fOK will + * be set to false, and it is undefined what this returns. + */ + int addInt(int a, int b) { + if (b < 0 && a < std::numeric_limits<int>::min() - b) { + fOK = false; + return a; + } else if (b > 0 && a > std::numeric_limits<int>::max() - b) { + fOK = false; + return a; + } + return a + b; + } + + size_t alignUp(size_t x, size_t alignment) { + SkASSERT(alignment && !(alignment & (alignment - 1))); + return add(x, alignment - 1) & ~(alignment - 1); + } + + template <typename T> T castTo(size_t value) { + if (!SkTFitsIn<T>(value)) { + fOK = false; + } + return static_cast<T>(value); + } + + // These saturate to their results + static size_t Add(size_t x, size_t y); + static size_t Mul(size_t x, size_t y); + static size_t Align4(size_t x) { + SkSafeMath safe; + return safe.alignUp(x, 4); + } + +private: + uint32_t mul32(uint32_t x, uint32_t y) { + uint64_t bx = x; + uint64_t by = y; + uint64_t result = bx * by; + fOK &= result >> 32 == 0; + // Overflow information is capture in fOK. Return the result modulo 2^32. + return (uint32_t)result; + } + + uint64_t mul64(uint64_t x, uint64_t y) { + if (x <= std::numeric_limits<uint64_t>::max() >> 32 + && y <= std::numeric_limits<uint64_t>::max() >> 32) { + return x * y; + } else { + auto hi = [](uint64_t x) { return x >> 32; }; + auto lo = [](uint64_t x) { return x & 0xFFFFFFFF; }; + + uint64_t lx_ly = lo(x) * lo(y); + uint64_t hx_ly = hi(x) * lo(y); + uint64_t lx_hy = lo(x) * hi(y); + uint64_t hx_hy = hi(x) * hi(y); + uint64_t result = 0; + result = this->add(lx_ly, (hx_ly << 32)); + result = this->add(result, (lx_hy << 32)); + fOK &= (hx_hy + (hx_ly >> 32) + (lx_hy >> 32)) == 0; + + #if defined(SK_DEBUG) && defined(__clang__) && defined(__x86_64__) + auto double_check = (unsigned __int128)x * y; + SkASSERT(result == (double_check & 0xFFFFFFFFFFFFFFFF)); + SkASSERT(!fOK || (double_check >> 64 == 0)); + #endif + + return result; + } + } + bool fOK = true; +}; + +#endif//SkSafeMath_DEFINED diff --git a/gfx/skia/skia/src/base/SkScopeExit.h b/gfx/skia/skia/src/base/SkScopeExit.h new file mode 100644 index 0000000000..9c3581b464 --- /dev/null +++ b/gfx/skia/skia/src/base/SkScopeExit.h @@ -0,0 +1,59 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkScopeExit_DEFINED +#define SkScopeExit_DEFINED + +#include "include/private/base/SkMacros.h" + +#include <functional> +#include <utility> + +/** SkScopeExit calls a std:::function<void()> in its destructor. */ +class SkScopeExit { +public: + SkScopeExit() = default; + SkScopeExit(std::function<void()> f) : fFn(std::move(f)) {} + SkScopeExit(SkScopeExit&& that) : fFn(std::move(that.fFn)) {} + + ~SkScopeExit() { + if (fFn) { + fFn(); + } + } + + void clear() { fFn = {}; } + + SkScopeExit& operator=(SkScopeExit&& that) { + fFn = std::move(that.fFn); + return *this; + } + +private: + std::function<void()> fFn; + + SkScopeExit( const SkScopeExit& ) = delete; + SkScopeExit& operator=(const SkScopeExit& ) = delete; +}; + +/** + * SK_AT_SCOPE_EXIT(stmt) evaluates stmt when the current scope ends. + * + * E.g. 
+ * { + * int x = 5; + * { + * SK_AT_SCOPE_EXIT(x--); + * SkASSERT(x == 5); + * } + * SkASSERT(x == 4); + * } + */ +#define SK_AT_SCOPE_EXIT(stmt) \ + SkScopeExit SK_MACRO_APPEND_LINE(at_scope_exit_)([&]() { stmt; }) + +#endif // SkScopeExit_DEFINED diff --git a/gfx/skia/skia/src/base/SkSemaphore.cpp b/gfx/skia/skia/src/base/SkSemaphore.cpp new file mode 100644 index 0000000000..cb85fa9745 --- /dev/null +++ b/gfx/skia/skia/src/base/SkSemaphore.cpp @@ -0,0 +1,83 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "include/private/base/SkSemaphore.h" + +#include "include/private/base/SkFeatures.h" // IWYU pragma: keep + +#if defined(SK_BUILD_FOR_MAC) || defined(SK_BUILD_FOR_IOS) + #include <dispatch/dispatch.h> + + struct SkSemaphore::OSSemaphore { + dispatch_semaphore_t fSemaphore; + + OSSemaphore() { fSemaphore = dispatch_semaphore_create(0/*initial count*/); } + ~OSSemaphore() { dispatch_release(fSemaphore); } + + void signal(int n) { while (n --> 0) { dispatch_semaphore_signal(fSemaphore); } } + void wait() { dispatch_semaphore_wait(fSemaphore, DISPATCH_TIME_FOREVER); } + }; +#elif defined(SK_BUILD_FOR_WIN) +#include "src/base/SkLeanWindows.h" + + struct SkSemaphore::OSSemaphore { + HANDLE fSemaphore; + + OSSemaphore() { + fSemaphore = CreateSemaphore(nullptr /*security attributes, optional*/, + 0 /*initial count*/, + MAXLONG /*max count*/, + nullptr /*name, optional*/); + } + ~OSSemaphore() { CloseHandle(fSemaphore); } + + void signal(int n) { + ReleaseSemaphore(fSemaphore, n, nullptr/*returns previous count, optional*/); + } + void wait() { WaitForSingleObject(fSemaphore, INFINITE/*timeout in ms*/); } + }; +#else + // It's important we test for Mach before this. This code will compile but not work there. + #include <errno.h> + #include <semaphore.h> + struct SkSemaphore::OSSemaphore { + sem_t fSemaphore; + + OSSemaphore() { sem_init(&fSemaphore, 0/*cross process?*/, 0/*initial count*/); } + ~OSSemaphore() { sem_destroy(&fSemaphore); } + + void signal(int n) { while (n --> 0) { sem_post(&fSemaphore); } } + void wait() { + // Try until we're not interrupted. + while(sem_wait(&fSemaphore) == -1 && errno == EINTR); + } + }; +#endif + +/////////////////////////////////////////////////////////////////////////////// + +SkSemaphore::~SkSemaphore() { + delete fOSSemaphore; +} + +void SkSemaphore::osSignal(int n) { + fOSSemaphoreOnce([this] { fOSSemaphore = new OSSemaphore; }); + fOSSemaphore->signal(n); +} + +void SkSemaphore::osWait() { + fOSSemaphoreOnce([this] { fOSSemaphore = new OSSemaphore; }); + fOSSemaphore->wait(); +} + +bool SkSemaphore::try_wait() { + int count = fCount.load(std::memory_order_relaxed); + if (count > 0) { + return fCount.compare_exchange_weak(count, count-1, std::memory_order_acquire); + } + return false; +} diff --git a/gfx/skia/skia/src/base/SkStringView.h b/gfx/skia/skia/src/base/SkStringView.h new file mode 100644 index 0000000000..f8f83ae77e --- /dev/null +++ b/gfx/skia/skia/src/base/SkStringView.h @@ -0,0 +1,51 @@ +/* + * Copyright 2021 Google LLC. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SkStringView_DEFINED +#define SkStringView_DEFINED + +#include <cstring> +#include <string_view> + +namespace skstd { + +// C++20 additions +inline constexpr bool starts_with(std::string_view str, std::string_view prefix) { + if (prefix.length() > str.length()) { + return false; + } + return prefix.length() == 0 || !memcmp(str.data(), prefix.data(), prefix.length()); +} + +inline constexpr bool starts_with(std::string_view str, std::string_view::value_type c) { + return !str.empty() && str.front() == c; +} + +inline constexpr bool ends_with(std::string_view str, std::string_view suffix) { + if (suffix.length() > str.length()) { + return false; + } + return suffix.length() == 0 || !memcmp(str.data() + str.length() - suffix.length(), + suffix.data(), suffix.length()); +} + +inline constexpr bool ends_with(std::string_view str, std::string_view::value_type c) { + return !str.empty() && str.back() == c; +} + +// C++23 additions +inline constexpr bool contains(std::string_view str, std::string_view needle) { + return str.find(needle) != std::string_view::npos; +} + +inline constexpr bool contains(std::string_view str, std::string_view::value_type c) { + return str.find(c) != std::string_view::npos; +} + +} // namespace skstd + +#endif diff --git a/gfx/skia/skia/src/base/SkTBlockList.h b/gfx/skia/skia/src/base/SkTBlockList.h new file mode 100644 index 0000000000..88e91a92bb --- /dev/null +++ b/gfx/skia/skia/src/base/SkTBlockList.h @@ -0,0 +1,448 @@ +/* + * Copyright 2010 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkTBlockList_DEFINED +#define SkTBlockList_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkDebug.h" +#include "include/private/base/SkTo.h" +#include "src/base/SkBlockAllocator.h" + +#include <algorithm> +#include <cstring> +#include <type_traits> +#include <utility> + +// Forward declarations for the iterators used by SkTBlockList +using IndexFn = int (*)(const SkBlockAllocator::Block*); +using NextFn = int (*)(const SkBlockAllocator::Block*, int); +template<typename T, typename B> using ItemFn = T (*)(B*, int); +template <typename T, bool Forward, bool Const, IndexFn Start, IndexFn End, NextFn Next, + ItemFn<T, typename std::conditional<Const, const SkBlockAllocator::Block, + SkBlockAllocator::Block>::type> Resolve> +class BlockIndexIterator; + +/** + * SkTBlockList manages dynamic storage for instances of T, reserving fixed blocks such that + * allocation is amortized across every N instances. In this way it is a hybrid of an array-based + * vector and a linked-list. T can be any type and non-trivial destructors are automatically + * invoked when the SkTBlockList is destructed. The addresses of instances are guaranteed + * not to move except when a list is concatenated to another. + * + * The collection supports storing a templated number of elements inline before heap-allocated + * blocks are made to hold additional instances. By default, the heap blocks are sized to hold the + * same number of items as the inline block. A common pattern is to have the inline size hold only + * a small number of items for the common case and then allocate larger blocks when needed. 
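+ *
+ * For example (illustrative), SkTBlockList<int, 4> list(16); keeps the first four ints in the
+ * inline block and then allocates heap blocks sized to hold 16 ints each.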
+ * + * If the size of a collection is N, and its block size is B, the complexity of the common + * operations are: + * - push_back()/emplace_back(): O(1), with malloc O(B) + * - pop_back(): O(1), with free O(B) + * - front()/back(): O(1) + * - reset(): O(N) for non-trivial types, O(N/B) for trivial types + * - concat(): O(B) + * - random access: O(N/B) + * - iteration: O(1) at each step + * + * These characteristics make it well suited for allocating items in a LIFO ordering, or otherwise + * acting as a stack, or simply using it as a typed allocator. + */ +template <typename T, int StartingItems = 1> +class SkTBlockList { +public: + /** + * Create an allocator that defaults to using StartingItems as heap increment. + */ + SkTBlockList() : SkTBlockList(StartingItems) {} + + /** + * Create an allocator + * + * @param itemsPerBlock the number of items to allocate at once + */ + explicit SkTBlockList(int itemsPerBlock, + SkBlockAllocator::GrowthPolicy policy = + SkBlockAllocator::GrowthPolicy::kFixed) + : fAllocator(policy, + SkBlockAllocator::BlockOverhead<alignof(T)>() + sizeof(T)*itemsPerBlock) {} + + ~SkTBlockList() { this->reset(); } + + /** + * Adds an item and returns it. + * + * @return the added item. + */ + T& push_back() { + return *new (this->pushItem()) T; + } + T& push_back(const T& t) { + return *new (this->pushItem()) T(t); + } + T& push_back(T&& t) { + return *new (this->pushItem()) T(std::move(t)); + } + + template <typename... Args> + T& emplace_back(Args&&... args) { + return *new (this->pushItem()) T(std::forward<Args>(args)...); + } + + /** + * Move all items from 'other' to the end of this collection. When this returns, 'other' will + * be empty. Items in 'other' may be moved as part of compacting the pre-allocated start of + * 'other' into this list (using T's move constructor or memcpy if T is trivially copyable), but + * this is O(StartingItems) and not O(N). All other items are concatenated in O(1). + */ + template <int SI> + void concat(SkTBlockList<T, SI>&& other); + + /** + * Allocate, if needed, space to hold N more Ts before another malloc will occur. + */ + void reserve(int n) { + int avail = fAllocator->currentBlock()->template avail<alignof(T)>() / sizeof(T); + if (n > avail) { + int reserved = n - avail; + // Don't consider existing bytes since we've already determined how to split the N items + fAllocator->template reserve<alignof(T)>( + reserved * sizeof(T), SkBlockAllocator::kIgnoreExistingBytes_Flag); + } + } + + /** + * Remove the last item, only call if count() != 0 + */ + void pop_back() { + SkASSERT(this->count() > 0); + + SkBlockAllocator::Block* block = fAllocator->currentBlock(); + + // Run dtor for the popped item + int releaseIndex = Last(block); + GetItem(block, releaseIndex).~T(); + + if (releaseIndex == First(block)) { + fAllocator->releaseBlock(block); + } else { + // Since this always follows LIFO, the block should always be able to release the memory + SkAssertResult(block->release(releaseIndex, releaseIndex + sizeof(T))); + block->setMetadata(Decrement(block, releaseIndex)); + } + + fAllocator->setMetadata(fAllocator->metadata() - 1); + } + + /** + * Removes all added items. + */ + void reset() { + // Invoke destructors in reverse order if not trivially destructible + if constexpr (!std::is_trivially_destructible<T>::value) { + for (T& t : this->ritems()) { + t.~T(); + } + } + + fAllocator->reset(); + } + + /** + * Returns the item count. 
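+ * (The running total is tracked in the allocator's metadata, so this is O(1) apart from the
+ * debug-only verification below.)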
+ */ + int count() const { +#ifdef SK_DEBUG + // Confirm total count matches sum of block counts + int count = 0; + for (const auto* b :fAllocator->blocks()) { + if (b->metadata() == 0) { + continue; // skip empty + } + count += (sizeof(T) + Last(b) - First(b)) / sizeof(T); + } + SkASSERT(count == fAllocator->metadata()); +#endif + return fAllocator->metadata(); + } + + /** + * Is the count 0? + */ + bool empty() const { return this->count() == 0; } + + /** + * Access first item, only call if count() != 0 + */ + T& front() { + // This assumes that the head block actually have room to store the first item. + static_assert(StartingItems >= 1); + SkASSERT(this->count() > 0 && fAllocator->headBlock()->metadata() > 0); + return GetItem(fAllocator->headBlock(), First(fAllocator->headBlock())); + } + const T& front() const { + SkASSERT(this->count() > 0 && fAllocator->headBlock()->metadata() > 0); + return GetItem(fAllocator->headBlock(), First(fAllocator->headBlock())); + } + + /** + * Access last item, only call if count() != 0 + */ + T& back() { + SkASSERT(this->count() > 0 && fAllocator->currentBlock()->metadata() > 0); + return GetItem(fAllocator->currentBlock(), Last(fAllocator->currentBlock())); + } + const T& back() const { + SkASSERT(this->count() > 0 && fAllocator->currentBlock()->metadata() > 0); + return GetItem(fAllocator->currentBlock(), Last(fAllocator->currentBlock())); + } + + /** + * Access item by index. Not an operator[] since it should not be considered constant time. + * Use for-range loops by calling items() or ritems() instead to access all added items in order + */ + T& item(int i) { + SkASSERT(i >= 0 && i < this->count()); + + // Iterate over blocks until we find the one that contains i. + for (auto* b : fAllocator->blocks()) { + if (b->metadata() == 0) { + continue; // skip empty + } + + int start = First(b); + int end = Last(b) + sizeof(T); // exclusive + int index = start + i * sizeof(T); + if (index < end) { + return GetItem(b, index); + } else { + i -= (end - start) / sizeof(T); + } + } + SkUNREACHABLE; + } + const T& item(int i) const { + return const_cast<SkTBlockList*>(this)->item(i); + } + +private: + // Let other SkTBlockLists have access (only ever used when T and S are the same but you + // cannot have partial specializations declared as a friend...) 
+ template<typename S, int N> friend class SkTBlockList; + friend class TBlockListTestAccess; // for fAllocator + + inline static constexpr size_t StartingSize = + SkBlockAllocator::Overhead<alignof(T)>() + StartingItems * sizeof(T); + + static T& GetItem(SkBlockAllocator::Block* block, int index) { + return *static_cast<T*>(block->ptr(index)); + } + static const T& GetItem(const SkBlockAllocator::Block* block, int index) { + return *static_cast<const T*>(block->ptr(index)); + } + static int First(const SkBlockAllocator::Block* b) { + return b->firstAlignedOffset<alignof(T)>(); + } + static int Last(const SkBlockAllocator::Block* b) { + return b->metadata(); + } + static int Increment(const SkBlockAllocator::Block* b, int index) { + return index + sizeof(T); + } + static int Decrement(const SkBlockAllocator::Block* b, int index) { + return index - sizeof(T); + } + + void* pushItem() { + // 'template' required because fAllocator is a template, calling a template member + auto br = fAllocator->template allocate<alignof(T)>(sizeof(T)); + SkASSERT(br.fStart == br.fAlignedOffset || + br.fAlignedOffset == First(fAllocator->currentBlock())); + br.fBlock->setMetadata(br.fAlignedOffset); + fAllocator->setMetadata(fAllocator->metadata() + 1); + return br.fBlock->ptr(br.fAlignedOffset); + } + + // N represents the number of items, whereas SkSBlockAllocator takes total bytes, so must + // account for the block allocator's size too. + // + // This class uses the SkBlockAllocator's metadata to track total count of items, and per-block + // metadata to track the index of the last allocated item within each block. + SkSBlockAllocator<StartingSize> fAllocator; + +public: + using Iter = BlockIndexIterator<T&, true, false, &First, &Last, &Increment, &GetItem>; + using CIter = BlockIndexIterator<const T&, true, true, &First, &Last, &Increment, &GetItem>; + using RIter = BlockIndexIterator<T&, false, false, &Last, &First, &Decrement, &GetItem>; + using CRIter = BlockIndexIterator<const T&, false, true, &Last, &First, &Decrement, &GetItem>; + + /** + * Iterate over all items in allocation order (oldest to newest) using a for-range loop: + * + * for (auto&& T : this->items()) {} + */ + Iter items() { return Iter(fAllocator.allocator()); } + CIter items() const { return CIter(fAllocator.allocator()); } + + // Iterate from newest to oldest using a for-range loop. + RIter ritems() { return RIter(fAllocator.allocator()); } + CRIter ritems() const { return CRIter(fAllocator.allocator()); } +}; + +template <typename T, int SI1> +template <int SI2> +void SkTBlockList<T, SI1>::concat(SkTBlockList<T, SI2>&& other) { + // Optimize the common case where the list to append only has a single item + if (other.empty()) { + return; + } else if (other.count() == 1) { + this->push_back(other.back()); + other.pop_back(); + return; + } + + // Manually move all items in other's head block into this list; all heap blocks from 'other' + // will be appended to the block linked list (no per-item moves needed then). + int headItemCount = 0; + SkBlockAllocator::Block* headBlock = other.fAllocator->headBlock(); + SkDEBUGCODE(int oldCount = this->count();) + if (headBlock->metadata() > 0) { + int headStart = First(headBlock); + int headEnd = Last(headBlock) + sizeof(T); // exclusive + headItemCount = (headEnd - headStart) / sizeof(T); + int avail = fAllocator->currentBlock()->template avail<alignof(T)>() / sizeof(T); + if (headItemCount > avail) { + // Make sure there is extra room for the items beyond what's already avail. 
Use the + // kIgnoreGrowthPolicy_Flag to make this reservation as tight as possible since + // 'other's heap blocks will be appended after it and any extra space is wasted. + fAllocator->template reserve<alignof(T)>((headItemCount - avail) * sizeof(T), + SkBlockAllocator::kIgnoreExistingBytes_Flag | + SkBlockAllocator::kIgnoreGrowthPolicy_Flag); + } + + if constexpr (std::is_trivially_copy_constructible<T>::value) { + // memcpy all items at once (or twice between current and reserved space). + SkASSERT(std::is_trivially_destructible<T>::value); + auto copy = [](SkBlockAllocator::Block* src, int start, SkBlockAllocator* dst, int n) { + auto target = dst->template allocate<alignof(T)>(n * sizeof(T)); + memcpy(target.fBlock->ptr(target.fAlignedOffset), src->ptr(start), n * sizeof(T)); + target.fBlock->setMetadata(target.fAlignedOffset + (n - 1) * sizeof(T)); + }; + + if (avail > 0) { + // Copy 0 to avail items into existing tail block + copy(headBlock, headStart, fAllocator.allocator(), std::min(headItemCount, avail)); + } + if (headItemCount > avail) { + // Copy (head count - avail) into the extra reserved space + copy(headBlock, headStart + avail * sizeof(T), + fAllocator.allocator(), headItemCount - avail); + } + fAllocator->setMetadata(fAllocator->metadata() + headItemCount); + } else { + // Move every item over one at a time + for (int i = headStart; i < headEnd; i += sizeof(T)) { + T& toMove = GetItem(headBlock, i); + this->push_back(std::move(toMove)); + // Anything of interest should have been moved, but run this since T isn't + // a trusted type. + toMove.~T(); // NOLINT(bugprone-use-after-move): calling dtor always allowed + } + } + + other.fAllocator->releaseBlock(headBlock); + } + + // other's head block must have been fully copied since it cannot be stolen + SkASSERT(other.fAllocator->headBlock()->metadata() == 0 && + fAllocator->metadata() == oldCount + headItemCount); + fAllocator->stealHeapBlocks(other.fAllocator.allocator()); + fAllocator->setMetadata(fAllocator->metadata() + + (other.fAllocator->metadata() - headItemCount)); + other.fAllocator->setMetadata(0); +} + +/** + * BlockIndexIterator provides a reusable iterator template for collections built on top of a + * SkBlockAllocator, where each item is of the same type, and the index to an item can be iterated + * over in a known manner. It supports const and non-const, and forward and reverse, assuming it's + * provided with proper functions for starting, ending, and advancing. + */ +template <typename T, // The element type (including any modifiers) + bool Forward, // Are indices within a block increasing or decreasing with iteration? 
+ bool Const, // Whether or not T is const + IndexFn Start, // Returns the index of the first valid item in a block + IndexFn End, // Returns the index of the last valid item (so it is inclusive) + NextFn Next, // Returns the next index given the current index + ItemFn<T, typename std::conditional<Const, const SkBlockAllocator::Block, + SkBlockAllocator::Block>::type> Resolve> +class BlockIndexIterator { + using BlockIter = typename SkBlockAllocator::BlockIter<Forward, Const>; +public: + BlockIndexIterator(BlockIter iter) : fBlockIter(iter) {} + + class Item { + public: + bool operator!=(const Item& other) const { + return other.fBlock != fBlock || (SkToBool(*fBlock) && other.fIndex != fIndex); + } + + T operator*() const { + SkASSERT(*fBlock); + return Resolve(*fBlock, fIndex); + } + + Item& operator++() { + const auto* block = *fBlock; + SkASSERT(block && block->metadata() > 0); + SkASSERT((Forward && Next(block, fIndex) > fIndex) || + (!Forward && Next(block, fIndex) < fIndex)); + fIndex = Next(block, fIndex); + if ((Forward && fIndex > fEndIndex) || (!Forward && fIndex < fEndIndex)) { + ++fBlock; + this->setIndices(); + } + return *this; + } + + private: + friend BlockIndexIterator; + using BlockItem = typename BlockIter::Item; + + Item(BlockItem block) : fBlock(block) { + this->setIndices(); + } + + void setIndices() { + // Skip empty blocks + while(*fBlock && (*fBlock)->metadata() == 0) { + ++fBlock; + } + if (*fBlock) { + fIndex = Start(*fBlock); + fEndIndex = End(*fBlock); + } else { + fIndex = 0; + fEndIndex = 0; + } + + SkASSERT((Forward && fIndex <= fEndIndex) || (!Forward && fIndex >= fEndIndex)); + } + + BlockItem fBlock; + int fIndex; + int fEndIndex; + }; + + Item begin() const { return Item(fBlockIter.begin()); } + Item end() const { return Item(fBlockIter.end()); } + +private: + BlockIter fBlockIter; +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkTDArray.cpp b/gfx/skia/skia/src/base/SkTDArray.cpp new file mode 100644 index 0000000000..2cf7780f95 --- /dev/null +++ b/gfx/skia/skia/src/base/SkTDArray.cpp @@ -0,0 +1,240 @@ +/* + * Copyright 2018 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#include "include/private/base/SkTDArray.h" + +#include "include/private/base/SkMalloc.h" +#include "include/private/base/SkTFitsIn.h" +#include "include/private/base/SkTo.h" + +#include <climits> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <new> +#include <utility> + +SkTDStorage::SkTDStorage(int sizeOfT) : fSizeOfT{sizeOfT} {} + +SkTDStorage::SkTDStorage(const void* src, int size, int sizeOfT) + : fSizeOfT{sizeOfT} + , fCapacity{size} + , fSize{size} { + if (size > 0) { + SkASSERT(src != nullptr); + size_t storageSize = this->bytes(size); + fStorage = static_cast<std::byte*>(sk_malloc_throw(storageSize)); + memcpy(fStorage, src, storageSize); + } +} + +SkTDStorage::SkTDStorage(const SkTDStorage& that) + : SkTDStorage{that.fStorage, that.fSize, that.fSizeOfT} {} + +SkTDStorage& SkTDStorage::operator=(const SkTDStorage& that) { + if (this != &that) { + if (that.fSize <= fCapacity) { + fSize = that.fSize; + if (fSize > 0) { + memcpy(fStorage, that.data(), that.size_bytes()); + } + } else { + *this = SkTDStorage{that.data(), that.size(), that.fSizeOfT}; + } + } + return *this; +} + +SkTDStorage::SkTDStorage(SkTDStorage&& that) + : fSizeOfT{that.fSizeOfT} + , fStorage(std::exchange(that.fStorage, nullptr)) + , fCapacity{that.fCapacity} + , fSize{that.fSize} {} + +SkTDStorage& SkTDStorage::operator=(SkTDStorage&& that) { + if (this != &that) { + this->~SkTDStorage(); + new (this) SkTDStorage{std::move(that)}; + } + return *this; +} + +SkTDStorage::~SkTDStorage() { + sk_free(fStorage); +} + +void SkTDStorage::reset() { + const int sizeOfT = fSizeOfT; + this->~SkTDStorage(); + new (this) SkTDStorage{sizeOfT}; +} + +void SkTDStorage::swap(SkTDStorage& that) { + SkASSERT(fSizeOfT == that.fSizeOfT); + using std::swap; + swap(fStorage, that.fStorage); + swap(fCapacity, that.fCapacity); + swap(fSize, that.fSize); +} + +void SkTDStorage::resize(int newSize) { + SkASSERT(newSize >= 0); + if (newSize > fCapacity) { + this->reserve(newSize); + } + fSize = newSize; +} + +void SkTDStorage::reserve(int newCapacity) { + SkASSERT(newCapacity >= 0); + if (newCapacity > fCapacity) { + // Establish the maximum number of elements that includes a valid count for end. In the + // largest case end() = &fArray[INT_MAX] which is 1 after the last indexable element. + static constexpr int kMaxCount = INT_MAX; + + // Assume that the array will max out. + int expandedReserve = kMaxCount; + if (kMaxCount - newCapacity > 4) { + // Add 1/4 more than we need. Add 4 to ensure this grows by at least 1. Pin to + // kMaxCount if no room for 1/4 growth. + int growth = 4 + ((newCapacity + 4) >> 2); + // Read this line as: if (count + growth < kMaxCount) { ... } + // It's rewritten to avoid signed integer overflow. + if (kMaxCount - newCapacity > growth) { + expandedReserve = newCapacity + growth; + } + } + + + // With a T size of 1, the above allocator produces the progression of 7, 15, ... Since, + // the sizeof max_align_t is often 16, there is no reason to allocate anything less than + // 16 bytes. This eliminates a realloc when pushing back bytes to an SkTDArray. + if (fSizeOfT == 1) { + // Round up to the multiple of 16. 
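+ // e.g. 7 -> 16, 15 -> 16, 23 -> 32.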
+ expandedReserve = (expandedReserve + 15) & ~15; + } + + fCapacity = expandedReserve; + size_t newStorageSize = this->bytes(fCapacity); + fStorage = static_cast<std::byte*>(sk_realloc_throw(fStorage, newStorageSize)); + } +} + +void SkTDStorage::shrink_to_fit() { + if (fCapacity != fSize) { + fCapacity = fSize; + // Because calling realloc with size of 0 is implementation defined, force to a good state + // by freeing fStorage. + if (fCapacity > 0) { + fStorage = static_cast<std::byte*>(sk_realloc_throw(fStorage, this->bytes(fCapacity))); + } else { + sk_free(fStorage); + fStorage = nullptr; + } + } +} + +void SkTDStorage::erase(int index, int count) { + SkASSERT(count >= 0); + SkASSERT(fSize >= count); + SkASSERT(0 <= index && index <= fSize); + + if (count > 0) { + // Check that the resulting size fits in an int. This will abort if not. + const int newCount = this->calculateSizeOrDie(-count); + this->moveTail(index, index + count, fSize); + this->resize(newCount); + } +} + +void SkTDStorage::removeShuffle(int index) { + SkASSERT(fSize > 0); + SkASSERT(0 <= index && index < fSize); + // Check that the new count is valid. + const int newCount = this->calculateSizeOrDie(-1); + this->moveTail(index, fSize - 1, fSize); + this->resize(newCount); +} + +void* SkTDStorage::prepend() { + return this->insert(/*index=*/0); +} + +void SkTDStorage::append() { + if (fSize < fCapacity) { + fSize++; + } else { + this->insert(fSize); + } +} + +void SkTDStorage::append(int count) { + SkASSERT(count >= 0); + // Read as: if (fSize + count <= fCapacity) {...}. This is a UB safe way to avoid the add. + if (fCapacity - fSize >= count) { + fSize += count; + } else { + this->insert(fSize, count, nullptr); + } +} + +void* SkTDStorage::append(const void* src, int count) { + return this->insert(fSize, count, src); +} + +void* SkTDStorage::insert(int index) { + return this->insert(index, /*count=*/1, nullptr); +} + +void* SkTDStorage::insert(int index, int count, const void* src) { + SkASSERT(0 <= index && index <= fSize); + SkASSERT(count >= 0); + + if (count > 0) { + const int oldCount = fSize; + const int newCount = this->calculateSizeOrDie(count); + this->resize(newCount); + this->moveTail(index + count, index, oldCount); + + if (src != nullptr) { + this->copySrc(index, src, count); + } + } + + return this->address(index); +} + +bool operator==(const SkTDStorage& a, const SkTDStorage& b) { + return a.size() == b.size() && + (a.size() == 0 || !memcmp(a.data(), b.data(), a.bytes(a.size()))); +} + +int SkTDStorage::calculateSizeOrDie(int delta) { + // Check that count will not go negative. + SkASSERT_RELEASE(-fSize <= delta); + + // We take care to avoid overflow here. + // Because count and delta are both signed 32-bit ints, the sum of count and delta is at + // most 4294967294, which fits fine in uint32_t. Proof follows in assert. 
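+ // (INT_MAX + INT_MAX == 4294967294, which is UINT32_MAX - 1.)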
+ static_assert(UINT32_MAX >= (uint32_t)INT_MAX + (uint32_t)INT_MAX); + uint32_t testCount = (uint32_t)fSize + (uint32_t)delta; + SkASSERT_RELEASE(SkTFitsIn<int>(testCount)); + return SkToInt(testCount); +} + +void SkTDStorage::moveTail(int to, int tailStart, int tailEnd) { + SkASSERT(0 <= to && to <= fSize); + SkASSERT(0 <= tailStart && tailStart <= tailEnd && tailEnd <= fSize); + if (to != tailStart && tailStart != tailEnd) { + this->copySrc(to, this->address(tailStart), tailEnd - tailStart); + } +} + +void SkTDStorage::copySrc(int dstIndex, const void* src, int count) { + SkASSERT(count > 0); + memmove(this->address(dstIndex), src, this->bytes(count)); +} diff --git a/gfx/skia/skia/src/base/SkTDPQueue.h b/gfx/skia/skia/src/base/SkTDPQueue.h new file mode 100644 index 0000000000..3a897130f2 --- /dev/null +++ b/gfx/skia/skia/src/base/SkTDPQueue.h @@ -0,0 +1,222 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkTDPQueue_DEFINED +#define SkTDPQueue_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkDebug.h" +#include "include/private/base/SkTDArray.h" +#include "include/private/base/SkTo.h" +#include "src/base/SkTSort.h" + +#include <utility> + +/** + * This class implements a priority queue. T is the type of the elements in the queue. LESS is a + * function that compares two Ts and returns true if the first is higher priority than the second. + * + * Optionally objects may know their index into the priority queue. The queue will update the index + * as the objects move through the queue. This is enabled by using a non-nullptr function for INDEX. + * When an INDEX function is provided random deletes from the queue are allowed using remove(). + * Additionally, the * priority is allowed to change as long as priorityDidChange() is called + * afterwards. In debug builds the index will be set to -1 before an element is removed from the + * queue. + */ +template <typename T, + bool (*LESS)(const T&, const T&), + int* (*INDEX)(const T&) = (int* (*)(const T&))nullptr> +class SkTDPQueue { +public: + SkTDPQueue() {} + SkTDPQueue(int reserve) { fArray.reserve(reserve); } + + SkTDPQueue(SkTDPQueue&&) = default; + SkTDPQueue& operator =(SkTDPQueue&&) = default; + + SkTDPQueue(const SkTDPQueue&) = delete; + SkTDPQueue& operator=(const SkTDPQueue&) = delete; + + /** Number of items in the queue. */ + int count() const { return fArray.size(); } + + /** Gets the next item in the queue without popping it. */ + const T& peek() const { return fArray[0]; } + T& peek() { return fArray[0]; } + + /** Removes the next item. */ + void pop() { + this->validate(); + SkDEBUGCODE(if (SkToBool(INDEX)) { *INDEX(fArray[0]) = -1; }) + if (1 == fArray.size()) { + fArray.pop_back(); + return; + } + + fArray[0] = fArray[fArray.size() - 1]; + this->setIndex(0); + fArray.pop_back(); + this->percolateDownIfNecessary(0); + + this->validate(); + } + + /** Inserts a new item in the queue based on its priority. */ + void insert(T entry) { + this->validate(); + int index = fArray.size(); + *fArray.append() = entry; + this->setIndex(fArray.size() - 1); + this->percolateUpIfNecessary(index); + this->validate(); + } + + /** Random access removal. This requires that the INDEX function is non-nullptr. 
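+        For example (an illustrative sketch; Entry, EntryLess, and EntryIndex are hypothetical
+        and not part of this header), an element can expose its heap index like so:
+
+            struct Entry { int fPQIndex = -1; ... };
+            bool EntryLess(Entry* const& a, Entry* const& b);
+            int* EntryIndex(Entry* const& e) { return &e->fPQIndex; }
+
+            SkTDPQueue<Entry*, EntryLess, EntryIndex> queue;
+            queue.remove(entry);  // permitted because EntryIndex is non-nullptr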
*/ + void remove(T entry) { + SkASSERT(nullptr != INDEX); + int index = *INDEX(entry); + SkASSERT(index >= 0 && index < fArray.size()); + this->validate(); + SkDEBUGCODE(*INDEX(fArray[index]) = -1;) + if (index == fArray.size() - 1) { + fArray.pop_back(); + return; + } + fArray[index] = fArray[fArray.size() - 1]; + fArray.pop_back(); + this->setIndex(index); + this->percolateUpOrDown(index); + this->validate(); + } + + /** Notification that the priority of an entry has changed. This must be called after an + item's priority is changed to maintain correct ordering. Changing the priority is only + allowed if an INDEX function is provided. */ + void priorityDidChange(T entry) { + SkASSERT(nullptr != INDEX); + int index = *INDEX(entry); + SkASSERT(index >= 0 && index < fArray.size()); + this->validate(index); + this->percolateUpOrDown(index); + this->validate(); + } + + /** Gets the item at index i in the priority queue (for i < this->count()). at(0) is equivalent + to peek(). Otherwise, there is no guarantee about ordering of elements in the queue. */ + T at(int i) const { return fArray[i]; } + + /** Sorts the queue into priority order. The queue is only guarenteed to remain in sorted order + * until any other operation, other than at(), is performed. + */ + void sort() { + if (fArray.size() > 1) { + SkTQSort<T>(fArray.begin(), fArray.end(), LESS); + for (int i = 0; i < fArray.size(); i++) { + this->setIndex(i); + } + this->validate(); + } + } + +private: + static int LeftOf(int x) { SkASSERT(x >= 0); return 2 * x + 1; } + static int ParentOf(int x) { SkASSERT(x > 0); return (x - 1) >> 1; } + + void percolateUpOrDown(int index) { + SkASSERT(index >= 0); + if (!percolateUpIfNecessary(index)) { + this->validate(index); + this->percolateDownIfNecessary(index); + } + } + + bool percolateUpIfNecessary(int index) { + SkASSERT(index >= 0); + bool percolated = false; + do { + if (0 == index) { + this->setIndex(index); + return percolated; + } + int p = ParentOf(index); + if (LESS(fArray[index], fArray[p])) { + using std::swap; + swap(fArray[index], fArray[p]); + this->setIndex(index); + index = p; + percolated = true; + } else { + this->setIndex(index); + return percolated; + } + this->validate(index); + } while (true); + } + + void percolateDownIfNecessary(int index) { + SkASSERT(index >= 0); + do { + int child = LeftOf(index); + + if (child >= fArray.size()) { + // We're a leaf. + this->setIndex(index); + return; + } + + if (child + 1 >= fArray.size()) { + // We only have a left child. + if (LESS(fArray[child], fArray[index])) { + using std::swap; + swap(fArray[child], fArray[index]); + this->setIndex(child); + this->setIndex(index); + return; + } + } else if (LESS(fArray[child + 1], fArray[child])) { + // The right child is the one we should swap with, if we swap. + child++; + } + + // Check if we need to swap. + if (LESS(fArray[child], fArray[index])) { + using std::swap; + swap(fArray[child], fArray[index]); + this->setIndex(index); + index = child; + } else { + // We're less than both our children. 
+ this->setIndex(index); + return; + } + this->validate(index); + } while (true); + } + + void setIndex(int index) { + SkASSERT(index < fArray.size()); + if (SkToBool(INDEX)) { + *INDEX(fArray[index]) = index; + } + } + + void validate(int excludedIndex = -1) const { +#ifdef SK_DEBUG + for (int i = 1; i < fArray.size(); ++i) { + int p = ParentOf(i); + if (excludedIndex != p && excludedIndex != i) { + SkASSERT(!(LESS(fArray[i], fArray[p]))); + SkASSERT(!SkToBool(INDEX) || *INDEX(fArray[i]) == i); + } + } +#endif + } + + SkTDArray<T> fArray; +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkTInternalLList.h b/gfx/skia/skia/src/base/SkTInternalLList.h new file mode 100644 index 0000000000..5b655a35eb --- /dev/null +++ b/gfx/skia/skia/src/base/SkTInternalLList.h @@ -0,0 +1,304 @@ +/* + * Copyright 2012 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkTInternalLList_DEFINED +#define SkTInternalLList_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkDebug.h" +#include "include/private/base/SkTo.h" + +/** + * This macro creates the member variables required by the SkTInternalLList class. It should be + * placed in the private section of any class that will be stored in a double linked list. + */ +#define SK_DECLARE_INTERNAL_LLIST_INTERFACE(ClassName) \ + friend class SkTInternalLList<ClassName>; \ + /* back pointer to the owning list - for debugging */ \ + SkDEBUGCODE(SkTInternalLList<ClassName>* fList = nullptr;) \ + ClassName* fPrev = nullptr; \ + ClassName* fNext = nullptr + +/** + * This class implements a templated internal doubly linked list data structure. + */ +template <class T> class SkTInternalLList { +public: + SkTInternalLList() {} + + void reset() { + fHead = nullptr; + fTail = nullptr; + } + + void remove(T* entry) { + SkASSERT(fHead && fTail); + SkASSERT(this->isInList(entry)); + + T* prev = entry->fPrev; + T* next = entry->fNext; + + if (prev) { + prev->fNext = next; + } else { + fHead = next; + } + if (next) { + next->fPrev = prev; + } else { + fTail = prev; + } + + entry->fPrev = nullptr; + entry->fNext = nullptr; + +#ifdef SK_DEBUG + entry->fList = nullptr; +#endif + } + + void addToHead(T* entry) { + SkASSERT(nullptr == entry->fPrev && nullptr == entry->fNext); + SkASSERT(nullptr == entry->fList); + + entry->fPrev = nullptr; + entry->fNext = fHead; + if (fHead) { + fHead->fPrev = entry; + } + fHead = entry; + if (nullptr == fTail) { + fTail = entry; + } + +#ifdef SK_DEBUG + entry->fList = this; +#endif + } + + void addToTail(T* entry) { + SkASSERT(nullptr == entry->fPrev && nullptr == entry->fNext); + SkASSERT(nullptr == entry->fList); + + entry->fPrev = fTail; + entry->fNext = nullptr; + if (fTail) { + fTail->fNext = entry; + } + fTail = entry; + if (nullptr == fHead) { + fHead = entry; + } + +#ifdef SK_DEBUG + entry->fList = this; +#endif + } + + /** + * Inserts a new list entry before an existing list entry. The new entry must not already be + * a member of this or any other list. If existingEntry is NULL then the new entry is added + * at the tail. 
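+     *
+     * For example (sketch; Node is a hypothetical class that declares
+     * SK_DECLARE_INTERNAL_LLIST_INTERFACE(Node)):
+     *
+     *     SkTInternalLList<Node> list;
+     *     list.addToTail(&a);          // list: a
+     *     list.addBefore(&b, &a);      // list: b, a
+     *     list.addBefore(&c, nullptr); // nullptr existingEntry appends: b, a, c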
+ */ + void addBefore(T* newEntry, T* existingEntry) { + SkASSERT(newEntry); + + if (nullptr == existingEntry) { + this->addToTail(newEntry); + return; + } + + SkASSERT(this->isInList(existingEntry)); + newEntry->fNext = existingEntry; + T* prev = existingEntry->fPrev; + existingEntry->fPrev = newEntry; + newEntry->fPrev = prev; + if (nullptr == prev) { + SkASSERT(fHead == existingEntry); + fHead = newEntry; + } else { + prev->fNext = newEntry; + } +#ifdef SK_DEBUG + newEntry->fList = this; +#endif + } + + /** + * Inserts a new list entry after an existing list entry. The new entry must not already be + * a member of this or any other list. If existingEntry is NULL then the new entry is added + * at the head. + */ + void addAfter(T* newEntry, T* existingEntry) { + SkASSERT(newEntry); + + if (nullptr == existingEntry) { + this->addToHead(newEntry); + return; + } + + SkASSERT(this->isInList(existingEntry)); + newEntry->fPrev = existingEntry; + T* next = existingEntry->fNext; + existingEntry->fNext = newEntry; + newEntry->fNext = next; + if (nullptr == next) { + SkASSERT(fTail == existingEntry); + fTail = newEntry; + } else { + next->fPrev = newEntry; + } +#ifdef SK_DEBUG + newEntry->fList = this; +#endif + } + + void concat(SkTInternalLList&& list) { + if (list.isEmpty()) { + return; + } + + list.fHead->fPrev = fTail; + if (!fHead) { + SkASSERT(!list.fHead->fPrev); + fHead = list.fHead; + } else { + SkASSERT(fTail); + fTail->fNext = list.fHead; + } + fTail = list.fTail; + +#ifdef SK_DEBUG + for (T* node = list.fHead; node; node = node->fNext) { + SkASSERT(node->fList == &list); + node->fList = this; + } +#endif + + list.fHead = list.fTail = nullptr; + } + + bool isEmpty() const { + SkASSERT(SkToBool(fHead) == SkToBool(fTail)); + return !fHead; + } + + T* head() const { return fHead; } + T* tail() const { return fTail; } + + class Iter { + public: + enum IterStart { + kHead_IterStart, + kTail_IterStart + }; + + Iter() : fCurr(nullptr) {} + Iter(const Iter& iter) : fCurr(iter.fCurr) {} + Iter& operator= (const Iter& iter) { fCurr = iter.fCurr; return *this; } + + T* init(const SkTInternalLList& list, IterStart startLoc) { + if (kHead_IterStart == startLoc) { + fCurr = list.fHead; + } else { + SkASSERT(kTail_IterStart == startLoc); + fCurr = list.fTail; + } + + return fCurr; + } + + T* get() { return fCurr; } + + /** + * Return the next/previous element in the list or NULL if at the end. + */ + T* next() { + if (nullptr == fCurr) { + return nullptr; + } + + fCurr = fCurr->fNext; + return fCurr; + } + + T* prev() { + if (nullptr == fCurr) { + return nullptr; + } + + fCurr = fCurr->fPrev; + return fCurr; + } + + /** + * C++11 range-for interface. 
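+         *
+         * For example:
+         *
+         *     for (T* item : list) {
+         *         // visits entries from head to tail
+         *     }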
+ */ + bool operator!=(const Iter& that) { return fCurr != that.fCurr; } + T* operator*() { return this->get(); } + void operator++() { this->next(); } + + private: + T* fCurr; + }; + + Iter begin() const { + Iter iter; + iter.init(*this, Iter::kHead_IterStart); + return iter; + } + + Iter end() const { return Iter(); } + +#ifdef SK_DEBUG + void validate() const { + SkASSERT(!fHead == !fTail); + Iter iter; + for (T* item = iter.init(*this, Iter::kHead_IterStart); item; item = iter.next()) { + SkASSERT(this->isInList(item)); + if (nullptr == item->fPrev) { + SkASSERT(fHead == item); + } else { + SkASSERT(item->fPrev->fNext == item); + } + if (nullptr == item->fNext) { + SkASSERT(fTail == item); + } else { + SkASSERT(item->fNext->fPrev == item); + } + } + } + + /** + * Debugging-only method that uses the list back pointer to check if 'entry' is indeed in 'this' + * list. + */ + bool isInList(const T* entry) const { + return entry->fList == this; + } + + /** + * Debugging-only method that laboriously counts the list entries. + */ + int countEntries() const { + int count = 0; + for (T* entry = fHead; entry; entry = entry->fNext) { + ++count; + } + return count; + } +#endif // SK_DEBUG + +private: + T* fHead = nullptr; + T* fTail = nullptr; + + SkTInternalLList(const SkTInternalLList&) = delete; + SkTInternalLList& operator=(const SkTInternalLList&) = delete; +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkTLazy.h b/gfx/skia/skia/src/base/SkTLazy.h new file mode 100644 index 0000000000..38b3b373db --- /dev/null +++ b/gfx/skia/skia/src/base/SkTLazy.h @@ -0,0 +1,208 @@ +/* + * Copyright 2011 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkTLazy_DEFINED +#define SkTLazy_DEFINED + +#include "include/private/base/SkAssert.h" + +#include <optional> +#include <utility> + +/** + * Efficient way to defer allocating/initializing a class until it is needed + * (if ever). + */ +template <typename T> class SkTLazy { +public: + SkTLazy() = default; + explicit SkTLazy(const T* src) : fValue(src ? std::optional<T>(*src) : std::nullopt) {} + SkTLazy(const SkTLazy& that) : fValue(that.fValue) {} + SkTLazy(SkTLazy&& that) : fValue(std::move(that.fValue)) {} + + ~SkTLazy() = default; + + SkTLazy& operator=(const SkTLazy& that) { + fValue = that.fValue; + return *this; + } + + SkTLazy& operator=(SkTLazy&& that) { + fValue = std::move(that.fValue); + return *this; + } + + /** + * Return a pointer to an instance of the class initialized with 'args'. + * If a previous instance had been initialized (either from init() or + * set()) it will first be destroyed, so that a freshly initialized + * instance is always returned. + */ + template <typename... Args> T* init(Args&&... args) { + fValue.emplace(std::forward<Args>(args)...); + return this->get(); + } + + /** + * Copy src into this, and return a pointer to a copy of it. Note this + * will always return the same pointer, so if it is called on a lazy that + * has already been initialized, then this will copy over the previous + * contents. + */ + T* set(const T& src) { + fValue = src; + return this->get(); + } + + T* set(T&& src) { + fValue = std::move(src); + return this->get(); + } + + /** + * Destroy the lazy object (if it was created via init() or set()) + */ + void reset() { + fValue.reset(); + } + + /** + * Returns true if a valid object has been initialized in the SkTLazy, + * false otherwise. 
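+     *
+     *  For example (Foo is any hypothetical default-constructible type):
+     *
+     *      SkTLazy<Foo> lazy;           // no Foo has been constructed yet
+     *      SkASSERT(!lazy.isValid());
+     *      lazy.init();                 // default-constructs the Foo
+     *      SkASSERT(lazy.isValid());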
+ */ + bool isValid() const { return fValue.has_value(); } + + /** + * Returns the object. This version should only be called when the caller + * knows that the object has been initialized. + */ + T* get() { + SkASSERT(fValue.has_value()); + return &fValue.value(); + } + const T* get() const { + SkASSERT(fValue.has_value()); + return &fValue.value(); + } + + T* operator->() { return this->get(); } + const T* operator->() const { return this->get(); } + + T& operator*() { + SkASSERT(fValue.has_value()); + return *fValue; + } + const T& operator*() const { + SkASSERT(fValue.has_value()); + return *fValue; + } + + /** + * Like above but doesn't assert if object isn't initialized (in which case + * nullptr is returned). + */ + const T* getMaybeNull() const { return fValue.has_value() ? this->get() : nullptr; } + T* getMaybeNull() { return fValue.has_value() ? this->get() : nullptr; } + +private: + std::optional<T> fValue; +}; + +/** + * A helper built on top of std::optional to do copy-on-first-write. The object is initialized + * with a const pointer but provides a non-const pointer accessor. The first time the + * accessor is called (if ever) the object is cloned. + * + * In the following example at most one copy of constThing is made: + * + * SkTCopyOnFirstWrite<Thing> thing(&constThing); + * ... + * function_that_takes_a_const_thing_ptr(thing); // constThing is passed + * ... + * if (need_to_modify_thing()) { + * thing.writable()->modifyMe(); // makes a copy of constThing + * } + * ... + * x = thing->readSomething(); + * ... + * if (need_to_modify_thing_now()) { + * thing.writable()->changeMe(); // makes a copy of constThing if we didn't call modifyMe() + * } + * + * consume_a_thing(thing); // could be constThing or a modified copy. + */ +template <typename T> +class SkTCopyOnFirstWrite { +public: + explicit SkTCopyOnFirstWrite(const T& initial) : fObj(&initial) {} + + explicit SkTCopyOnFirstWrite(const T* initial) : fObj(initial) {} + + // Constructor for delayed initialization. + SkTCopyOnFirstWrite() : fObj(nullptr) {} + + SkTCopyOnFirstWrite(const SkTCopyOnFirstWrite& that) { *this = that; } + SkTCopyOnFirstWrite( SkTCopyOnFirstWrite&& that) { *this = std::move(that); } + + SkTCopyOnFirstWrite& operator=(const SkTCopyOnFirstWrite& that) { + fLazy = that.fLazy; + fObj = fLazy.has_value() ? &fLazy.value() : that.fObj; + return *this; + } + + SkTCopyOnFirstWrite& operator=(SkTCopyOnFirstWrite&& that) { + fLazy = std::move(that.fLazy); + fObj = fLazy.has_value() ? &fLazy.value() : that.fObj; + return *this; + } + + // Should only be called once, and only if the default constructor was used. + void init(const T& initial) { + SkASSERT(!fObj); + SkASSERT(!fLazy.has_value()); + fObj = &initial; + } + + // If not already initialized, in-place instantiates the writable object + template <typename... Args> + void initIfNeeded(Args&&... args) { + if (!fObj) { + SkASSERT(!fLazy.has_value()); + fObj = &fLazy.emplace(std::forward<Args>(args)...); + } + } + + /** + * Returns a writable T*. The first time this is called the initial object is cloned. + */ + T* writable() { + SkASSERT(fObj); + if (!fLazy.has_value()) { + fLazy = *fObj; + fObj = &fLazy.value(); + } + return &fLazy.value(); + } + + const T* get() const { return fObj; } + + /** + * Operators for treating this as though it were a const pointer. 
+ */ + + const T *operator->() const { return fObj; } + + operator const T*() const { return fObj; } + + const T& operator *() const { return *fObj; } + +private: + const T* fObj; + std::optional<T> fLazy; +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkTSearch.cpp b/gfx/skia/skia/src/base/SkTSearch.cpp new file mode 100644 index 0000000000..d91772e03b --- /dev/null +++ b/gfx/skia/skia/src/base/SkTSearch.cpp @@ -0,0 +1,117 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + + +#include "src/base/SkTSearch.h" + +#include "include/private/base/SkMalloc.h" + +#include <cstring> +#include <ctype.h> + +static inline const char* index_into_base(const char*const* base, int index, + size_t elemSize) +{ + return *(const char*const*)((const char*)base + index * elemSize); +} + +int SkStrSearch(const char*const* base, int count, const char target[], + size_t target_len, size_t elemSize) +{ + if (count <= 0) + return ~0; + + SkASSERT(base != nullptr); + + int lo = 0; + int hi = count - 1; + + while (lo < hi) + { + int mid = (hi + lo) >> 1; + const char* elem = index_into_base(base, mid, elemSize); + + int cmp = strncmp(elem, target, target_len); + if (cmp < 0) + lo = mid + 1; + else if (cmp > 0 || strlen(elem) > target_len) + hi = mid; + else + return mid; + } + + const char* elem = index_into_base(base, hi, elemSize); + int cmp = strncmp(elem, target, target_len); + if (cmp || strlen(elem) > target_len) + { + if (cmp < 0) + hi += 1; + hi = ~hi; + } + return hi; +} + +int SkStrSearch(const char*const* base, int count, const char target[], + size_t elemSize) +{ + return SkStrSearch(base, count, target, strlen(target), elemSize); +} + +int SkStrLCSearch(const char*const* base, int count, const char target[], + size_t len, size_t elemSize) +{ + SkASSERT(target); + + SkAutoAsciiToLC tolc(target, len); + + return SkStrSearch(base, count, tolc.lc(), len, elemSize); +} + +int SkStrLCSearch(const char*const* base, int count, const char target[], + size_t elemSize) +{ + return SkStrLCSearch(base, count, target, strlen(target), elemSize); +} + +////////////////////////////////////////////////////////////////////////////// + +SkAutoAsciiToLC::SkAutoAsciiToLC(const char str[], size_t len) +{ + // see if we need to compute the length + if ((long)len < 0) { + len = strlen(str); + } + fLength = len; + + // assign lc to our preallocated storage if len is small enough, or allocate + // it on the heap + char* lc; + if (len <= STORAGE) { + lc = fStorage; + } else { + lc = (char*)sk_malloc_throw(len + 1); + } + fLC = lc; + + // convert any asii to lower-case. we let non-ascii (utf8) chars pass + // through unchanged + for (int i = (int)(len - 1); i >= 0; --i) { + int c = str[i]; + if ((c & 0x80) == 0) { // is just ascii + c = tolower(c); + } + lc[i] = c; + } + lc[len] = 0; +} + +SkAutoAsciiToLC::~SkAutoAsciiToLC() +{ + if (fLC != fStorage) { + sk_free(fLC); + } +} diff --git a/gfx/skia/skia/src/base/SkTSearch.h b/gfx/skia/skia/src/base/SkTSearch.h new file mode 100644 index 0000000000..6ebd304029 --- /dev/null +++ b/gfx/skia/skia/src/base/SkTSearch.h @@ -0,0 +1,132 @@ + +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + + +#ifndef SkTSearch_DEFINED +#define SkTSearch_DEFINED + +#include "include/private/base/SkAssert.h" + +#include <cstddef> + +/** + * All of the SkTSearch variants want to return the index (0...N-1) of the + * found element, or the bit-not of where to insert the element. + * + * At a simple level, if the return value is negative, it was not found. + * + * For clients that want to insert the new element if it was not found, use + * the following logic: + * + * int index = SkTSearch(...); + * if (index >= 0) { + * // found at index + * } else { + * index = ~index; // now we are positive + * // insert at index + * } + */ + + +// The most general form of SkTSearch takes an array of T and a key of type K. A functor, less, is +// used to perform comparisons. It has two function operators: +// bool operator() (const T& t, const K& k) +// bool operator() (const K& t, const T& k) +template <typename T, typename K, typename LESS> +int SkTSearch(const T base[], int count, const K& key, size_t elemSize, const LESS& less) +{ + SkASSERT(count >= 0); + if (count <= 0) { + return ~0; + } + + SkASSERT(base != nullptr); // base may be nullptr if count is zero + + int lo = 0; + int hi = count - 1; + + while (lo < hi) { + int mid = lo + ((hi - lo) >> 1); + const T* elem = (const T*)((const char*)base + mid * elemSize); + + if (less(*elem, key)) + lo = mid + 1; + else + hi = mid; + } + + const T* elem = (const T*)((const char*)base + hi * elemSize); + if (less(*elem, key)) { + hi += 1; + hi = ~hi; + } else if (less(key, *elem)) { + hi = ~hi; + } + return hi; +} + +// Specialization for case when T==K and the caller wants to use a function rather than functor. +template <typename T, bool (LESS)(const T&, const T&)> +int SkTSearch(const T base[], int count, const T& target, size_t elemSize) { + return SkTSearch(base, count, target, elemSize, + [](const T& a, const T& b) { return LESS(a, b); }); +} + +// Specialization for T==K, compare using op <. +template <typename T> +int SkTSearch(const T base[], int count, const T& target, size_t elemSize) { + return SkTSearch(base, count, target, elemSize, [](const T& a, const T& b) { return a < b; }); +} + +// Specialization for case where domain is an array of T* and the key value is a T*, and you want +// to compare the T objects, not the pointers. +template <typename T, bool (LESS)(const T&, const T&)> +int SkTSearch(T* base[], int count, T* target, size_t elemSize) { + return SkTSearch(base, count, target, elemSize, + [](const T* t, const T* k) { return LESS(*t, *k); }); +} + +int SkStrSearch(const char*const* base, int count, const char target[], + size_t target_len, size_t elemSize); +int SkStrSearch(const char*const* base, int count, const char target[], + size_t elemSize); + +/** Like SkStrSearch, but treats target as if it were all lower-case. Assumes that + base points to a table of lower-case strings. +*/ +int SkStrLCSearch(const char*const* base, int count, const char target[], + size_t target_len, size_t elemSize); +int SkStrLCSearch(const char*const* base, int count, const char target[], + size_t elemSize); + +/** Helper class to convert a string to lower-case, but only modifying the ascii + characters. This makes the routine very fast and never changes the string + length, but it is not suitable for linguistic purposes. Normally this is + used for buiding and searching string tables. 
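+
+    For example:
+
+        SkAutoAsciiToLC tolc("Sans-Serif");
+        // tolc.lc() is "sans-serif" and tolc.length() is 10.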
+*/ +class SkAutoAsciiToLC { +public: + SkAutoAsciiToLC(const char str[], size_t len = (size_t)-1); + ~SkAutoAsciiToLC(); + + const char* lc() const { return fLC; } + size_t length() const { return fLength; } + +private: + char* fLC; // points to either the heap or fStorage + size_t fLength; + enum { + STORAGE = 64 + }; + char fStorage[STORAGE+1]; +}; + +// Helper when calling qsort with a compare proc that has typed its arguments +#define SkCastForQSort(compare) reinterpret_cast<int (*)(const void*, const void*)>(compare) + +#endif diff --git a/gfx/skia/skia/src/base/SkTSort.h b/gfx/skia/skia/src/base/SkTSort.h new file mode 100644 index 0000000000..a1d35cc158 --- /dev/null +++ b/gfx/skia/skia/src/base/SkTSort.h @@ -0,0 +1,214 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkTSort_DEFINED +#define SkTSort_DEFINED + +#include "include/private/base/SkTo.h" +#include "src/base/SkMathPriv.h" + +#include <cstddef> +#include <utility> + +/////////////////////////////////////////////////////////////////////////////// + +/* Sifts a broken heap. The input array is a heap from root to bottom + * except that the root entry may be out of place. + * + * Sinks a hole from array[root] to leaf and then sifts the original array[root] element + * from the leaf level up. + * + * This version does extra work, in that it copies child to parent on the way down, + * then copies parent to child on the way back up. When copies are inexpensive, + * this is an optimization as this sift variant should only be used when + * the potentially out of place root entry value is expected to be small. + * + * @param root the one based index into array of the out-of-place root of the heap. + * @param bottom the one based index in the array of the last entry in the heap. + */ +template <typename T, typename C> +void SkTHeapSort_SiftUp(T array[], size_t root, size_t bottom, const C& lessThan) { + T x = array[root-1]; + size_t start = root; + size_t j = root << 1; + while (j <= bottom) { + if (j < bottom && lessThan(array[j-1], array[j])) { + ++j; + } + array[root-1] = array[j-1]; + root = j; + j = root << 1; + } + j = root >> 1; + while (j >= start) { + if (lessThan(array[j-1], x)) { + array[root-1] = array[j-1]; + root = j; + j = root >> 1; + } else { + break; + } + } + array[root-1] = x; +} + +/* Sifts a broken heap. The input array is a heap from root to bottom + * except that the root entry may be out of place. + * + * Sifts the array[root] element from the root down. + * + * @param root the one based index into array of the out-of-place root of the heap. + * @param bottom the one based index in the array of the last entry in the heap. + */ +template <typename T, typename C> +void SkTHeapSort_SiftDown(T array[], size_t root, size_t bottom, const C& lessThan) { + T x = array[root-1]; + size_t child = root << 1; + while (child <= bottom) { + if (child < bottom && lessThan(array[child-1], array[child])) { + ++child; + } + if (lessThan(x, array[child-1])) { + array[root-1] = array[child-1]; + root = child; + child = root << 1; + } else { + break; + } + } + array[root-1] = x; +} + +/** Sorts the array of size count using comparator lessThan using a Heap Sort algorithm. Be sure to + * specialize swap if T has an efficient swap operation. + * + * @param array the array to be sorted. + * @param count the number of elements in the array. 
+ * @param lessThan a functor with bool operator()(T a, T b) which returns true if a comes before b. + */ +template <typename T, typename C> void SkTHeapSort(T array[], size_t count, const C& lessThan) { + for (size_t i = count >> 1; i > 0; --i) { + SkTHeapSort_SiftDown(array, i, count, lessThan); + } + + for (size_t i = count - 1; i > 0; --i) { + using std::swap; + swap(array[0], array[i]); + SkTHeapSort_SiftUp(array, 1, i, lessThan); + } +} + +/** Sorts the array of size count using comparator '<' using a Heap Sort algorithm. */ +template <typename T> void SkTHeapSort(T array[], size_t count) { + SkTHeapSort(array, count, [](const T& a, const T& b) { return a < b; }); +} + +/////////////////////////////////////////////////////////////////////////////// + +/** Sorts the array of size count using comparator lessThan using an Insertion Sort algorithm. */ +template <typename T, typename C> +void SkTInsertionSort(T* left, int count, const C& lessThan) { + T* right = left + count - 1; + for (T* next = left + 1; next <= right; ++next) { + if (!lessThan(*next, *(next - 1))) { + continue; + } + T insert = std::move(*next); + T* hole = next; + do { + *hole = std::move(*(hole - 1)); + --hole; + } while (left < hole && lessThan(insert, *(hole - 1))); + *hole = std::move(insert); + } +} + +/////////////////////////////////////////////////////////////////////////////// + +template <typename T, typename C> +T* SkTQSort_Partition(T* left, int count, T* pivot, const C& lessThan) { + T* right = left + count - 1; + using std::swap; + T pivotValue = *pivot; + swap(*pivot, *right); + T* newPivot = left; + while (left < right) { + if (lessThan(*left, pivotValue)) { + swap(*left, *newPivot); + newPivot += 1; + } + left += 1; + } + swap(*newPivot, *right); + return newPivot; +} + +/* Introsort is a modified Quicksort. + * When the region to be sorted is a small constant size, it uses Insertion Sort. + * When depth becomes zero, it switches over to Heap Sort. + * This implementation recurses on the left region after pivoting and loops on the right, + * we already limit the stack depth by switching to heap sort, + * and cache locality on the data appears more important than saving a few stack frames. + * + * @param depth at this recursion depth, switch to Heap Sort. + * @param left points to the beginning of the region to be sorted + * @param count number of items to be sorted + * @param lessThan a functor/lambda which returns true if a comes before b. + */ +template <typename T, typename C> +void SkTIntroSort(int depth, T* left, int count, const C& lessThan) { + for (;;) { + if (count <= 32) { + SkTInsertionSort(left, count, lessThan); + return; + } + + if (depth == 0) { + SkTHeapSort<T>(left, count, lessThan); + return; + } + --depth; + + T* middle = left + ((count - 1) >> 1); + T* pivot = SkTQSort_Partition(left, count, middle, lessThan); + int pivotCount = pivot - left; + + SkTIntroSort(depth, left, pivotCount, lessThan); + left += pivotCount + 1; + count -= pivotCount + 1; + } +} + +/** Sorts the region from left to right using comparator lessThan using Introsort. + * Be sure to specialize `swap` if T has an efficient swap operation. + * + * @param begin points to the beginning of the region to be sorted + * @param end points past the end of the region to be sorted + * @param lessThan a functor/lambda which returns true if a comes before b. 
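+ *
+ *  For example:
+ *
+ *      float vals[] = {3.f, 1.f, 2.f};
+ *      SkTQSort(vals, vals + 3, [](float a, float b) { return a > b; });
+ *      // vals is now {3.f, 2.f, 1.f} (sorted in descending order)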
+ */ +template <typename T, typename C> +void SkTQSort(T* begin, T* end, const C& lessThan) { + int n = SkToInt(end - begin); + if (n <= 1) { + return; + } + // Limit Introsort recursion depth to no more than 2 * ceil(log2(n-1)). + int depth = 2 * SkNextLog2(n - 1); + SkTIntroSort(depth, begin, n, lessThan); +} + +/** Sorts the region from left to right using comparator 'a < b' using Introsort. */ +template <typename T> void SkTQSort(T* begin, T* end) { + SkTQSort(begin, end, [](const T& a, const T& b) { return a < b; }); +} + +/** Sorts the region from left to right using comparator '*a < *b' using Introsort. */ +template <typename T> void SkTQSort(T** begin, T** end) { + SkTQSort(begin, end, [](const T* a, const T* b) { return *a < *b; }); +} + +#endif diff --git a/gfx/skia/skia/src/base/SkThreadID.cpp b/gfx/skia/skia/src/base/SkThreadID.cpp new file mode 100644 index 0000000000..e5b7a06c7c --- /dev/null +++ b/gfx/skia/skia/src/base/SkThreadID.cpp @@ -0,0 +1,16 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "include/private/base/SkThreadID.h" + +#ifdef SK_BUILD_FOR_WIN + #include "src/base/SkLeanWindows.h" + SkThreadID SkGetThreadID() { return GetCurrentThreadId(); } +#else + #include <pthread.h> + SkThreadID SkGetThreadID() { return (int64_t)pthread_self(); } +#endif diff --git a/gfx/skia/skia/src/base/SkUTF.cpp b/gfx/skia/skia/src/base/SkUTF.cpp new file mode 100644 index 0000000000..20325fb2b6 --- /dev/null +++ b/gfx/skia/skia/src/base/SkUTF.cpp @@ -0,0 +1,316 @@ +// Copyright 2018 Google LLC. +// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. + +#include "src/base/SkUTF.h" + +#include "include/private/base/SkTFitsIn.h" + +static constexpr inline int32_t left_shift(int32_t value, int32_t shift) { + return (int32_t) ((uint32_t) value << shift); +} + +template <typename T> static constexpr bool is_align2(T x) { return 0 == (x & 1); } + +template <typename T> static constexpr bool is_align4(T x) { return 0 == (x & 3); } + +static constexpr inline bool utf16_is_high_surrogate(uint16_t c) { return (c & 0xFC00) == 0xD800; } + +static constexpr inline bool utf16_is_low_surrogate(uint16_t c) { return (c & 0xFC00) == 0xDC00; } + +/** @returns -1 iff invalid UTF8 byte, + 0 iff UTF8 continuation byte, + 1 iff ASCII byte, + 2 iff leading byte of 2-byte sequence, + 3 iff leading byte of 3-byte sequence, and + 4 iff leading byte of 4-byte sequence. + I.e.: if return value > 0, then gives length of sequence. 
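+
+    For example: 0x41 ('A') -> 1, 0xC3 -> 2, 0xE2 -> 3, 0xF0 -> 4, 0x80 -> 0 (continuation),
+    and 0xC0, 0xC1, and 0xF5..0xFF -> -1.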
+*/ +static int utf8_byte_type(uint8_t c) { + if (c < 0x80) { + return 1; + } else if (c < 0xC0) { + return 0; + } else if (c >= 0xF5 || (c & 0xFE) == 0xC0) { // "octet values c0, c1, f5 to ff never appear" + return -1; + } else { + int value = (((0xe5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1; + // assert(value >= 2 && value <=4); + return value; + } +} +static bool utf8_type_is_valid_leading_byte(int type) { return type > 0; } + +static bool utf8_byte_is_continuation(uint8_t c) { return utf8_byte_type(c) == 0; } + +//////////////////////////////////////////////////////////////////////////////// + +int SkUTF::CountUTF8(const char* utf8, size_t byteLength) { + if (!utf8 && byteLength) { + return -1; + } + int count = 0; + const char* stop = utf8 + byteLength; + while (utf8 < stop) { + int type = utf8_byte_type(*(const uint8_t*)utf8); + if (!utf8_type_is_valid_leading_byte(type) || utf8 + type > stop) { + return -1; // Sequence extends beyond end. + } + while(type-- > 1) { + ++utf8; + if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) { + return -1; + } + } + ++utf8; + ++count; + } + return count; +} + +int SkUTF::CountUTF16(const uint16_t* utf16, size_t byteLength) { + if (!utf16 || !is_align2(intptr_t(utf16)) || !is_align2(byteLength)) { + return -1; + } + const uint16_t* src = (const uint16_t*)utf16; + const uint16_t* stop = src + (byteLength >> 1); + int count = 0; + while (src < stop) { + unsigned c = *src++; + if (utf16_is_low_surrogate(c)) { + return -1; + } + if (utf16_is_high_surrogate(c)) { + if (src >= stop) { + return -1; + } + c = *src++; + if (!utf16_is_low_surrogate(c)) { + return -1; + } + } + count += 1; + } + return count; +} + +int SkUTF::CountUTF32(const int32_t* utf32, size_t byteLength) { + if (!is_align4(intptr_t(utf32)) || !is_align4(byteLength) || !SkTFitsIn<int>(byteLength >> 2)) { + return -1; + } + const uint32_t kInvalidUnicharMask = 0xFF000000; // unichar fits in 24 bits + const uint32_t* ptr = (const uint32_t*)utf32; + const uint32_t* stop = ptr + (byteLength >> 2); + while (ptr < stop) { + if (*ptr & kInvalidUnicharMask) { + return -1; + } + ptr += 1; + } + return (int)(byteLength >> 2); +} + +template <typename T> +static SkUnichar next_fail(const T** ptr, const T* end) { + *ptr = end; + return -1; +} + +SkUnichar SkUTF::NextUTF8(const char** ptr, const char* end) { + if (!ptr || !end ) { + return -1; + } + const uint8_t* p = (const uint8_t*)*ptr; + if (!p || p >= (const uint8_t*)end) { + return next_fail(ptr, end); + } + int c = *p; + int hic = c << 24; + + if (!utf8_type_is_valid_leading_byte(utf8_byte_type(c))) { + return next_fail(ptr, end); + } + if (hic < 0) { + uint32_t mask = (uint32_t)~0x3F; + hic = left_shift(hic, 1); + do { + ++p; + if (p >= (const uint8_t*)end) { + return next_fail(ptr, end); + } + // check before reading off end of array. + uint8_t nextByte = *p; + if (!utf8_byte_is_continuation(nextByte)) { + return next_fail(ptr, end); + } + c = (c << 6) | (nextByte & 0x3F); + mask <<= 5; + } while ((hic = left_shift(hic, 1)) < 0); + c &= ~mask; + } + *ptr = (char*)p + 1; + return c; +} + +SkUnichar SkUTF::NextUTF16(const uint16_t** ptr, const uint16_t* end) { + if (!ptr || !end ) { + return -1; + } + const uint16_t* src = *ptr; + if (!src || src + 1 > end || !is_align2(intptr_t(src))) { + return next_fail(ptr, end); + } + uint16_t c = *src++; + SkUnichar result = c; + if (utf16_is_low_surrogate(c)) { + return next_fail(ptr, end); // srcPtr should never point at low surrogate. 
+ } + if (utf16_is_high_surrogate(c)) { + if (src + 1 > end) { + return next_fail(ptr, end); // Truncated string. + } + uint16_t low = *src++; + if (!utf16_is_low_surrogate(low)) { + return next_fail(ptr, end); + } + /* + [paraphrased from wikipedia] + Take the high surrogate and subtract 0xD800, then multiply by 0x400. + Take the low surrogate and subtract 0xDC00. Add these two results + together, and finally add 0x10000 to get the final decoded codepoint. + + unicode = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000 + unicode = (high * 0x400) - (0xD800 * 0x400) + low - 0xDC00 + 0x10000 + unicode = (high << 10) - (0xD800 << 10) + low - 0xDC00 + 0x10000 + unicode = (high << 10) + low - ((0xD800 << 10) + 0xDC00 - 0x10000) + */ + result = (result << 10) + (SkUnichar)low - ((0xD800 << 10) + 0xDC00 - 0x10000); + } + *ptr = src; + return result; +} + +SkUnichar SkUTF::NextUTF32(const int32_t** ptr, const int32_t* end) { + if (!ptr || !end ) { + return -1; + } + const int32_t* s = *ptr; + if (!s || s + 1 > end || !is_align4(intptr_t(s))) { + return next_fail(ptr, end); + } + int32_t value = *s; + const uint32_t kInvalidUnicharMask = 0xFF000000; // unichar fits in 24 bits + if (value & kInvalidUnicharMask) { + return next_fail(ptr, end); + } + *ptr = s + 1; + return value; +} + +size_t SkUTF::ToUTF8(SkUnichar uni, char utf8[SkUTF::kMaxBytesInUTF8Sequence]) { + if ((uint32_t)uni > 0x10FFFF) { + return 0; + } + if (uni <= 127) { + if (utf8) { + *utf8 = (char)uni; + } + return 1; + } + char tmp[4]; + char* p = tmp; + size_t count = 1; + while (uni > 0x7F >> count) { + *p++ = (char)(0x80 | (uni & 0x3F)); + uni >>= 6; + count += 1; + } + if (utf8) { + p = tmp; + utf8 += count; + while (p < tmp + count - 1) { + *--utf8 = *p++; + } + *--utf8 = (char)(~(0xFF >> count) | uni); + } + return count; +} + +size_t SkUTF::ToUTF16(SkUnichar uni, uint16_t utf16[2]) { + if ((uint32_t)uni > 0x10FFFF) { + return 0; + } + int extra = (uni > 0xFFFF); + if (utf16) { + if (extra) { + utf16[0] = (uint16_t)((0xD800 - 64) + (uni >> 10)); + utf16[1] = (uint16_t)(0xDC00 | (uni & 0x3FF)); + } else { + utf16[0] = (uint16_t)uni; + } + } + return 1 + extra; +} + +int SkUTF::UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength) { + if (!dst) { + dstCapacity = 0; + } + + int dstLength = 0; + uint16_t* endDst = dst + dstCapacity; + const char* endSrc = src + srcByteLength; + while (src < endSrc) { + SkUnichar uni = NextUTF8(&src, endSrc); + if (uni < 0) { + return -1; + } + + uint16_t utf16[2]; + size_t count = ToUTF16(uni, utf16); + if (count == 0) { + return -1; + } + dstLength += count; + + if (dst) { + uint16_t* elems = utf16; + while (dst < endDst && count > 0) { + *dst++ = *elems++; + count -= 1; + } + } + } + return dstLength; +} + +int SkUTF::UTF16ToUTF8(char dst[], int dstCapacity, const uint16_t src[], size_t srcLength) { + if (!dst) { + dstCapacity = 0; + } + + int dstLength = 0; + const char* endDst = dst + dstCapacity; + const uint16_t* endSrc = src + srcLength; + while (src < endSrc) { + SkUnichar uni = NextUTF16(&src, endSrc); + if (uni < 0) { + return -1; + } + + char utf8[SkUTF::kMaxBytesInUTF8Sequence]; + size_t count = ToUTF8(uni, utf8); + if (count == 0) { + return -1; + } + dstLength += count; + + if (dst) { + const char* elems = utf8; + while (dst < endDst && count > 0) { + *dst++ = *elems++; + count -= 1; + } + } + } + return dstLength; +} diff --git a/gfx/skia/skia/src/base/SkUTF.h b/gfx/skia/skia/src/base/SkUTF.h new file mode 100644 index 0000000000..e50804da98 --- /dev/null 
+++ b/gfx/skia/skia/src/base/SkUTF.h @@ -0,0 +1,95 @@ +// Copyright 2018 Google LLC. +// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. +#ifndef SkUTF_DEFINED +#define SkUTF_DEFINED + +#include "include/private/base/SkAPI.h" + +#include <cstddef> +#include <cstdint> + +typedef int32_t SkUnichar; + +namespace SkUTF { + +/** Given a sequence of UTF-8 bytes, return the number of unicode codepoints. + If the sequence is invalid UTF-8, return -1. +*/ +SK_SPI int CountUTF8(const char* utf8, size_t byteLength); + +/** Given a sequence of aligned UTF-16 characters in machine-endian form, + return the number of unicode codepoints. If the sequence is invalid + UTF-16, return -1. +*/ +SK_SPI int CountUTF16(const uint16_t* utf16, size_t byteLength); + +/** Given a sequence of aligned UTF-32 characters in machine-endian form, + return the number of unicode codepoints. If the sequence is invalid + UTF-32, return -1. +*/ +SK_SPI int CountUTF32(const int32_t* utf32, size_t byteLength); + +/** Given a sequence of UTF-8 bytes, return the first unicode codepoint. + The pointer will be incremented to point at the next codepoint's start. If + invalid UTF-8 is encountered, set *ptr to end and return -1. +*/ +SK_SPI SkUnichar NextUTF8(const char** ptr, const char* end); + +/** Given a sequence of aligned UTF-16 characters in machine-endian form, + return the first unicode codepoint. The pointer will be incremented to + point at the next codepoint's start. If invalid UTF-16 is encountered, + set *ptr to end and return -1. +*/ +SK_SPI SkUnichar NextUTF16(const uint16_t** ptr, const uint16_t* end); + +/** Given a sequence of aligned UTF-32 characters in machine-endian form, + return the first unicode codepoint. The pointer will be incremented to + point at the next codepoint's start. If invalid UTF-32 is encountered, + set *ptr to end and return -1. +*/ +SK_SPI SkUnichar NextUTF32(const int32_t** ptr, const int32_t* end); + +constexpr unsigned kMaxBytesInUTF8Sequence = 4; + +/** Convert the unicode codepoint into UTF-8. If `utf8` is non-null, place the + result in that array. Return the number of bytes in the result. If `utf8` + is null, simply return the number of bytes that would be used. For invalid + unicode codepoints, return 0. +*/ +SK_SPI size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence] = nullptr); + +/** Convert the unicode codepoint into UTF-16. If `utf16` is non-null, place + the result in that array. Return the number of UTF-16 code units in the + result (1 or 2). If `utf16` is null, simply return the number of code + units that would be used. For invalid unicode codepoints, return 0. +*/ +SK_SPI size_t ToUTF16(SkUnichar uni, uint16_t utf16[2] = nullptr); + +/** Returns the number of resulting UTF16 values needed to convert the src utf8 sequence. + * If dst is not null, it is filled with the corresponding values up to its capacity. + * If there is an error, -1 is returned and the dst[] buffer is undefined. + */ +SK_SPI int UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength); + +/** Returns the number of resulting UTF8 values needed to convert the src utf16 sequence. + * If dst is not null, it is filled with the corresponding values up to its capacity. + * If there is an error, -1 is returned and the dst[] buffer is undefined. + */ +SK_SPI int UTF16ToUTF8(char dst[], int dstCapacity, const uint16_t src[], size_t srcLength); + +/** + * Given a UTF-16 code point, returns true iff it is a leading surrogate. 
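+ * For example, IsLeadingSurrogateUTF16(0xD83D) is true, while IsLeadingSurrogateUTF16(0xDE00)
+ * and IsLeadingSurrogateUTF16(0x0041) are false.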
+ * https://unicode.org/faq/utf_bom.html#utf16-2 + */ +static inline bool IsLeadingSurrogateUTF16(uint16_t c) { return ((c) & 0xFC00) == 0xD800; } + +/** + * Given a UTF-16 code point, returns true iff it is a trailing surrogate. + * https://unicode.org/faq/utf_bom.html#utf16-2 + */ +static inline bool IsTrailingSurrogateUTF16(uint16_t c) { return ((c) & 0xFC00) == 0xDC00; } + + +} // namespace SkUTF + +#endif // SkUTF_DEFINED diff --git a/gfx/skia/skia/src/base/SkUtils.cpp b/gfx/skia/skia/src/base/SkUtils.cpp new file mode 100644 index 0000000000..b9852e9389 --- /dev/null +++ b/gfx/skia/skia/src/base/SkUtils.cpp @@ -0,0 +1,13 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "src/base/SkUtils.h" + +const char SkHexadecimalDigits::gUpper[16] = + { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; +const char SkHexadecimalDigits::gLower[16] = + { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; diff --git a/gfx/skia/skia/src/base/SkUtils.h b/gfx/skia/skia/src/base/SkUtils.h new file mode 100644 index 0000000000..ae2331dfca --- /dev/null +++ b/gfx/skia/skia/src/base/SkUtils.h @@ -0,0 +1,55 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkUtils_DEFINED +#define SkUtils_DEFINED + +#include "include/private/base/SkAttributes.h" + +#include <cstring> +#include <type_traits> // is_trivially_copyable + +namespace SkHexadecimalDigits { + extern const char gUpper[16]; // 0-9A-F + extern const char gLower[16]; // 0-9a-f +} // namespace SkHexadecimalDigits + +/////////////////////////////////////////////////////////////////////////////// + +// If T is an 8-byte GCC or Clang vector extension type, it would naturally +// pass or return in the MMX mm0 register on 32-bit x86 builds. This has the +// fun side effect of clobbering any state in the x87 st0 register. (There is +// no ABI governing who should preserve mm?/st? registers, so no one does!) +// +// We force-inline sk_unaligned_load() and sk_unaligned_store() to avoid that, +// making them safe to use for all types on all platforms, thus solving the +// problem once and for all! + +template <typename T, typename P> +static SK_ALWAYS_INLINE T sk_unaligned_load(const P* ptr) { + static_assert(std::is_trivially_copyable<T>::value); + static_assert(std::is_trivially_copyable<P>::value); + T val; + memcpy(&val, ptr, sizeof(val)); + return val; +} + +template <typename T, typename P> +static SK_ALWAYS_INLINE void sk_unaligned_store(P* ptr, T val) { + static_assert(std::is_trivially_copyable<T>::value); + static_assert(std::is_trivially_copyable<P>::value); + memcpy(ptr, &val, sizeof(val)); +} + +// Copy the bytes from src into an instance of type Dst and return it. +template <typename Dst, typename Src> +static SK_ALWAYS_INLINE Dst sk_bit_cast(const Src& src) { + static_assert(sizeof(Dst) == sizeof(Src)); + return sk_unaligned_load<Dst>(&src); +} + +#endif diff --git a/gfx/skia/skia/src/base/SkVx.h b/gfx/skia/skia/src/base/SkVx.h new file mode 100644 index 0000000000..a1731ad0c4 --- /dev/null +++ b/gfx/skia/skia/src/base/SkVx.h @@ -0,0 +1,1183 @@ +/* + * Copyright 2019 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SKVX_DEFINED +#define SKVX_DEFINED + +// skvx::Vec<N,T> are SIMD vectors of N T's, a v1.5 successor to SkNx<N,T>. +// +// This time we're leaning a bit less on platform-specific intrinsics and a bit +// more on Clang/GCC vector extensions, but still keeping the option open to +// drop in platform-specific intrinsics, actually more easily than before. +// +// We've also fixed a few of the caveats that used to make SkNx awkward to work +// with across translation units. skvx::Vec<N,T> always has N*sizeof(T) size +// and alignment and is safe to use across translation units freely. +// (Ideally we'd only align to T, but that tanks ARMv7 NEON codegen.) + +// Please try to keep this file independent of Skia headers. +#include <algorithm> // std::min, std::max +#include <cassert> // assert() +#include <cmath> // ceilf, floorf, truncf, roundf, sqrtf, etc. +#include <cstdint> // intXX_t +#include <cstring> // memcpy() +#include <initializer_list> // std::initializer_list +#include <type_traits> +#include <utility> // std::index_sequence + +// Users may disable SIMD with SKNX_NO_SIMD, which may be set via compiler flags. +// The gn build has no option which sets SKNX_NO_SIMD. +// Use SKVX_USE_SIMD internally to avoid confusing double negation. +// Do not use 'defined' in a macro expansion. +#if !defined(SKNX_NO_SIMD) + #define SKVX_USE_SIMD 1 +#else + #define SKVX_USE_SIMD 0 +#endif + +#if SKVX_USE_SIMD + #if defined(__SSE__) || defined(__AVX__) || defined(__AVX2__) + #include <immintrin.h> + #elif defined(__ARM_NEON) + #include <arm_neon.h> + #elif defined(__wasm_simd128__) + #include <wasm_simd128.h> + #endif +#endif + +// To avoid ODR violations, all methods must be force-inlined... +#if defined(_MSC_VER) + #define SKVX_ALWAYS_INLINE __forceinline +#else + #define SKVX_ALWAYS_INLINE __attribute__((always_inline)) +#endif + +// ... and all standalone functions must be static. Please use these helpers: +#define SI static inline +#define SIT template < typename T> SI +#define SIN template <int N > SI +#define SINT template <int N, typename T> SI +#define SINTU template <int N, typename T, typename U, \ + typename=std::enable_if_t<std::is_convertible<U,T>::value>> SI + +namespace skvx { + +template <int N, typename T> +struct alignas(N*sizeof(T)) Vec; + +template <int... Ix, int N, typename T> +SI Vec<sizeof...(Ix),T> shuffle(const Vec<N,T>&); + +template <typename D, typename S> +SI D bit_pun(const S& s) { + static_assert(sizeof(D) == sizeof(S)); + D d; + memcpy(&d, &s, sizeof(D)); + return d; +} + +// All Vec have the same simple memory layout, the same as `T vec[N]`. 
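+// For example, a Vec<4,float> is stored as { Vec<2,float> lo, hi }, which recursively bottoms out
+// at four contiguous floats, so it can be memcpy'd or bit_pun'd exactly like a float[4].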
+template <int N, typename T> +struct alignas(N*sizeof(T)) VecStorage { + SKVX_ALWAYS_INLINE VecStorage() = default; + SKVX_ALWAYS_INLINE VecStorage(T s) : lo(s), hi(s) {} + + Vec<N/2,T> lo, hi; +}; + +template <typename T> +struct VecStorage<4,T> { + SKVX_ALWAYS_INLINE VecStorage() = default; + SKVX_ALWAYS_INLINE VecStorage(T s) : lo(s), hi(s) {} + SKVX_ALWAYS_INLINE VecStorage(T x, T y, T z, T w) : lo(x,y), hi(z, w) {} + SKVX_ALWAYS_INLINE VecStorage(Vec<2,T> xy, T z, T w) : lo(xy), hi(z,w) {} + SKVX_ALWAYS_INLINE VecStorage(T x, T y, Vec<2,T> zw) : lo(x,y), hi(zw) {} + SKVX_ALWAYS_INLINE VecStorage(Vec<2,T> xy, Vec<2,T> zw) : lo(xy), hi(zw) {} + + SKVX_ALWAYS_INLINE Vec<2,T>& xy() { return lo; } + SKVX_ALWAYS_INLINE Vec<2,T>& zw() { return hi; } + SKVX_ALWAYS_INLINE T& x() { return lo.lo.val; } + SKVX_ALWAYS_INLINE T& y() { return lo.hi.val; } + SKVX_ALWAYS_INLINE T& z() { return hi.lo.val; } + SKVX_ALWAYS_INLINE T& w() { return hi.hi.val; } + + SKVX_ALWAYS_INLINE Vec<2,T> xy() const { return lo; } + SKVX_ALWAYS_INLINE Vec<2,T> zw() const { return hi; } + SKVX_ALWAYS_INLINE T x() const { return lo.lo.val; } + SKVX_ALWAYS_INLINE T y() const { return lo.hi.val; } + SKVX_ALWAYS_INLINE T z() const { return hi.lo.val; } + SKVX_ALWAYS_INLINE T w() const { return hi.hi.val; } + + // Exchange-based swizzles. These should take 1 cycle on NEON and 3 (pipelined) cycles on SSE. + SKVX_ALWAYS_INLINE Vec<4,T> yxwz() const { return shuffle<1,0,3,2>(bit_pun<Vec<4,T>>(*this)); } + SKVX_ALWAYS_INLINE Vec<4,T> zwxy() const { return shuffle<2,3,0,1>(bit_pun<Vec<4,T>>(*this)); } + + Vec<2,T> lo, hi; +}; + +template <typename T> +struct VecStorage<2,T> { + SKVX_ALWAYS_INLINE VecStorage() = default; + SKVX_ALWAYS_INLINE VecStorage(T s) : lo(s), hi(s) {} + SKVX_ALWAYS_INLINE VecStorage(T x, T y) : lo(x), hi(y) {} + + SKVX_ALWAYS_INLINE T& x() { return lo.val; } + SKVX_ALWAYS_INLINE T& y() { return hi.val; } + + SKVX_ALWAYS_INLINE T x() const { return lo.val; } + SKVX_ALWAYS_INLINE T y() const { return hi.val; } + + // This exchange-based swizzle should take 1 cycle on NEON and 3 (pipelined) cycles on SSE. + SKVX_ALWAYS_INLINE Vec<2,T> yx() const { return shuffle<1,0>(bit_pun<Vec<2,T>>(*this)); } + + SKVX_ALWAYS_INLINE Vec<4,T> xyxy() const { + return Vec<4,T>(bit_pun<Vec<2,T>>(*this), bit_pun<Vec<2,T>>(*this)); + } + + Vec<1,T> lo, hi; +}; + +// Translate from a value type T to its corresponding Mask, the result of a comparison. 
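+// For example, comparing two Vec<4,float> values yields a Vec<4,int32_t> whose lanes are either
+// all-zero or all-one bits, the same width as the float lanes they describe.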
+template <typename T> struct Mask { using type = T; }; +template <> struct Mask<float > { using type = int32_t; }; +template <> struct Mask<double> { using type = int64_t; }; +template <typename T> using M = typename Mask<T>::type; + +template <int N, typename T> +struct NoConversion { T vals[N]; }; + +template <int N, typename T> +struct ConvertNative { + typedef NoConversion<N, T> type; +}; + +#if SKVX_USE_SIMD && defined(__SSE__) +template<> +struct ConvertNative<4, float> { + typedef __m128 type; +}; + +template<> +struct ConvertNative<4, int32_t> { + typedef __m128i type; +}; + +template <> +struct ConvertNative<4, uint32_t> { + typedef __m128i type; +}; + +template<> +struct ConvertNative<8, int16_t> { + typedef __m128i type; +}; + +template <> +struct ConvertNative<8, uint16_t> { + typedef __m128i type; +}; + +template <> +struct ConvertNative<16, uint8_t> { + typedef __m128i type; +}; +#endif + +#if SKVX_USE_SIMD && defined(__AVX__) +template<> +struct ConvertNative<8, float> { + typedef __m256 type; +}; + +template<> +struct ConvertNative<8, int32_t> { + typedef __m256i type; +}; + +template <> +struct ConvertNative<8, uint32_t> { + typedef __m256i type; +}; + +template<> +struct ConvertNative<16, int16_t> { + typedef __m256i type; +}; + +template <> +struct ConvertNative<16, uint16_t> { + typedef __m256i type; +}; +#endif + +#if SKVX_USE_SIMD && defined(__ARM_NEON) +template<> +struct ConvertNative<4, float> { + typedef float32x4_t type; +}; + +template<> +struct ConvertNative<4, int32_t> { + typedef int32x4_t type; +}; + +template <> +struct ConvertNative<4, uint32_t> { + typedef uint32x4_t type; +}; + +template<> +struct ConvertNative<4, int16_t> { + typedef int16x4_t type; +}; + +template <> +struct ConvertNative<4, uint16_t> { + typedef uint16x4_t type; +}; + +template<> +struct ConvertNative<8, int16_t> { + typedef int16x8_t type; +}; + +template <> +struct ConvertNative<8, uint16_t> { + typedef uint16x8_t type; +}; + +template <> +struct ConvertNative<8, uint8_t> { + typedef uint8x8_t type; +}; +#endif + +template <int N, typename T> +struct alignas(N*sizeof(T)) Vec : public VecStorage<N,T> { + typedef T elem_type; + + static_assert((N & (N-1)) == 0, "N must be a power of 2."); + static_assert(sizeof(T) >= alignof(T), "What kind of unusual T is this?"); + + // Methods belong here in the class declaration of Vec only if: + // - they must be here, like constructors or operator[]; + // - they'll definitely never want a specialized implementation. + // Other operations on Vec should be defined outside the type. + + SKVX_ALWAYS_INLINE Vec() = default; + SKVX_ALWAYS_INLINE Vec(typename ConvertNative<N, T>::type native) : Vec(bit_pun<Vec>(native)) {} + + using VecStorage<N,T>::VecStorage; + + // NOTE: Vec{x} produces x000..., whereas Vec(x) produces xxxx.... since this constructor fills + // unspecified lanes with 0s, whereas the single T constructor fills all lanes with the value. + SKVX_ALWAYS_INLINE Vec(std::initializer_list<T> xs) { + T vals[N] = {0}; + memcpy(vals, xs.begin(), std::min(xs.size(), (size_t)N)*sizeof(T)); + + this->lo = Vec<N/2,T>::Load(vals + 0); + this->hi = Vec<N/2,T>::Load(vals + N/2); + } + + operator typename ConvertNative<N, T>::type() const { return bit_pun<typename ConvertNative<N, T>::type>(*this); } + + SKVX_ALWAYS_INLINE T operator[](int i) const { return i<N/2 ? this->lo[i] : this->hi[i-N/2]; } + SKVX_ALWAYS_INLINE T& operator[](int i) { return i<N/2 ? 
this->lo[i] : this->hi[i-N/2]; } + + SKVX_ALWAYS_INLINE static Vec Load(const void* ptr) { + Vec v; + memcpy(&v, ptr, sizeof(Vec)); + return v; + } + SKVX_ALWAYS_INLINE void store(void* ptr) const { + memcpy(ptr, this, sizeof(Vec)); + } +}; + +template <typename T> +struct Vec<1,T> { + typedef T elem_type; + + T val; + + SKVX_ALWAYS_INLINE Vec() = default; + + Vec(T s) : val(s) {} + + SKVX_ALWAYS_INLINE Vec(std::initializer_list<T> xs) : val(xs.size() ? *xs.begin() : 0) {} + + SKVX_ALWAYS_INLINE T operator[](int) const { return val; } + SKVX_ALWAYS_INLINE T& operator[](int) { return val; } + + SKVX_ALWAYS_INLINE static Vec Load(const void* ptr) { + Vec v; + memcpy(&v, ptr, sizeof(Vec)); + return v; + } + SKVX_ALWAYS_INLINE void store(void* ptr) const { + memcpy(ptr, this, sizeof(Vec)); + } +}; + +// Join two Vec<N,T> into one Vec<2N,T>. +SINT Vec<2*N,T> join(const Vec<N,T>& lo, const Vec<N,T>& hi) { + Vec<2*N,T> v; + v.lo = lo; + v.hi = hi; + return v; +} + +// We have three strategies for implementing Vec operations: +// 1) lean on Clang/GCC vector extensions when available; +// 2) use map() to apply a scalar function lane-wise; +// 3) recurse on lo/hi to scalar portable implementations. +// We can slot in platform-specific implementations as overloads for particular Vec<N,T>, +// or often integrate them directly into the recursion of style 3), allowing fine control. + +#if SKVX_USE_SIMD && (defined(__clang__) || defined(__GNUC__)) + + // VExt<N,T> types have the same size as Vec<N,T> and support most operations directly. + #if defined(__clang__) + template <int N, typename T> + using VExt = T __attribute__((ext_vector_type(N))); + + #elif defined(__GNUC__) + template <int N, typename T> + struct VExtHelper { + typedef T __attribute__((vector_size(N*sizeof(T)))) type; + }; + + template <int N, typename T> + using VExt = typename VExtHelper<N,T>::type; + + // For some reason some (new!) versions of GCC cannot seem to deduce N in the generic + // to_vec<N,T>() below for N=4 and T=float. This workaround seems to help... 
+ SI Vec<4,float> to_vec(VExt<4,float> v) { return bit_pun<Vec<4,float>>(v); } + #endif + + SINT VExt<N,T> to_vext(const Vec<N,T>& v) { return bit_pun<VExt<N,T>>(v); } + SINT Vec <N,T> to_vec(const VExt<N,T>& v) { return bit_pun<Vec <N,T>>(v); } + + SINT Vec<N,T> operator+(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) + to_vext(y)); + } + SINT Vec<N,T> operator-(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) - to_vext(y)); + } + SINT Vec<N,T> operator*(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) * to_vext(y)); + } + SINT Vec<N,T> operator/(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) / to_vext(y)); + } + + SINT Vec<N,T> operator^(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) ^ to_vext(y)); + } + SINT Vec<N,T> operator&(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) & to_vext(y)); + } + SINT Vec<N,T> operator|(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) | to_vext(y)); + } + SINT Vec<N,T> operator&&(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) & to_vext(y)); + } + SINT Vec<N,T> operator||(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) | to_vext(y)); + } + + SINT Vec<N,T> operator!(const Vec<N,T>& x) { return to_vec<N,T>(!to_vext(x)); } + SINT Vec<N,T> operator-(const Vec<N,T>& x) { return to_vec<N,T>(-to_vext(x)); } + SINT Vec<N,T> operator~(const Vec<N,T>& x) { return to_vec<N,T>(~to_vext(x)); } + + SINT Vec<N,T> operator<<(const Vec<N,T>& x, int k) { return to_vec<N,T>(to_vext(x) << k); } + SINT Vec<N,T> operator>>(const Vec<N,T>& x, int k) { return to_vec<N,T>(to_vext(x) >> k); } + + SINT Vec<N,M<T>> operator==(const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) == to_vext(y)); + } + SINT Vec<N,M<T>> operator!=(const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) != to_vext(y)); + } + SINT Vec<N,M<T>> operator<=(const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) <= to_vext(y)); + } + SINT Vec<N,M<T>> operator>=(const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) >= to_vext(y)); + } + SINT Vec<N,M<T>> operator< (const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) < to_vext(y)); + } + SINT Vec<N,M<T>> operator> (const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) > to_vext(y)); + } + +#else + + // Either SKNX_NO_SIMD is defined, or Clang/GCC vector extensions are not available. + // We'll implement things portably with N==1 scalar implementations and recursion onto them. + + // N == 1 scalar implementations. 
+ SIT Vec<1,T> operator+(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val + y.val; } + SIT Vec<1,T> operator-(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val - y.val; } + SIT Vec<1,T> operator*(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val * y.val; } + SIT Vec<1,T> operator/(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val / y.val; } + + SIT Vec<1,T> operator^(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val ^ y.val; } + SIT Vec<1,T> operator&(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val & y.val; } + SIT Vec<1,T> operator|(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val | y.val; } + SIT Vec<1,T> operator&&(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val & y.val; } + SIT Vec<1,T> operator||(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val | y.val; } + + SIT Vec<1,T> operator!(const Vec<1,T>& x) { return !x.val; } + SIT Vec<1,T> operator-(const Vec<1,T>& x) { return -x.val; } + SIT Vec<1,T> operator~(const Vec<1,T>& x) { return ~x.val; } + + SIT Vec<1,T> operator<<(const Vec<1,T>& x, int k) { return x.val << k; } + SIT Vec<1,T> operator>>(const Vec<1,T>& x, int k) { return x.val >> k; } + + SIT Vec<1,M<T>> operator==(const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val == y.val ? ~0 : 0; + } + SIT Vec<1,M<T>> operator!=(const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val != y.val ? ~0 : 0; + } + SIT Vec<1,M<T>> operator<=(const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val <= y.val ? ~0 : 0; + } + SIT Vec<1,M<T>> operator>=(const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val >= y.val ? ~0 : 0; + } + SIT Vec<1,M<T>> operator< (const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val < y.val ? ~0 : 0; + } + SIT Vec<1,M<T>> operator> (const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val > y.val ? ~0 : 0; + } + + // Recurse on lo/hi down to N==1 scalar implementations. 
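+ // For example, the '+' of two Vec<4,int> below splits into two Vec<2,int> additions, each of
+ // which splits again into the Vec<1,int> scalar '+' defined above.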
+ SINT Vec<N,T> operator+(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo + y.lo, x.hi + y.hi); + } + SINT Vec<N,T> operator-(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo - y.lo, x.hi - y.hi); + } + SINT Vec<N,T> operator*(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo * y.lo, x.hi * y.hi); + } + SINT Vec<N,T> operator/(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo / y.lo, x.hi / y.hi); + } + + SINT Vec<N,T> operator^(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo ^ y.lo, x.hi ^ y.hi); + } + SINT Vec<N,T> operator&(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo & y.lo, x.hi & y.hi); + } + SINT Vec<N,T> operator|(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo | y.lo, x.hi | y.hi); + } + SINT Vec<N,T> operator&&(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo & y.lo, x.hi & y.hi); + } + SINT Vec<N,T> operator||(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo | y.lo, x.hi | y.hi); + } + + SINT Vec<N,T> operator!(const Vec<N,T>& x) { return join(!x.lo, !x.hi); } + SINT Vec<N,T> operator-(const Vec<N,T>& x) { return join(-x.lo, -x.hi); } + SINT Vec<N,T> operator~(const Vec<N,T>& x) { return join(~x.lo, ~x.hi); } + + SINT Vec<N,T> operator<<(const Vec<N,T>& x, int k) { return join(x.lo << k, x.hi << k); } + SINT Vec<N,T> operator>>(const Vec<N,T>& x, int k) { return join(x.lo >> k, x.hi >> k); } + + SINT Vec<N,M<T>> operator==(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo == y.lo, x.hi == y.hi); + } + SINT Vec<N,M<T>> operator!=(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo != y.lo, x.hi != y.hi); + } + SINT Vec<N,M<T>> operator<=(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo <= y.lo, x.hi <= y.hi); + } + SINT Vec<N,M<T>> operator>=(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo >= y.lo, x.hi >= y.hi); + } + SINT Vec<N,M<T>> operator< (const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo < y.lo, x.hi < y.hi); + } + SINT Vec<N,M<T>> operator> (const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo > y.lo, x.hi > y.hi); + } +#endif + +// Scalar/vector operations splat the scalar to a vector. 
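+// For example, 2.0f * Vec<4,float>{1,2,3,4} first widens 2.0f to Vec<4,float>(2.0f), i.e.
+// {2,2,2,2}, and then applies the lane-wise operator* defined above.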
+SINTU Vec<N,T> operator+ (U x, const Vec<N,T>& y) { return Vec<N,T>(x) + y; } +SINTU Vec<N,T> operator- (U x, const Vec<N,T>& y) { return Vec<N,T>(x) - y; } +SINTU Vec<N,T> operator* (U x, const Vec<N,T>& y) { return Vec<N,T>(x) * y; } +SINTU Vec<N,T> operator/ (U x, const Vec<N,T>& y) { return Vec<N,T>(x) / y; } +SINTU Vec<N,T> operator^ (U x, const Vec<N,T>& y) { return Vec<N,T>(x) ^ y; } +SINTU Vec<N,T> operator& (U x, const Vec<N,T>& y) { return Vec<N,T>(x) & y; } +SINTU Vec<N,T> operator| (U x, const Vec<N,T>& y) { return Vec<N,T>(x) | y; } +SINTU Vec<N,T> operator&&(U x, const Vec<N,T>& y) { return Vec<N,T>(x) && y; } +SINTU Vec<N,T> operator||(U x, const Vec<N,T>& y) { return Vec<N,T>(x) || y; } +SINTU Vec<N,M<T>> operator==(U x, const Vec<N,T>& y) { return Vec<N,T>(x) == y; } +SINTU Vec<N,M<T>> operator!=(U x, const Vec<N,T>& y) { return Vec<N,T>(x) != y; } +SINTU Vec<N,M<T>> operator<=(U x, const Vec<N,T>& y) { return Vec<N,T>(x) <= y; } +SINTU Vec<N,M<T>> operator>=(U x, const Vec<N,T>& y) { return Vec<N,T>(x) >= y; } +SINTU Vec<N,M<T>> operator< (U x, const Vec<N,T>& y) { return Vec<N,T>(x) < y; } +SINTU Vec<N,M<T>> operator> (U x, const Vec<N,T>& y) { return Vec<N,T>(x) > y; } + +SINTU Vec<N,T> operator+ (const Vec<N,T>& x, U y) { return x + Vec<N,T>(y); } +SINTU Vec<N,T> operator- (const Vec<N,T>& x, U y) { return x - Vec<N,T>(y); } +SINTU Vec<N,T> operator* (const Vec<N,T>& x, U y) { return x * Vec<N,T>(y); } +SINTU Vec<N,T> operator/ (const Vec<N,T>& x, U y) { return x / Vec<N,T>(y); } +SINTU Vec<N,T> operator^ (const Vec<N,T>& x, U y) { return x ^ Vec<N,T>(y); } +SINTU Vec<N,T> operator& (const Vec<N,T>& x, U y) { return x & Vec<N,T>(y); } +SINTU Vec<N,T> operator| (const Vec<N,T>& x, U y) { return x | Vec<N,T>(y); } +SINTU Vec<N,T> operator&&(const Vec<N,T>& x, U y) { return x && Vec<N,T>(y); } +SINTU Vec<N,T> operator||(const Vec<N,T>& x, U y) { return x || Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator==(const Vec<N,T>& x, U y) { return x == Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator!=(const Vec<N,T>& x, U y) { return x != Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator<=(const Vec<N,T>& x, U y) { return x <= Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator>=(const Vec<N,T>& x, U y) { return x >= Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator< (const Vec<N,T>& x, U y) { return x < Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator> (const Vec<N,T>& x, U y) { return x > Vec<N,T>(y); } + +SINT Vec<N,T>& operator+=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x + y); } +SINT Vec<N,T>& operator-=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x - y); } +SINT Vec<N,T>& operator*=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x * y); } +SINT Vec<N,T>& operator/=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x / y); } +SINT Vec<N,T>& operator^=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x ^ y); } +SINT Vec<N,T>& operator&=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x & y); } +SINT Vec<N,T>& operator|=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x | y); } + +SINTU Vec<N,T>& operator+=(Vec<N,T>& x, U y) { return (x = x + Vec<N,T>(y)); } +SINTU Vec<N,T>& operator-=(Vec<N,T>& x, U y) { return (x = x - Vec<N,T>(y)); } +SINTU Vec<N,T>& operator*=(Vec<N,T>& x, U y) { return (x = x * Vec<N,T>(y)); } +SINTU Vec<N,T>& operator/=(Vec<N,T>& x, U y) { return (x = x / Vec<N,T>(y)); } +SINTU Vec<N,T>& operator^=(Vec<N,T>& x, U y) { return (x = x ^ Vec<N,T>(y)); } +SINTU Vec<N,T>& operator&=(Vec<N,T>& x, U y) { return (x = x & Vec<N,T>(y)); } +SINTU Vec<N,T>& operator|=(Vec<N,T>& x, U y) { return (x = x | 
Vec<N,T>(y)); } + +SINT Vec<N,T>& operator<<=(Vec<N,T>& x, int bits) { return (x = x << bits); } +SINT Vec<N,T>& operator>>=(Vec<N,T>& x, int bits) { return (x = x >> bits); } + +// Some operations we want are not expressible with Clang/GCC vector extensions. + +// Clang can reason about naive_if_then_else() and optimize through it better +// than if_then_else(), so it's sometimes useful to call it directly when we +// think an entire expression should optimize away, e.g. min()/max(). +SINT Vec<N,T> naive_if_then_else(const Vec<N,M<T>>& cond, const Vec<N,T>& t, const Vec<N,T>& e) { + return bit_pun<Vec<N,T>>(( cond & bit_pun<Vec<N, M<T>>>(t)) | + (~cond & bit_pun<Vec<N, M<T>>>(e)) ); +} + +SIT Vec<1,T> if_then_else(const Vec<1,M<T>>& cond, const Vec<1,T>& t, const Vec<1,T>& e) { + // In practice this scalar implementation is unlikely to be used. See next if_then_else(). + return bit_pun<Vec<1,T>>(( cond & bit_pun<Vec<1, M<T>>>(t)) | + (~cond & bit_pun<Vec<1, M<T>>>(e)) ); +} +SINT Vec<N,T> if_then_else(const Vec<N,M<T>>& cond, const Vec<N,T>& t, const Vec<N,T>& e) { + // Specializations inline here so they can generalize what types the apply to. +#if SKVX_USE_SIMD && defined(__AVX2__) + if constexpr (N*sizeof(T) == 32) { + return bit_pun<Vec<N,T>>(_mm256_blendv_epi8(bit_pun<__m256i>(e), + bit_pun<__m256i>(t), + bit_pun<__m256i>(cond))); + } +#endif +#if SKVX_USE_SIMD && defined(__SSE4_1__) + if constexpr (N*sizeof(T) == 16) { + return bit_pun<Vec<N,T>>(_mm_blendv_epi8(bit_pun<__m128i>(e), + bit_pun<__m128i>(t), + bit_pun<__m128i>(cond))); + } +#endif +#if SKVX_USE_SIMD && defined(__ARM_NEON) + if constexpr (N*sizeof(T) == 16) { + return bit_pun<Vec<N,T>>(vbslq_u8(bit_pun<uint8x16_t>(cond), + bit_pun<uint8x16_t>(t), + bit_pun<uint8x16_t>(e))); + } +#endif + // Recurse for large vectors to try to hit the specializations above. + if constexpr (N*sizeof(T) > 16) { + return join(if_then_else(cond.lo, t.lo, e.lo), + if_then_else(cond.hi, t.hi, e.hi)); + } + // This default can lead to better code than the recursing onto scalars. + return naive_if_then_else(cond, t, e); +} + +SIT bool any(const Vec<1,T>& x) { return x.val != 0; } +SINT bool any(const Vec<N,T>& x) { + // For any(), the _mm_testz intrinsics are correct and don't require comparing 'x' to 0, so it's + // lower latency compared to _mm_movemask + _mm_compneq on plain SSE. +#if SKVX_USE_SIMD && defined(__AVX2__) + if constexpr (N*sizeof(T) == 32) { + return !_mm256_testz_si256(bit_pun<__m256i>(x), _mm256_set1_epi32(-1)); + } +#endif +#if SKVX_USE_SIMD && defined(__SSE_4_1__) + if constexpr (N*sizeof(T) == 16) { + return !_mm_testz_si128(bit_pun<__m128i>(x), _mm_set1_epi32(-1)); + } +#endif +#if SKVX_USE_SIMD && defined(__SSE__) + if constexpr (N*sizeof(T) == 16) { + // On SSE, movemask checks only the MSB in each lane, which is fine if the lanes were set + // directly from a comparison op (which sets all bits to 1 when true), but skvx::Vec<> + // treats any non-zero value as true, so we have to compare 'x' to 0 before calling movemask + return _mm_movemask_ps(_mm_cmpneq_ps(bit_pun<__m128>(x), _mm_set1_ps(0))) != 0b0000; + } +#endif +#if SKVX_USE_SIMD && defined(__aarch64__) + // On 64-bit NEON, take the max across lanes, which will be non-zero if any lane was true. + // The specific lane-size doesn't really matter in this case since it's really any set bit + // that we're looking for. 
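+ // For example, any(Vec<4,uint32_t>{0,0,0,1}) reduces via vmaxvq_u8 to 1, hence true.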
+ if constexpr (N*sizeof(T) == 8 ) { return vmaxv_u8 (bit_pun<uint8x8_t> (x)) > 0; } + if constexpr (N*sizeof(T) == 16) { return vmaxvq_u8(bit_pun<uint8x16_t>(x)) > 0; } +#endif +#if SKVX_USE_SIMD && defined(__wasm_simd128__) + if constexpr (N == 4 && sizeof(T) == 4) { + return wasm_i32x4_any_true(bit_pun<VExt<4,int>>(x)); + } +#endif + return any(x.lo) + || any(x.hi); +} + +SIT bool all(const Vec<1,T>& x) { return x.val != 0; } +SINT bool all(const Vec<N,T>& x) { +// Unlike any(), we have to respect the lane layout, or we'll miss cases where a +// true lane has a mix of 0 and 1 bits. +#if SKVX_USE_SIMD && defined(__SSE__) + // Unfortunately, the _mm_testc intrinsics don't let us avoid the comparison to 0 for all()'s + // correctness, so always just use the plain SSE version. + if constexpr (N == 4 && sizeof(T) == 4) { + return _mm_movemask_ps(_mm_cmpneq_ps(bit_pun<__m128>(x), _mm_set1_ps(0))) == 0b1111; + } +#endif +#if SKVX_USE_SIMD && defined(__aarch64__) + // On 64-bit NEON, take the min across the lanes, which will be non-zero if all lanes are != 0. + if constexpr (sizeof(T)==1 && N==8) {return vminv_u8 (bit_pun<uint8x8_t> (x)) > 0;} + if constexpr (sizeof(T)==1 && N==16) {return vminvq_u8 (bit_pun<uint8x16_t>(x)) > 0;} + if constexpr (sizeof(T)==2 && N==4) {return vminv_u16 (bit_pun<uint16x4_t>(x)) > 0;} + if constexpr (sizeof(T)==2 && N==8) {return vminvq_u16(bit_pun<uint16x8_t>(x)) > 0;} + if constexpr (sizeof(T)==4 && N==2) {return vminv_u32 (bit_pun<uint32x2_t>(x)) > 0;} + if constexpr (sizeof(T)==4 && N==4) {return vminvq_u32(bit_pun<uint32x4_t>(x)) > 0;} +#endif +#if SKVX_USE_SIMD && defined(__wasm_simd128__) + if constexpr (N == 4 && sizeof(T) == 4) { + return wasm_i32x4_all_true(bit_pun<VExt<4,int>>(x)); + } +#endif + return all(x.lo) + && all(x.hi); +} + +// cast() Vec<N,S> to Vec<N,D>, as if applying a C-cast to each lane. +// TODO: implement with map()? +template <typename D, typename S> +SI Vec<1,D> cast(const Vec<1,S>& src) { return (D)src.val; } + +template <typename D, int N, typename S> +SI Vec<N,D> cast(const Vec<N,S>& src) { +#if SKVX_USE_SIMD && defined(__clang__) + return to_vec(__builtin_convertvector(to_vext(src), VExt<N,D>)); +#else + return join(cast<D>(src.lo), cast<D>(src.hi)); +#endif +} + +// min/max match logic of std::min/std::max, which is important when NaN is involved. +SIT T min(const Vec<1,T>& x) { return x.val; } +SIT T max(const Vec<1,T>& x) { return x.val; } +SINT T min(const Vec<N,T>& x) { return std::min(min(x.lo), min(x.hi)); } +SINT T max(const Vec<N,T>& x) { return std::max(max(x.lo), max(x.hi)); } + +SINT Vec<N,T> min(const Vec<N,T>& x, const Vec<N,T>& y) { return naive_if_then_else(y < x, y, x); } +SINT Vec<N,T> max(const Vec<N,T>& x, const Vec<N,T>& y) { return naive_if_then_else(x < y, y, x); } + +SINTU Vec<N,T> min(const Vec<N,T>& x, U y) { return min(x, Vec<N,T>(y)); } +SINTU Vec<N,T> max(const Vec<N,T>& x, U y) { return max(x, Vec<N,T>(y)); } +SINTU Vec<N,T> min(U x, const Vec<N,T>& y) { return min(Vec<N,T>(x), y); } +SINTU Vec<N,T> max(U x, const Vec<N,T>& y) { return max(Vec<N,T>(x), y); } + +// pin matches the logic of SkTPin, which is important when NaN is involved. It always returns +// values in the range lo..hi, and if x is NaN, it returns lo. 
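+// For example, pin(NaN, 0, 1) evaluates max(0, min(NaN, 1)): "1 < NaN" is false, so min() keeps
+// NaN, and then "0 < NaN" is false, so max() keeps 0, the lo bound.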
+SINT Vec<N,T> pin(const Vec<N,T>& x, const Vec<N,T>& lo, const Vec<N,T>& hi) { + return max(lo, min(x, hi)); +} + +// Shuffle values from a vector pretty arbitrarily: +// skvx::Vec<4,float> rgba = {R,G,B,A}; +// shuffle<2,1,0,3> (rgba) ~> {B,G,R,A} +// shuffle<2,1> (rgba) ~> {B,G} +// shuffle<2,1,2,1,2,1,2,1>(rgba) ~> {B,G,B,G,B,G,B,G} +// shuffle<3,3,3,3> (rgba) ~> {A,A,A,A} +// The only real restriction is that the output also be a legal N=power-of-two sknx::Vec. +template <int... Ix, int N, typename T> +SI Vec<sizeof...(Ix),T> shuffle(const Vec<N,T>& x) { +#if SKVX_USE_SIMD && defined(__clang__) + // TODO: can we just always use { x[Ix]... }? + return to_vec<sizeof...(Ix),T>(__builtin_shufflevector(to_vext(x), to_vext(x), Ix...)); +#else + return { x[Ix]... }; +#endif +} + +// Call map(fn, x) for a vector with fn() applied to each lane of x, { fn(x[0]), fn(x[1]), ... }, +// or map(fn, x,y) for a vector of fn(x[i], y[i]), etc. + +template <typename Fn, typename... Args, size_t... I> +SI auto map(std::index_sequence<I...>, + Fn&& fn, const Args&... args) -> skvx::Vec<sizeof...(I), decltype(fn(args[0]...))> { + auto lane = [&](size_t i) +#if defined(__clang__) + // CFI, specifically -fsanitize=cfi-icall, seems to give a false positive here, + // with errors like "control flow integrity check for type 'float (float) + // noexcept' failed during indirect function call... note: sqrtf.cfi_jt defined + // here". But we can be quite sure fn is the right type: it's all inferred! + // So, stifle CFI in this function. + __attribute__((no_sanitize("cfi"))) +#endif + { return fn(args[static_cast<int>(i)]...); }; + + return { lane(I)... }; +} + +template <typename Fn, int N, typename T, typename... Rest> +auto map(Fn&& fn, const Vec<N,T>& first, const Rest&... rest) { + // Derive an {0...N-1} index_sequence from the size of the first arg: N lanes in, N lanes out. + return map(std::make_index_sequence<N>{}, fn, first,rest...); +} + +SIN Vec<N,float> ceil(const Vec<N,float>& x) { return map( ceilf, x); } +SIN Vec<N,float> floor(const Vec<N,float>& x) { return map(floorf, x); } +SIN Vec<N,float> trunc(const Vec<N,float>& x) { return map(truncf, x); } +SIN Vec<N,float> round(const Vec<N,float>& x) { return map(roundf, x); } +SIN Vec<N,float> sqrt(const Vec<N,float>& x) { return map( sqrtf, x); } +SIN Vec<N,float> abs(const Vec<N,float>& x) { return map( fabsf, x); } +SIN Vec<N,float> fma(const Vec<N,float>& x, + const Vec<N,float>& y, + const Vec<N,float>& z) { + // I don't understand why Clang's codegen is terrible if we write map(fmaf, x,y,z) directly. + auto fn = [](float x, float y, float z) { return fmaf(x,y,z); }; + return map(fn, x,y,z); +} + +SI Vec<1,int> lrint(const Vec<1,float>& x) { + return (int)lrintf(x.val); +} +SIN Vec<N,int> lrint(const Vec<N,float>& x) { +#if SKVX_USE_SIMD && defined(__AVX__) + if constexpr (N == 8) { + return bit_pun<Vec<N,int>>(_mm256_cvtps_epi32(bit_pun<__m256>(x))); + } +#endif +#if SKVX_USE_SIMD && defined(__SSE__) + if constexpr (N == 4) { + return bit_pun<Vec<N,int>>(_mm_cvtps_epi32(bit_pun<__m128>(x))); + } +#endif + return join(lrint(x.lo), + lrint(x.hi)); +} + +SIN Vec<N,float> fract(const Vec<N,float>& x) { return x - floor(x); } + +// Assumes inputs are finite and treat/flush denorm half floats as/to zero. +// Key constants to watch for: +// - a float is 32-bit, 1-8-23 sign-exponent-mantissa, with 127 exponent bias; +// - a half is 16-bit, 1-5-10 sign-exponent-mantissa, with 15 exponent bias. 
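+// Worked example: 1.0f is 0x3f80'0000; shifting the mantissa down 13 bits gives 0x0001'fc00, and
+// rebasing the exponent by subtracting (127-15)<<10 == 0x1c000 leaves 0x3c00, the half for 1.0.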
+SIN Vec<N,uint16_t> to_half_finite_ftz(const Vec<N,float>& x) { + Vec<N,uint32_t> sem = bit_pun<Vec<N,uint32_t>>(x), + s = sem & 0x8000'0000, + em = sem ^ s, + is_norm = em > 0x387f'd000, // halfway between largest f16 denorm and smallest norm + norm = (em>>13) - ((127-15)<<10); + return cast<uint16_t>((s>>16) | (is_norm & norm)); +} +SIN Vec<N,float> from_half_finite_ftz(const Vec<N,uint16_t>& x) { + Vec<N,uint32_t> wide = cast<uint32_t>(x), + s = wide & 0x8000, + em = wide ^ s, + is_norm = em > 0x3ff, + norm = (em<<13) + ((127-15)<<23); + return bit_pun<Vec<N,float>>((s<<16) | (is_norm & norm)); +} + +// Like if_then_else(), these N=1 base cases won't actually be used unless explicitly called. +SI Vec<1,uint16_t> to_half(const Vec<1,float>& x) { return to_half_finite_ftz(x); } +SI Vec<1,float> from_half(const Vec<1,uint16_t>& x) { return from_half_finite_ftz(x); } + +SIN Vec<N,uint16_t> to_half(const Vec<N,float>& x) { +#if SKVX_USE_SIMD && defined(__F16C__) + if constexpr (N == 8) { + return bit_pun<Vec<N,uint16_t>>(_mm256_cvtps_ph(bit_pun<__m256>(x), + _MM_FROUND_TO_NEAREST_INT)); + } +#endif +#if SKVX_USE_SIMD && defined(__aarch64__) + if constexpr (N == 4) { + return bit_pun<Vec<N,uint16_t>>(vcvt_f16_f32(bit_pun<float32x4_t>(x))); + + } +#endif + if constexpr (N > 4) { + return join(to_half(x.lo), + to_half(x.hi)); + } + return to_half_finite_ftz(x); +} + +SIN Vec<N,float> from_half(const Vec<N,uint16_t>& x) { +#if SKVX_USE_SIMD && defined(__F16C__) + if constexpr (N == 8) { + return bit_pun<Vec<N,float>>(_mm256_cvtph_ps(bit_pun<__m128i>(x))); + } +#endif +#if SKVX_USE_SIMD && defined(__aarch64__) + if constexpr (N == 4) { + return bit_pun<Vec<N,float>>(vcvt_f32_f16(bit_pun<float16x4_t>(x))); + } +#endif + if constexpr (N > 4) { + return join(from_half(x.lo), + from_half(x.hi)); + } + return from_half_finite_ftz(x); +} + +// div255(x) = (x + 127) / 255 is a bit-exact rounding divide-by-255, packing down to 8-bit. +SIN Vec<N,uint8_t> div255(const Vec<N,uint16_t>& x) { + return cast<uint8_t>( (x+127)/255 ); +} + +// approx_scale(x,y) approximates div255(cast<uint16_t>(x)*cast<uint16_t>(y)) within a bit, +// and is always perfect when x or y is 0 or 255. +SIN Vec<N,uint8_t> approx_scale(const Vec<N,uint8_t>& x, const Vec<N,uint8_t>& y) { + // All of (x*y+x)/256, (x*y+y)/256, and (x*y+255)/256 meet the criteria above. + // We happen to have historically picked (x*y+x)/256. + auto X = cast<uint16_t>(x), + Y = cast<uint16_t>(y); + return cast<uint8_t>( (X*Y+X)/256 ); +} + +// saturated_add(x,y) sums values and clamps to the maximum value instead of overflowing. +SINT std::enable_if_t<std::is_unsigned_v<T>, Vec<N,T>> saturated_add(const Vec<N,T>& x, + const Vec<N,T>& y) { +#if SKVX_USE_SIMD && (defined(__SSE__) || defined(__ARM_NEON)) + // Both SSE and ARM have 16-lane saturated adds, so use intrinsics for those and recurse down + // or join up to take advantage. 
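+ // For example, an N==8 uint8_t add is widened with join(x,x) to N==16, handled by the
+ // intrinsic, and then only the low half of the result is kept.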
+ if constexpr (N == 16 && sizeof(T) == 1) { + #if defined(__SSE__) + return bit_pun<Vec<N,T>>(_mm_adds_epu8(bit_pun<__m128i>(x), bit_pun<__m128i>(y))); + #else // __ARM_NEON + return bit_pun<Vec<N,T>>(vqaddq_u8(bit_pun<uint8x16_t>(x), bit_pun<uint8x16_t>(y))); + #endif + } else if constexpr (N < 16 && sizeof(T) == 1) { + return saturated_add(join(x,x), join(y,y)).lo; + } else if constexpr (sizeof(T) == 1) { + return join(saturated_add(x.lo, y.lo), saturated_add(x.hi, y.hi)); + } +#endif + // Otherwise saturate manually + auto sum = x + y; + return if_then_else(sum < x, Vec<N,T>(std::numeric_limits<T>::max()), sum); +} + +// The ScaledDividerU32 takes a divisor > 1, and creates a function divide(numerator) that +// calculates a numerator / denominator. For this to be rounded properly, numerator should have +// half added in: +// divide(numerator + half) == floor(numerator/denominator + 1/2). +// +// This gives an answer within +/- 1 from the true value. +// +// Derivation of half: +// numerator/denominator + 1/2 = (numerator + half) / d +// numerator + denominator / 2 = numerator + half +// half = denominator / 2. +// +// Because half is divided by 2, that division must also be rounded. +// half == denominator / 2 = (denominator + 1) / 2. +// +// The divisorFactor is just a scaled value: +// divisorFactor = (1 / divisor) * 2 ^ 32. +// The maximum that can be divided and rounded is UINT_MAX - half. +class ScaledDividerU32 { +public: + explicit ScaledDividerU32(uint32_t divisor) + : fDivisorFactor{(uint32_t)(std::round((1.0 / divisor) * (1ull << 32)))} + , fHalf{(divisor + 1) >> 1} { + assert(divisor > 1); + } + + Vec<4, uint32_t> divide(const Vec<4, uint32_t>& numerator) const { +#if SKVX_USE_SIMD && defined(__ARM_NEON) + uint64x2_t hi = vmull_n_u32(vget_high_u32(to_vext(numerator)), fDivisorFactor); + uint64x2_t lo = vmull_n_u32(vget_low_u32(to_vext(numerator)), fDivisorFactor); + + return to_vec<4, uint32_t>(vcombine_u32(vshrn_n_u64(lo,32), vshrn_n_u64(hi,32))); +#else + return cast<uint32_t>((cast<uint64_t>(numerator) * fDivisorFactor) >> 32); +#endif + } + + uint32_t half() const { return fHalf; } + +private: + const uint32_t fDivisorFactor; + const uint32_t fHalf; +}; + + +SIN Vec<N,uint16_t> mull(const Vec<N,uint8_t>& x, + const Vec<N,uint8_t>& y) { +#if SKVX_USE_SIMD && defined(__ARM_NEON) + // With NEON we can do eight u8*u8 -> u16 in one instruction, vmull_u8 (read, mul-long). + if constexpr (N == 8) { + return to_vec<8,uint16_t>(vmull_u8(to_vext(x), to_vext(y))); + } else if constexpr (N < 8) { + return mull(join(x,x), join(y,y)).lo; + } else { // N > 8 + return join(mull(x.lo, y.lo), mull(x.hi, y.hi)); + } +#else + return cast<uint16_t>(x) * cast<uint16_t>(y); +#endif +} + +SIN Vec<N,uint32_t> mull(const Vec<N,uint16_t>& x, + const Vec<N,uint16_t>& y) { +#if SKVX_USE_SIMD && defined(__ARM_NEON) + // NEON can do four u16*u16 -> u32 in one instruction, vmull_u16 + if constexpr (N == 4) { + return to_vec<4,uint32_t>(vmull_u16(to_vext(x), to_vext(y))); + } else if constexpr (N < 4) { + return mull(join(x,x), join(y,y)).lo; + } else { // N > 4 + return join(mull(x.lo, y.lo), mull(x.hi, y.hi)); + } +#else + return cast<uint32_t>(x) * cast<uint32_t>(y); +#endif +} + +SIN Vec<N,uint16_t> mulhi(const Vec<N,uint16_t>& x, + const Vec<N,uint16_t>& y) { +#if SKVX_USE_SIMD && defined(__SSE__) + // Use _mm_mulhi_epu16 for 8xuint16_t and join or split to get there. 
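+ // Each lane computes the high 16 bits of the full 32-bit product, i.e. (x*y) >> 16.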
+ if constexpr (N == 8) { + return bit_pun<Vec<8,uint16_t>>(_mm_mulhi_epu16(bit_pun<__m128i>(x), bit_pun<__m128i>(y))); + } else if constexpr (N < 8) { + return mulhi(join(x,x), join(y,y)).lo; + } else { // N > 8 + return join(mulhi(x.lo, y.lo), mulhi(x.hi, y.hi)); + } +#else + return skvx::cast<uint16_t>(mull(x, y) >> 16); +#endif +} + +SINT T dot(const Vec<N, T>& a, const Vec<N, T>& b) { + // While dot is a "horizontal" operation like any or all, it needs to remain + // in floating point and there aren't really any good SIMD instructions that make it faster. + // The constexpr cases remove the for loop in the only cases we realistically call. + auto ab = a*b; + if constexpr (N == 2) { + return ab[0] + ab[1]; + } else if constexpr (N == 4) { + return ab[0] + ab[1] + ab[2] + ab[3]; + } else { + T sum = ab[0]; + for (int i = 1; i < N; ++i) { + sum += ab[i]; + } + return sum; + } +} + +SIT T cross(const Vec<2, T>& a, const Vec<2, T>& b) { + auto x = a * shuffle<1,0>(b); + return x[0] - x[1]; +} + +SIN float length(const Vec<N, float>& v) { + return std::sqrt(dot(v, v)); +} + +SIN double length(const Vec<N, double>& v) { + return std::sqrt(dot(v, v)); +} + +SIN Vec<N, float> normalize(const Vec<N, float>& v) { + return v / length(v); +} + +SIN Vec<N, double> normalize(const Vec<N, double>& v) { + return v / length(v); +} + +SINT bool isfinite(const Vec<N, T>& v) { + // Multiply all values together with 0. If they were all finite, the output is + // 0 (also finite). If any were not, we'll get nan. + return std::isfinite(dot(v, Vec<N, T>(0))); +} + +// De-interleaving load of 4 vectors. +// +// WARNING: These are really only supported well on NEON. Consider restructuring your data before +// resorting to these methods. +SIT void strided_load4(const T* v, + Vec<1,T>& a, + Vec<1,T>& b, + Vec<1,T>& c, + Vec<1,T>& d) { + a.val = v[0]; + b.val = v[1]; + c.val = v[2]; + d.val = v[3]; +} +SINT void strided_load4(const T* v, + Vec<N,T>& a, + Vec<N,T>& b, + Vec<N,T>& c, + Vec<N,T>& d) { + strided_load4(v, a.lo, b.lo, c.lo, d.lo); + strided_load4(v + 4*(N/2), a.hi, b.hi, c.hi, d.hi); +} +#if SKVX_USE_SIMD && defined(__ARM_NEON) +#define IMPL_LOAD4_TRANSPOSED(N, T, VLD) \ +SI void strided_load4(const T* v, \ + Vec<N,T>& a, \ + Vec<N,T>& b, \ + Vec<N,T>& c, \ + Vec<N,T>& d) { \ + auto mat = VLD(v); \ + a = bit_pun<Vec<N,T>>(mat.val[0]); \ + b = bit_pun<Vec<N,T>>(mat.val[1]); \ + c = bit_pun<Vec<N,T>>(mat.val[2]); \ + d = bit_pun<Vec<N,T>>(mat.val[3]); \ +} +IMPL_LOAD4_TRANSPOSED(2, uint32_t, vld4_u32) +IMPL_LOAD4_TRANSPOSED(4, uint16_t, vld4_u16) +IMPL_LOAD4_TRANSPOSED(8, uint8_t, vld4_u8) +IMPL_LOAD4_TRANSPOSED(2, int32_t, vld4_s32) +IMPL_LOAD4_TRANSPOSED(4, int16_t, vld4_s16) +IMPL_LOAD4_TRANSPOSED(8, int8_t, vld4_s8) +IMPL_LOAD4_TRANSPOSED(2, float, vld4_f32) +IMPL_LOAD4_TRANSPOSED(4, uint32_t, vld4q_u32) +IMPL_LOAD4_TRANSPOSED(8, uint16_t, vld4q_u16) +IMPL_LOAD4_TRANSPOSED(16, uint8_t, vld4q_u8) +IMPL_LOAD4_TRANSPOSED(4, int32_t, vld4q_s32) +IMPL_LOAD4_TRANSPOSED(8, int16_t, vld4q_s16) +IMPL_LOAD4_TRANSPOSED(16, int8_t, vld4q_s8) +IMPL_LOAD4_TRANSPOSED(4, float, vld4q_f32) +#undef IMPL_LOAD4_TRANSPOSED + +#elif SKVX_USE_SIMD && defined(__SSE__) + +SI void strided_load4(const float* v, + Vec<4,float>& a, + Vec<4,float>& b, + Vec<4,float>& c, + Vec<4,float>& d) { + __m128 a_ = _mm_loadu_ps(v); + __m128 b_ = _mm_loadu_ps(v+4); + __m128 c_ = _mm_loadu_ps(v+8); + __m128 d_ = _mm_loadu_ps(v+12); + _MM_TRANSPOSE4_PS(a_, b_, c_, d_); + a = bit_pun<Vec<4,float>>(a_); + b = bit_pun<Vec<4,float>>(b_); + c = 
bit_pun<Vec<4,float>>(c_); + d = bit_pun<Vec<4,float>>(d_); +} +#endif + +// De-interleaving load of 2 vectors. +// +// WARNING: These are really only supported well on NEON. Consider restructuring your data before +// resorting to these methods. +SIT void strided_load2(const T* v, Vec<1,T>& a, Vec<1,T>& b) { + a.val = v[0]; + b.val = v[1]; +} +SINT void strided_load2(const T* v, Vec<N,T>& a, Vec<N,T>& b) { + strided_load2(v, a.lo, b.lo); + strided_load2(v + 2*(N/2), a.hi, b.hi); +} +#if SKVX_USE_SIMD && defined(__ARM_NEON) +#define IMPL_LOAD2_TRANSPOSED(N, T, VLD) \ +SI void strided_load2(const T* v, Vec<N,T>& a, Vec<N,T>& b) { \ + auto mat = VLD(v); \ + a = bit_pun<Vec<N,T>>(mat.val[0]); \ + b = bit_pun<Vec<N,T>>(mat.val[1]); \ +} +IMPL_LOAD2_TRANSPOSED(2, uint32_t, vld2_u32) +IMPL_LOAD2_TRANSPOSED(4, uint16_t, vld2_u16) +IMPL_LOAD2_TRANSPOSED(8, uint8_t, vld2_u8) +IMPL_LOAD2_TRANSPOSED(2, int32_t, vld2_s32) +IMPL_LOAD2_TRANSPOSED(4, int16_t, vld2_s16) +IMPL_LOAD2_TRANSPOSED(8, int8_t, vld2_s8) +IMPL_LOAD2_TRANSPOSED(2, float, vld2_f32) +IMPL_LOAD2_TRANSPOSED(4, uint32_t, vld2q_u32) +IMPL_LOAD2_TRANSPOSED(8, uint16_t, vld2q_u16) +IMPL_LOAD2_TRANSPOSED(16, uint8_t, vld2q_u8) +IMPL_LOAD2_TRANSPOSED(4, int32_t, vld2q_s32) +IMPL_LOAD2_TRANSPOSED(8, int16_t, vld2q_s16) +IMPL_LOAD2_TRANSPOSED(16, int8_t, vld2q_s8) +IMPL_LOAD2_TRANSPOSED(4, float, vld2q_f32) +#undef IMPL_LOAD2_TRANSPOSED +#endif + +// Define commonly used aliases +using float2 = Vec< 2, float>; +using float4 = Vec< 4, float>; +using float8 = Vec< 8, float>; + +using double2 = Vec< 2, double>; +using double4 = Vec< 4, double>; +using double8 = Vec< 8, double>; + +using byte2 = Vec< 2, uint8_t>; +using byte4 = Vec< 4, uint8_t>; +using byte8 = Vec< 8, uint8_t>; +using byte16 = Vec<16, uint8_t>; + +using int2 = Vec< 2, int32_t>; +using int4 = Vec< 4, int32_t>; +using int8 = Vec< 8, int32_t>; + +using uint2 = Vec< 2, uint32_t>; +using uint4 = Vec< 4, uint32_t>; +using uint8 = Vec< 8, uint32_t>; + +using long2 = Vec< 2, int64_t>; +using long4 = Vec< 4, int64_t>; +using long8 = Vec< 8, int64_t>; + +// Use with from_half and to_half to convert between floatX, and use these for storage. +using half2 = Vec< 2, uint16_t>; +using half4 = Vec< 4, uint16_t>; +using half8 = Vec< 8, uint16_t>; + +} // namespace skvx + +#undef SINTU +#undef SINT +#undef SIN +#undef SIT +#undef SI +#undef SKVX_ALWAYS_INLINE +#undef SKVX_USE_SIMD + +#endif//SKVX_DEFINED diff --git a/gfx/skia/skia/src/base/SkZip.h b/gfx/skia/skia/src/base/SkZip.h new file mode 100644 index 0000000000..884aa11d8d --- /dev/null +++ b/gfx/skia/skia/src/base/SkZip.h @@ -0,0 +1,215 @@ +/* + * Copyright 2019 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkZip_DEFINED +#define SkZip_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkDebug.h" +#include "include/private/base/SkSpan_impl.h" + +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <tuple> +#include <utility> + +// Take a list of things that can be pointers, and use them all in parallel. The iterators and +// accessor operator[] for the class produce a tuple of the items. +template<typename... 
Ts> +class SkZip { + using ReturnTuple = std::tuple<Ts&...>; + + class Iterator { + public: + using value_type = ReturnTuple; + using difference_type = ptrdiff_t; + using pointer = value_type*; + using reference = value_type; + using iterator_category = std::input_iterator_tag; + constexpr Iterator(const SkZip* zip, size_t index) : fZip{zip}, fIndex{index} { } + constexpr Iterator(const Iterator& that) : Iterator{ that.fZip, that.fIndex } { } + constexpr Iterator& operator++() { ++fIndex; return *this; } + constexpr Iterator operator++(int) { Iterator tmp(*this); operator++(); return tmp; } + constexpr bool operator==(const Iterator& rhs) const { return fIndex == rhs.fIndex; } + constexpr bool operator!=(const Iterator& rhs) const { return fIndex != rhs.fIndex; } + constexpr reference operator*() { return (*fZip)[fIndex]; } + friend constexpr difference_type operator-(Iterator lhs, Iterator rhs) { + return lhs.fIndex - rhs.fIndex; + } + + private: + const SkZip* const fZip = nullptr; + size_t fIndex = 0; + }; + + template<typename T> + inline static constexpr T* nullify = nullptr; + +public: + constexpr SkZip() : fPointers{nullify<Ts>...}, fSize{0} {} + constexpr SkZip(size_t) = delete; + constexpr SkZip(size_t size, Ts*... ts) + : fPointers{ts...} + , fSize{size} {} + constexpr SkZip(const SkZip& that) = default; + constexpr SkZip& operator=(const SkZip &that) = default; + + // Check to see if U can be used for const T or is the same as T + template <typename U, typename T> + using CanConvertToConst = typename std::integral_constant<bool, + std::is_convertible<U*, T*>::value && sizeof(U) == sizeof(T)>::type; + + // Allow SkZip<const T> to be constructed from SkZip<T>. + template<typename... Us, + typename = std::enable_if<std::conjunction<CanConvertToConst<Us, Ts>...>::value>> + constexpr SkZip(const SkZip<Us...>& that) + : fPointers(that.data()) + , fSize{that.size()} { } + + constexpr ReturnTuple operator[](size_t i) const { return this->index(i);} + constexpr size_t size() const { return fSize; } + constexpr bool empty() const { return this->size() == 0; } + constexpr ReturnTuple front() const { return this->index(0); } + constexpr ReturnTuple back() const { return this->index(this->size() - 1); } + constexpr Iterator begin() const { return Iterator{this, 0}; } + constexpr Iterator end() const { return Iterator{this, this->size()}; } + template<size_t I> constexpr auto get() const { + return SkSpan(std::get<I>(fPointers), fSize); + } + constexpr std::tuple<Ts*...> data() const { return fPointers; } + constexpr SkZip first(size_t n) const { + SkASSERT(n <= this->size()); + if (n == 0) { return SkZip(); } + return SkZip{n, fPointers}; + } + constexpr SkZip last(size_t n) const { + SkASSERT(n <= this->size()); + if (n == 0) { return SkZip(); } + return SkZip{n, this->pointersAt(fSize - n)}; + } + constexpr SkZip subspan(size_t offset, size_t count) const { + SkASSERT(offset < this->size()); + SkASSERT(count <= this->size() - offset); + if (count == 0) { return SkZip(); } + return SkZip(count, pointersAt(offset)); + } + +private: + constexpr SkZip(size_t n, const std::tuple<Ts*...>& pointers) + : fPointers{pointers} + , fSize{n} {} + + constexpr ReturnTuple index(size_t i) const { + SkASSERT(this->size() > 0); + SkASSERT(i < this->size()); + return indexDetail(i, std::make_index_sequence<sizeof...(Ts)>{}); + } + + template<std::size_t... 
Is> + constexpr ReturnTuple indexDetail(size_t i, std::index_sequence<Is...>) const { + return ReturnTuple((std::get<Is>(fPointers))[i]...); + } + + std::tuple<Ts*...> pointersAt(size_t i) const { + SkASSERT(this->size() > 0); + SkASSERT(i < this->size()); + return pointersAtDetail(i, std::make_index_sequence<sizeof...(Ts)>{}); + } + + template<std::size_t... Is> + constexpr std::tuple<Ts*...> pointersAtDetail(size_t i, std::index_sequence<Is...>) const { + return std::tuple<Ts*...>{&(std::get<Is>(fPointers))[i]...}; + } + + std::tuple<Ts*...> fPointers; + size_t fSize; +}; + +class SkMakeZipDetail { + template<typename T> struct DecayPointer{ + using U = typename std::remove_cv<typename std::remove_reference<T>::type>::type; + using type = typename std::conditional<std::is_pointer<U>::value, U, T>::type; + }; + template<typename T> using DecayPointerT = typename DecayPointer<T>::type; + + template<typename C> struct ContiguousMemory { }; + template<typename T> struct ContiguousMemory<T*> { + using value_type = T; + static constexpr value_type* Data(T* t) { return t; } + static constexpr size_t Size(T* s) { return SIZE_MAX; } + }; + template<typename T, size_t N> struct ContiguousMemory<T(&)[N]> { + using value_type = T; + static constexpr value_type* Data(T(&t)[N]) { return t; } + static constexpr size_t Size(T(&)[N]) { return N; } + }; + // In general, we don't want r-value collections, but SkSpans are ok, because they are a view + // onto an actual container. + template<typename T> struct ContiguousMemory<SkSpan<T>> { + using value_type = T; + static constexpr value_type* Data(SkSpan<T> s) { return s.data(); } + static constexpr size_t Size(SkSpan<T> s) { return s.size(); } + }; + // Only accept l-value references to collections. + template<typename C> struct ContiguousMemory<C&> { + using value_type = typename std::remove_pointer<decltype(std::declval<C>().data())>::type; + static constexpr value_type* Data(C& c) { return c.data(); } + static constexpr size_t Size(C& c) { return c.size(); } + }; + template<typename C> using Span = ContiguousMemory<DecayPointerT<C>>; + template<typename C> using ValueType = typename Span<C>::value_type; + + template<typename C, typename... Ts> struct PickOneSize { }; + template <typename T, typename... Ts> struct PickOneSize<T*, Ts...> { + static constexpr size_t Size(T* t, Ts... ts) { + return PickOneSize<Ts...>::Size(std::forward<Ts>(ts)...); + } + }; + template <typename T, typename... Ts, size_t N> struct PickOneSize<T(&)[N], Ts...> { + static constexpr size_t Size(T(&)[N], Ts...) { return N; } + }; + template<typename T, typename... Ts> struct PickOneSize<SkSpan<T>, Ts...> { + static constexpr size_t Size(SkSpan<T> s, Ts...) { return s.size(); } + }; + template<typename C, typename... Ts> struct PickOneSize<C&, Ts...> { + static constexpr size_t Size(C& c, Ts...) { return c.size(); } + }; + +public: + template<typename... Ts> + static constexpr auto MakeZip(Ts&& ... ts) { + + // Pick the first collection that has a size, and use that for the size. + size_t size = PickOneSize<DecayPointerT<Ts>...>::Size(std::forward<Ts>(ts)...); + +#ifdef SK_DEBUG + // Check that all sizes are the same. + size_t minSize = SIZE_MAX; + size_t maxSize = 0; + for (size_t s : {Span<Ts>::Size(std::forward<Ts>(ts))...}) { + if (s != SIZE_MAX) { + minSize = std::min(minSize, s); + maxSize = std::max(maxSize, s); + } + } + SkASSERT(minSize == maxSize); +#endif + + return SkZip<ValueType<Ts>...>{size, Span<Ts>::Data(std::forward<Ts>(ts))...}; + } +}; + +template<typename... 
Ts> +SkZip(size_t size, Ts*... ts) -> SkZip<Ts...>; + +template<typename... Ts> +inline constexpr auto SkMakeZip(Ts&& ... ts) { + return SkMakeZipDetail::MakeZip(std::forward<Ts>(ts)...); +} +#endif //SkZip_DEFINED
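+
+// A minimal usage sketch (the arrays counts and weights are made-up names for illustration):
+//    int   counts[3]  = {1, 2, 3};
+//    float weights[3] = {0.5f, 0.25f, 0.25f};
+//    for (auto [count, weight] : SkMakeZip(counts, weights)) {
+//        // count is an int&, weight is a float&, both taken from the same index.
+//    }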