Diffstat
49 files changed, 8326 insertions, 0 deletions
diff --git a/gfx/skia/skia/src/base/README.md b/gfx/skia/skia/src/base/README.md new file mode 100644 index 0000000000..322c671436 --- /dev/null +++ b/gfx/skia/skia/src/base/README.md @@ -0,0 +1,4 @@ +The files here are part of the base package (see also include/private/base). The distinction +is that the files here are not needed by anything in the public API. + +Files here should not depend on anything other than system headers or other files in base.
\ No newline at end of file diff --git a/gfx/skia/skia/src/base/SkASAN.h b/gfx/skia/skia/src/base/SkASAN.h new file mode 100644 index 0000000000..8da93daaa0 --- /dev/null +++ b/gfx/skia/skia/src/base/SkASAN.h @@ -0,0 +1,65 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkASAN_DEFINED +#define SkASAN_DEFINED + +#include <cstddef> + +#ifdef MOZ_SKIA + +#include "mozilla/MemoryChecking.h" + +#ifdef MOZ_HAVE_MEM_CHECKS +#define SK_SANITIZE_ADDRESS MOZ_HAVE_MEM_CHECKS +#endif + +static inline void sk_asan_poison_memory_region(void const volatile *addr, size_t size) { + MOZ_MAKE_MEM_NOACCESS(addr, size); +} + +static inline void sk_asan_unpoison_memory_region(void const volatile *addr, size_t size) { + MOZ_MAKE_MEM_DEFINED(addr, size); +} + +#else // !MOZ_SKIA + +#ifdef __SANITIZE_ADDRESS__ + #define SK_SANITIZE_ADDRESS 1 +#endif +#if !defined(SK_SANITIZE_ADDRESS) && defined(__has_feature) + #if __has_feature(address_sanitizer) + #define SK_SANITIZE_ADDRESS 1 + #endif +#endif + +// Typically declared in LLVM's asan_interface.h. +#ifdef SK_SANITIZE_ADDRESS +extern "C" { + void __asan_poison_memory_region(void const volatile *addr, size_t size); + void __asan_unpoison_memory_region(void const volatile *addr, size_t size); +} +#endif + +// Code that implements bespoke allocation arenas can poison the entire arena on creation, then +// unpoison chunks of arena memory as they are parceled out. Consider leaving gaps between blocks +// to detect buffer overrun. +static inline void sk_asan_poison_memory_region(void const volatile *addr, size_t size) { +#ifdef SK_SANITIZE_ADDRESS + __asan_poison_memory_region(addr, size); +#endif +} + +static inline void sk_asan_unpoison_memory_region(void const volatile *addr, size_t size) { +#ifdef SK_SANITIZE_ADDRESS + __asan_unpoison_memory_region(addr, size); +#endif +} + +#endif // !MOZ_SKIA + +#endif // SkASAN_DEFINED diff --git a/gfx/skia/skia/src/base/SkArenaAlloc.cpp b/gfx/skia/skia/src/base/SkArenaAlloc.cpp new file mode 100644 index 0000000000..2dc1c00226 --- /dev/null +++ b/gfx/skia/skia/src/base/SkArenaAlloc.cpp @@ -0,0 +1,173 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
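The SkASAN.h header above compiles to no-ops unless an address sanitizer is available (or, in the MOZ_SKIA build, mozilla/MemoryChecking.h supplies the hooks). Below is a minimal sketch of the arena pattern its comment describes, poisoning a whole region up front and unpoisoning chunks as they are handed out. The TinyArena type and its sizes are made-up example values, and the include assumes the Skia source tree is on the include path.

    // Illustration only: the poison/unpoison pattern described in SkASAN.h.
    // Under a non-sanitizer build both helpers are no-ops, so this compiles
    // and behaves the same everywhere.
    #include "src/base/SkASAN.h"

    #include <cstddef>

    struct TinyArena {                       // hypothetical example type
        char   fStorage[256];
        size_t fUsed = 0;

        TinyArena()  { sk_asan_poison_memory_region(fStorage, sizeof(fStorage)); }
        ~TinyArena() { sk_asan_unpoison_memory_region(fStorage, sizeof(fStorage)); }

        void* alloc(size_t n) {
            if (fUsed + n > sizeof(fStorage)) { return nullptr; }
            void* p = fStorage + fUsed;
            fUsed += n;
            // Only the bytes being parceled out become addressable again.
            sk_asan_unpoison_memory_region(p, n);
            return p;
        }
    };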
+ */ + +#include "src/base/SkArenaAlloc.h" + +#include "include/private/base/SkMalloc.h" + +#include <algorithm> +#include <cassert> +#include <cstddef> + +static char* end_chain(char*) { return nullptr; } + +SkArenaAlloc::SkArenaAlloc(char* block, size_t size, size_t firstHeapAllocation) + : fDtorCursor {block} + , fCursor {block} + , fEnd {block + SkToU32(size)} + , fFibonacciProgression{SkToU32(size), SkToU32(firstHeapAllocation)} +{ + if (size < sizeof(Footer)) { + fEnd = fCursor = fDtorCursor = nullptr; + } + + if (fCursor != nullptr) { + this->installFooter(end_chain, 0); + sk_asan_poison_memory_region(fCursor, fEnd - fCursor); + } +} + +SkArenaAlloc::~SkArenaAlloc() { + RunDtorsOnBlock(fDtorCursor); +} + +void SkArenaAlloc::installFooter(FooterAction* action, uint32_t padding) { + assert(SkTFitsIn<uint8_t>(padding)); + this->installRaw(action); + this->installRaw((uint8_t)padding); + fDtorCursor = fCursor; +} + +char* SkArenaAlloc::SkipPod(char* footerEnd) { + char* objEnd = footerEnd - (sizeof(Footer) + sizeof(uint32_t)); + uint32_t skip; + memmove(&skip, objEnd, sizeof(uint32_t)); + return objEnd - (ptrdiff_t) skip; +} + +void SkArenaAlloc::RunDtorsOnBlock(char* footerEnd) { + while (footerEnd != nullptr) { + FooterAction* action; + uint8_t padding; + + memcpy(&action, footerEnd - sizeof( Footer), sizeof( action)); + memcpy(&padding, footerEnd - sizeof(padding), sizeof(padding)); + + footerEnd = action(footerEnd) - (ptrdiff_t)padding; + } +} + +char* SkArenaAlloc::NextBlock(char* footerEnd) { + char* objEnd = footerEnd - (sizeof(char*) + sizeof(Footer)); + char* next; + memmove(&next, objEnd, sizeof(char*)); + RunDtorsOnBlock(next); + sk_free(objEnd); + return nullptr; +} + +void SkArenaAlloc::ensureSpace(uint32_t size, uint32_t alignment) { + constexpr uint32_t headerSize = sizeof(Footer) + sizeof(ptrdiff_t); + constexpr uint32_t maxSize = std::numeric_limits<uint32_t>::max(); + constexpr uint32_t overhead = headerSize + sizeof(Footer); + AssertRelease(size <= maxSize - overhead); + uint32_t objSizeAndOverhead = size + overhead; + + const uint32_t alignmentOverhead = alignment - 1; + AssertRelease(objSizeAndOverhead <= maxSize - alignmentOverhead); + objSizeAndOverhead += alignmentOverhead; + + uint32_t minAllocationSize = fFibonacciProgression.nextBlockSize(); + uint32_t allocationSize = std::max(objSizeAndOverhead, minAllocationSize); + + // Round up to a nice size. If > 32K align to 4K boundary else up to max_align_t. The > 32K + // heuristic is from the JEMalloc behavior. + { + uint32_t mask = allocationSize > (1 << 15) ? (1 << 12) - 1 : 16 - 1; + AssertRelease(allocationSize <= maxSize - mask); + allocationSize = (allocationSize + mask) & ~mask; + } + + char* newBlock = static_cast<char*>(sk_malloc_throw(allocationSize)); + + auto previousDtor = fDtorCursor; + fCursor = newBlock; + fDtorCursor = newBlock; + fEnd = fCursor + allocationSize; + + // poison the unused bytes in the block. + sk_asan_poison_memory_region(fCursor, fEnd - fCursor); + + this->installRaw(previousDtor); + this->installFooter(NextBlock, 0); +} + +char* SkArenaAlloc::allocObjectWithFooter(uint32_t sizeIncludingFooter, uint32_t alignment) { + uintptr_t mask = alignment - 1; + +restart: + uint32_t skipOverhead = 0; + const bool needsSkipFooter = fCursor != fDtorCursor; + if (needsSkipFooter) { + skipOverhead = sizeof(Footer) + sizeof(uint32_t); + } + const uint32_t totalSize = sizeIncludingFooter + skipOverhead; + + // Math on null fCursor/fEnd is undefined behavior, so explicitly check for first alloc. 
+ if (!fCursor) { + this->ensureSpace(totalSize, alignment); + goto restart; + } + + assert(fEnd); + // This test alone would be enough nullptr were defined to be 0, but it's not. + char* objStart = (char*)((uintptr_t)(fCursor + skipOverhead + mask) & ~mask); + if ((ptrdiff_t)totalSize > fEnd - objStart) { + this->ensureSpace(totalSize, alignment); + goto restart; + } + + AssertRelease((ptrdiff_t)totalSize <= fEnd - objStart); + + // Install a skip footer if needed, thus terminating a run of POD data. The calling code is + // responsible for installing the footer after the object. + if (needsSkipFooter) { + this->installRaw(SkToU32(fCursor - fDtorCursor)); + this->installFooter(SkipPod, 0); + } + + return objStart; +} + +SkArenaAllocWithReset::SkArenaAllocWithReset(char* block, + size_t size, + size_t firstHeapAllocation) + : SkArenaAlloc(block, size, firstHeapAllocation) + , fFirstBlock{block} + , fFirstSize{SkToU32(size)} + , fFirstHeapAllocationSize{SkToU32(firstHeapAllocation)} {} + +void SkArenaAllocWithReset::reset() { + char* const firstBlock = fFirstBlock; + const uint32_t firstSize = fFirstSize; + const uint32_t firstHeapAllocationSize = fFirstHeapAllocationSize; + this->~SkArenaAllocWithReset(); + new (this) SkArenaAllocWithReset{firstBlock, firstSize, firstHeapAllocationSize}; +} + +// SkFibonacci47 is the first 47 Fibonacci numbers. Fib(47) is the largest value less than 2 ^ 32. +// Used by SkFibBlockSizes. +std::array<const uint32_t, 47> SkFibonacci47 { + 1, 1, 2, 3, 5, 8, + 13, 21, 34, 55, 89, 144, + 233, 377, 610, 987, 1597, 2584, + 4181, 6765, 10946, 17711, 28657, 46368, + 75025, 121393, 196418, 317811, 514229, 832040, + 1346269, 2178309, 3524578, 5702887, 9227465, 14930352, + 24157817, 39088169, 63245986, 102334155, 165580141, 267914296, + 433494437, 701408733, 1134903170, 1836311903, 2971215073, +}; diff --git a/gfx/skia/skia/src/base/SkArenaAlloc.h b/gfx/skia/skia/src/base/SkArenaAlloc.h new file mode 100644 index 0000000000..547f2c5910 --- /dev/null +++ b/gfx/skia/skia/src/base/SkArenaAlloc.h @@ -0,0 +1,336 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkArenaAlloc_DEFINED +#define SkArenaAlloc_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkTFitsIn.h" +#include "include/private/base/SkTo.h" +#include "src/base/SkASAN.h" + +#include <algorithm> +#include <array> +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <limits> +#include <new> +#include <type_traits> +#include <utility> + +// We found allocating strictly doubling amounts of memory from the heap left too +// much unused slop, particularly on Android. Instead we'll follow a Fibonacci-like +// progression. + +// SkFibonacci47 is the first 47 Fibonacci numbers. Fib(47) is the largest value less than 2 ^ 32. +extern std::array<const uint32_t, 47> SkFibonacci47; +template<uint32_t kMaxSize> +class SkFibBlockSizes { +public: + // staticBlockSize, and firstAllocationSize are parameters describing the initial memory + // layout. staticBlockSize describes the size of the inlined memory, and firstAllocationSize + // describes the size of the first block to be allocated if the static block is exhausted. By + // convention, firstAllocationSize is the first choice for the block unit size followed by + // staticBlockSize followed by the default of 1024 bytes. 
+ SkFibBlockSizes(uint32_t staticBlockSize, uint32_t firstAllocationSize) : fIndex{0} { + fBlockUnitSize = firstAllocationSize > 0 ? firstAllocationSize : + staticBlockSize > 0 ? staticBlockSize : 1024; + + SkASSERT_RELEASE(0 < fBlockUnitSize); + SkASSERT_RELEASE(fBlockUnitSize < std::min(kMaxSize, (1u << 26) - 1)); + } + + uint32_t nextBlockSize() { + uint32_t result = SkFibonacci47[fIndex] * fBlockUnitSize; + + if (SkTo<size_t>(fIndex + 1) < SkFibonacci47.size() && + SkFibonacci47[fIndex + 1] < kMaxSize / fBlockUnitSize) + { + fIndex += 1; + } + + return result; + } + +private: + uint32_t fIndex : 6; + uint32_t fBlockUnitSize : 26; +}; + +// SkArenaAlloc allocates object and destroys the allocated objects when destroyed. It's designed +// to minimize the number of underlying block allocations. SkArenaAlloc allocates first out of an +// (optional) user-provided block of memory, and when that's exhausted it allocates on the heap, +// starting with an allocation of firstHeapAllocation bytes. If your data (plus a small overhead) +// fits in the user-provided block, SkArenaAlloc never uses the heap, and if it fits in +// firstHeapAllocation bytes, it'll use the heap only once. If 0 is specified for +// firstHeapAllocation, then blockSize is used unless that too is 0, then 1024 is used. +// +// Examples: +// +// char block[mostCasesSize]; +// SkArenaAlloc arena(block, mostCasesSize); +// +// If mostCasesSize is too large for the stack, you can use the following pattern. +// +// std::unique_ptr<char[]> block{new char[mostCasesSize]}; +// SkArenaAlloc arena(block.get(), mostCasesSize, almostAllCasesSize); +// +// If the program only sometimes allocates memory, use the following pattern. +// +// SkArenaAlloc arena(nullptr, 0, almostAllCasesSize); +// +// The storage does not necessarily need to be on the stack. Embedding the storage in a class also +// works. +// +// class Foo { +// char storage[mostCasesSize]; +// SkArenaAlloc arena (storage, mostCasesSize); +// }; +// +// In addition, the system is optimized to handle POD data including arrays of PODs (where +// POD is really data with no destructors). For POD data it has zero overhead per item, and a +// typical per block overhead of 8 bytes. For non-POD objects there is a per item overhead of 4 +// bytes. For arrays of non-POD objects there is a per array overhead of typically 8 bytes. There +// is an addition overhead when switching from POD data to non-POD data of typically 8 bytes. +// +// If additional blocks are needed they are increased exponentially. This strategy bounds the +// recursion of the RunDtorsOnBlock to be limited to O(log size-of-memory). Block size grow using +// the Fibonacci sequence which means that for 2^32 memory there are 48 allocations, and for 2^48 +// there are 71 allocations. 
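To make the Fibonacci-like growth described above concrete, here is a standalone sketch of the progression SkFibBlockSizes produces. It reimplements only the arithmetic, not the Skia class, and the 1024-byte unit is an arbitrary example: each new heap block is the next Fibonacci number times the block unit size, so blocks grow by 1, 1, 2, 3, 5, 8, ... units rather than doubling.

    // Standalone illustration of the Fibonacci-like block-size progression
    // used by SkArenaAlloc; values are examples, clamping near 2^32 is omitted.
    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint32_t blockUnitSize = 1024;   // e.g. firstHeapAllocation
        uint32_t fib0 = 1, fib1 = 1;           // consecutive Fibonacci numbers

        for (int block = 0; block < 8; ++block) {
            std::printf("heap block %d: %u bytes\n",
                        block, (unsigned)(fib0 * blockUnitSize));
            uint32_t next = fib0 + fib1;       // advance the sequence
            fib0 = fib1;
            fib1 = next;
        }
        return 0;
    }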
+class SkArenaAlloc { +public: + SkArenaAlloc(char* block, size_t blockSize, size_t firstHeapAllocation); + + explicit SkArenaAlloc(size_t firstHeapAllocation) + : SkArenaAlloc(nullptr, 0, firstHeapAllocation) {} + + SkArenaAlloc(const SkArenaAlloc&) = delete; + SkArenaAlloc& operator=(const SkArenaAlloc&) = delete; + SkArenaAlloc(SkArenaAlloc&&) = delete; + SkArenaAlloc& operator=(SkArenaAlloc&&) = delete; + + ~SkArenaAlloc(); + + template <typename Ctor> + auto make(Ctor&& ctor) -> decltype(ctor(nullptr)) { + using T = std::remove_pointer_t<decltype(ctor(nullptr))>; + + uint32_t size = SkToU32(sizeof(T)); + uint32_t alignment = SkToU32(alignof(T)); + char* objStart; + if (std::is_trivially_destructible<T>::value) { + objStart = this->allocObject(size, alignment); + fCursor = objStart + size; + sk_asan_unpoison_memory_region(objStart, size); + } else { + objStart = this->allocObjectWithFooter(size + sizeof(Footer), alignment); + // Can never be UB because max value is alignof(T). + uint32_t padding = SkToU32(objStart - fCursor); + + // Advance to end of object to install footer. + fCursor = objStart + size; + sk_asan_unpoison_memory_region(objStart, size); + FooterAction* releaser = [](char* objEnd) { + char* objStart = objEnd - (sizeof(T) + sizeof(Footer)); + ((T*)objStart)->~T(); + return objStart; + }; + this->installFooter(releaser, padding); + } + + // This must be last to make objects with nested use of this allocator work. + return ctor(objStart); + } + + template <typename T, typename... Args> + T* make(Args&&... args) { + return this->make([&](void* objStart) { + return new(objStart) T(std::forward<Args>(args)...); + }); + } + + template <typename T> + T* makeArrayDefault(size_t count) { + T* array = this->allocUninitializedArray<T>(count); + for (size_t i = 0; i < count; i++) { + // Default initialization: if T is primitive then the value is left uninitialized. + new (&array[i]) T; + } + return array; + } + + template <typename T> + T* makeArray(size_t count) { + T* array = this->allocUninitializedArray<T>(count); + for (size_t i = 0; i < count; i++) { + // Value initialization: if T is primitive then the value is zero-initialized. + new (&array[i]) T(); + } + return array; + } + + template <typename T, typename Initializer> + T* makeInitializedArray(size_t count, Initializer initializer) { + T* array = this->allocUninitializedArray<T>(count); + for (size_t i = 0; i < count; i++) { + new (&array[i]) T(initializer(i)); + } + return array; + } + + // Only use makeBytesAlignedTo if none of the typed variants are impractical to use. 
+ void* makeBytesAlignedTo(size_t size, size_t align) { + AssertRelease(SkTFitsIn<uint32_t>(size)); + auto objStart = this->allocObject(SkToU32(size), SkToU32(align)); + fCursor = objStart + size; + sk_asan_unpoison_memory_region(objStart, size); + return objStart; + } + +private: + static void AssertRelease(bool cond) { if (!cond) { ::abort(); } } + + using FooterAction = char* (char*); + struct Footer { + uint8_t unaligned_action[sizeof(FooterAction*)]; + uint8_t padding; + }; + + static char* SkipPod(char* footerEnd); + static void RunDtorsOnBlock(char* footerEnd); + static char* NextBlock(char* footerEnd); + + template <typename T> + void installRaw(const T& val) { + sk_asan_unpoison_memory_region(fCursor, sizeof(val)); + memcpy(fCursor, &val, sizeof(val)); + fCursor += sizeof(val); + } + void installFooter(FooterAction* releaser, uint32_t padding); + + void ensureSpace(uint32_t size, uint32_t alignment); + + char* allocObject(uint32_t size, uint32_t alignment) { + uintptr_t mask = alignment - 1; + uintptr_t alignedOffset = (~reinterpret_cast<uintptr_t>(fCursor) + 1) & mask; + uintptr_t totalSize = size + alignedOffset; + AssertRelease(totalSize >= size); + if (totalSize > static_cast<uintptr_t>(fEnd - fCursor)) { + this->ensureSpace(size, alignment); + alignedOffset = (~reinterpret_cast<uintptr_t>(fCursor) + 1) & mask; + } + + char* object = fCursor + alignedOffset; + + SkASSERT((reinterpret_cast<uintptr_t>(object) & (alignment - 1)) == 0); + SkASSERT(object + size <= fEnd); + + return object; + } + + char* allocObjectWithFooter(uint32_t sizeIncludingFooter, uint32_t alignment); + + template <typename T> + T* allocUninitializedArray(size_t countZ) { + AssertRelease(SkTFitsIn<uint32_t>(countZ)); + uint32_t count = SkToU32(countZ); + + char* objStart; + AssertRelease(count <= std::numeric_limits<uint32_t>::max() / sizeof(T)); + uint32_t arraySize = SkToU32(count * sizeof(T)); + uint32_t alignment = SkToU32(alignof(T)); + + if (std::is_trivially_destructible<T>::value) { + objStart = this->allocObject(arraySize, alignment); + fCursor = objStart + arraySize; + sk_asan_unpoison_memory_region(objStart, arraySize); + } else { + constexpr uint32_t overhead = sizeof(Footer) + sizeof(uint32_t); + AssertRelease(arraySize <= std::numeric_limits<uint32_t>::max() - overhead); + uint32_t totalSize = arraySize + overhead; + objStart = this->allocObjectWithFooter(totalSize, alignment); + + // Can never be UB because max value is alignof(T). + uint32_t padding = SkToU32(objStart - fCursor); + + // Advance to end of array to install footer. 
+ fCursor = objStart + arraySize; + sk_asan_unpoison_memory_region(objStart, arraySize); + this->installRaw(SkToU32(count)); + this->installFooter( + [](char* footerEnd) { + char* objEnd = footerEnd - (sizeof(Footer) + sizeof(uint32_t)); + uint32_t count; + memmove(&count, objEnd, sizeof(uint32_t)); + char* objStart = objEnd - count * sizeof(T); + T* array = (T*) objStart; + for (uint32_t i = 0; i < count; i++) { + array[i].~T(); + } + return objStart; + }, + padding); + } + + return (T*)objStart; + } + + char* fDtorCursor; + char* fCursor; + char* fEnd; + + SkFibBlockSizes<std::numeric_limits<uint32_t>::max()> fFibonacciProgression; +}; + +class SkArenaAllocWithReset : public SkArenaAlloc { +public: + SkArenaAllocWithReset(char* block, size_t blockSize, size_t firstHeapAllocation); + + explicit SkArenaAllocWithReset(size_t firstHeapAllocation) + : SkArenaAllocWithReset(nullptr, 0, firstHeapAllocation) {} + + // Destroy all allocated objects, free any heap allocations. + void reset(); + +private: + char* const fFirstBlock; + const uint32_t fFirstSize; + const uint32_t fFirstHeapAllocationSize; +}; + +// Helper for defining allocators with inline/reserved storage. +// For argument declarations, stick to the base type (SkArenaAlloc). +// Note: Inheriting from the storage first means the storage will outlive the +// SkArenaAlloc, letting ~SkArenaAlloc read it as it calls destructors. +// (This is mostly only relevant for strict tools like MSAN.) +template <size_t InlineStorageSize> +class SkSTArenaAlloc : private std::array<char, InlineStorageSize>, public SkArenaAlloc { +public: + explicit SkSTArenaAlloc(size_t firstHeapAllocation = InlineStorageSize) + : SkArenaAlloc{this->data(), this->size(), firstHeapAllocation} {} + + ~SkSTArenaAlloc() { + // Be sure to unpoison the memory that is probably on the stack. + sk_asan_unpoison_memory_region(this->data(), this->size()); + } +}; + +template <size_t InlineStorageSize> +class SkSTArenaAllocWithReset + : private std::array<char, InlineStorageSize>, public SkArenaAllocWithReset { +public: + explicit SkSTArenaAllocWithReset(size_t firstHeapAllocation = InlineStorageSize) + : SkArenaAllocWithReset{this->data(), this->size(), firstHeapAllocation} {} + + ~SkSTArenaAllocWithReset() { + // Be sure to unpoison the memory that is probably on the stack. + sk_asan_unpoison_memory_region(this->data(), this->size()); + } +}; + +#endif // SkArenaAlloc_DEFINED diff --git a/gfx/skia/skia/src/base/SkArenaAllocList.h b/gfx/skia/skia/src/base/SkArenaAllocList.h new file mode 100644 index 0000000000..57bce52023 --- /dev/null +++ b/gfx/skia/skia/src/base/SkArenaAllocList.h @@ -0,0 +1,82 @@ +/* + * Copyright 2017 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkArenaAllocList_DEFINED +#define SkArenaAllocList_DEFINED + +#include "include/private/base/SkAssert.h" +#include "src/base/SkArenaAlloc.h" // IWYU pragma: keep + +#include <utility> + +/** + * A singly linked list of Ts stored in a SkArenaAlloc. The arena rather than the list owns + * the elements. This supports forward iteration and range based for loops. + */ +template <typename T> +class SkArenaAllocList { +private: + struct Node; + +public: + SkArenaAllocList() = default; + + void reset() { fHead = fTail = nullptr; } + + template <typename... Args> + inline T& append(SkArenaAlloc* arena, Args... 
args); + + class Iter { + public: + Iter() = default; + inline Iter& operator++(); + T& operator*() const { return fCurr->fT; } + T* operator->() const { return &fCurr->fT; } + bool operator==(const Iter& that) const { return fCurr == that.fCurr; } + bool operator!=(const Iter& that) const { return !(*this == that); } + + private: + friend class SkArenaAllocList; + explicit Iter(Node* node) : fCurr(node) {} + Node* fCurr = nullptr; + }; + + Iter begin() { return Iter(fHead); } + Iter end() { return Iter(); } + Iter tail() { return Iter(fTail); } + +private: + struct Node { + template <typename... Args> + Node(Args... args) : fT(std::forward<Args>(args)...) {} + T fT; + Node* fNext = nullptr; + }; + Node* fHead = nullptr; + Node* fTail = nullptr; +}; + +template <typename T> +template <typename... Args> +T& SkArenaAllocList<T>::append(SkArenaAlloc* arena, Args... args) { + SkASSERT(!fHead == !fTail); + auto* n = arena->make<Node>(std::forward<Args>(args)...); + if (!fTail) { + fHead = fTail = n; + } else { + fTail = fTail->fNext = n; + } + return fTail->fT; +} + +template <typename T> +typename SkArenaAllocList<T>::Iter& SkArenaAllocList<T>::Iter::operator++() { + fCurr = fCurr->fNext; + return *this; +} + +#endif diff --git a/gfx/skia/skia/src/base/SkAutoMalloc.h b/gfx/skia/skia/src/base/SkAutoMalloc.h new file mode 100644 index 0000000000..6520cc0582 --- /dev/null +++ b/gfx/skia/skia/src/base/SkAutoMalloc.h @@ -0,0 +1,178 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkAutoMalloc_DEFINED +#define SkAutoMalloc_DEFINED + +#include "include/private/base/SkAlign.h" +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkMalloc.h" +#include "include/private/base/SkNoncopyable.h" + +#include <cstddef> +#include <cstdint> +#include <memory> + +/** + * Manage an allocated block of heap memory. This object is the sole manager of + * the lifetime of the block, so the caller must not call sk_free() or delete + * on the block, unless release() was called. + */ +class SkAutoMalloc : SkNoncopyable { +public: + explicit SkAutoMalloc(size_t size = 0) + : fPtr(size ? sk_malloc_throw(size) : nullptr), fSize(size) {} + + /** + * Passed to reset to specify what happens if the requested size is smaller + * than the current size (and the current block was dynamically allocated). + */ + enum OnShrink { + /** + * If the requested size is smaller than the current size, and the + * current block is dynamically allocated, free the old block and + * malloc a new block of the smaller size. + */ + kAlloc_OnShrink, + + /** + * If the requested size is smaller than the current size, and the + * current block is dynamically allocated, just return the old + * block. + */ + kReuse_OnShrink + }; + + /** + * Reallocates the block to a new size. The ptr may or may not change. + */ + void* reset(size_t size = 0, OnShrink shrink = kAlloc_OnShrink) { + if (size != fSize && (size > fSize || kReuse_OnShrink != shrink)) { + fPtr.reset(size ? sk_malloc_throw(size) : nullptr); + fSize = size; + } + return fPtr.get(); + } + + /** + * Return the allocated block. + */ + void* get() { return fPtr.get(); } + const void* get() const { return fPtr.get(); } + + /** Transfer ownership of the current ptr to the caller, setting the + internal reference to null. Note the caller is reponsible for calling + sk_free on the returned address. 
+ */ + void* release() { + fSize = 0; + return fPtr.release(); + } + +private: + struct WrapFree { + void operator()(void* p) { sk_free(p); } + }; + std::unique_ptr<void, WrapFree> fPtr; + size_t fSize; // can be larger than the requested size (see kReuse) +}; + +/** + * Manage an allocated block of memory. If the requested size is <= kSizeRequested (or slightly + * more), then the allocation will come from the stack rather than the heap. This object is the + * sole manager of the lifetime of the block, so the caller must not call sk_free() or delete on + * the block. + */ +template <size_t kSizeRequested> class SkAutoSMalloc : SkNoncopyable { +public: + /** + * Creates initially empty storage. get() returns a ptr, but it is to a zero-byte allocation. + * Must call reset(size) to return an allocated block. + */ + SkAutoSMalloc() { + fPtr = fStorage; + fSize = kSize; + } + + /** + * Allocate a block of the specified size. If size <= kSizeRequested (or slightly more), then + * the allocation will come from the stack, otherwise it will be dynamically allocated. + */ + explicit SkAutoSMalloc(size_t size) { + fPtr = fStorage; + fSize = kSize; + this->reset(size); + } + + /** + * Free the allocated block (if any). If the block was small enough to have been allocated on + * the stack, then this does nothing. + */ + ~SkAutoSMalloc() { + if (fPtr != (void*)fStorage) { + sk_free(fPtr); + } + } + + /** + * Return the allocated block. May return non-null even if the block is of zero size. Since + * this may be on the stack or dynamically allocated, the caller must not call sk_free() on it, + * but must rely on SkAutoSMalloc to manage it. + */ + void* get() const { return fPtr; } + + /** + * Return a new block of the requested size, freeing (as necessary) any previously allocated + * block. As with the constructor, if size <= kSizeRequested (or slightly more) then the return + * block may be allocated locally, rather than from the heap. + */ + void* reset(size_t size, + SkAutoMalloc::OnShrink shrink = SkAutoMalloc::kAlloc_OnShrink, + bool* didChangeAlloc = nullptr) { + size = (size < kSize) ? kSize : size; + bool alloc = size != fSize && (SkAutoMalloc::kAlloc_OnShrink == shrink || size > fSize); + if (didChangeAlloc) { + *didChangeAlloc = alloc; + } + if (alloc) { + if (fPtr != (void*)fStorage) { + sk_free(fPtr); + } + + if (size == kSize) { + SkASSERT(fPtr != fStorage); // otherwise we lied when setting didChangeAlloc. + fPtr = fStorage; + } else { + fPtr = sk_malloc_throw(size); + } + + fSize = size; + } + SkASSERT(fSize >= size && fSize >= kSize); + SkASSERT((fPtr == fStorage) || fSize > kSize); + return fPtr; + } + +private: + // Align up to 32 bits. + static const size_t kSizeAlign4 = SkAlign4(kSizeRequested); +#if defined(SK_BUILD_FOR_GOOGLE3) + // Stack frame size is limited for SK_BUILD_FOR_GOOGLE3. 4k is less than the actual max, but some functions + // have multiple large stack allocations. + static const size_t kMaxBytes = 4 * 1024; + static const size_t kSize = kSizeRequested > kMaxBytes ? kMaxBytes : kSizeAlign4; +#else + static const size_t kSize = kSizeAlign4; +#endif + + void* fPtr; + size_t fSize; // can be larger than the requested size (see kReuse) + uint32_t fStorage[kSize >> 2]; +}; +// Can't guard the constructor because it's a template class. 
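A short usage sketch for the two helpers above (the include assumes the Skia source tree is on the include path, and the sizes are arbitrary example values): SkAutoSMalloc serves requests up to its template size from inline storage, falls back to the heap for larger ones, and kReuse_OnShrink lets reset() keep the larger block when shrinking.

    // Hedged usage sketch for SkAutoMalloc / SkAutoSMalloc.
    #include "src/base/SkAutoMalloc.h"

    #include <cstring>

    void autoMallocExample() {               // hypothetical example function
        // Heap block, freed automatically when 'heapBuf' goes out of scope.
        SkAutoMalloc heapBuf(4096);
        std::memset(heapBuf.get(), 0, 4096);

        // 64 bytes of inline storage: this request never touches the heap.
        SkAutoSMalloc<64> smallBuf(32);

        // A larger request falls back to a heap allocation...
        void* big = smallBuf.reset(1024);
        std::memset(big, 0, 1024);

        // ...and kReuse_OnShrink keeps that block when shrinking again.
        smallBuf.reset(16, SkAutoMalloc::kReuse_OnShrink);
    }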
+ +#endif diff --git a/gfx/skia/skia/src/base/SkBezierCurves.cpp b/gfx/skia/skia/src/base/SkBezierCurves.cpp new file mode 100644 index 0000000000..a79129ff7d --- /dev/null +++ b/gfx/skia/skia/src/base/SkBezierCurves.cpp @@ -0,0 +1,111 @@ +/* + * Copyright 2012 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "src/base/SkBezierCurves.h" + +#include "include/private/base/SkAssert.h" + +#include <cstddef> + +static inline double interpolate(double A, double B, double t) { + return A + (B - A) * t; +} + +std::array<double, 2> SkBezierCubic::EvalAt(const double curve[8], double t) { + const auto in_X = [&curve](size_t n) { return curve[2*n]; }; + const auto in_Y = [&curve](size_t n) { return curve[2*n + 1]; }; + + // Two semi-common fast paths + if (t == 0) { + return {in_X(0), in_Y(0)}; + } + if (t == 1) { + return {in_X(3), in_Y(3)}; + } + // X(t) = X_0*(1-t)^3 + 3*X_1*t(1-t)^2 + 3*X_2*t^2(1-t) + X_3*t^3 + // Y(t) = Y_0*(1-t)^3 + 3*Y_1*t(1-t)^2 + 3*Y_2*t^2(1-t) + Y_3*t^3 + // Some compilers are smart enough and have sufficient registers/intrinsics to write optimal + // code from + // double one_minus_t = 1 - t; + // double a = one_minus_t * one_minus_t * one_minus_t; + // double b = 3 * one_minus_t * one_minus_t * t; + // double c = 3 * one_minus_t * t * t; + // double d = t * t * t; + // However, some (e.g. when compiling for ARM) fail to do so, so we use this form + // to help more compilers generate smaller/faster ASM. https://godbolt.org/z/M6jG9x45c + double one_minus_t = 1 - t; + double one_minus_t_squared = one_minus_t * one_minus_t; + double a = (one_minus_t_squared * one_minus_t); + double b = 3 * one_minus_t_squared * t; + double t_squared = t * t; + double c = 3 * one_minus_t * t_squared; + double d = t_squared * t; + + return {a * in_X(0) + b * in_X(1) + c * in_X(2) + d * in_X(3), + a * in_Y(0) + b * in_Y(1) + c * in_Y(2) + d * in_Y(3)}; +} + +// Perform subdivision using De Casteljau's algorithm, that is, repeated linear +// interpolation between adjacent points. 
+void SkBezierCubic::Subdivide(const double curve[8], double t, + double twoCurves[14]) { + SkASSERT(0.0 <= t && t <= 1.0); + // We split the curve "in" into two curves "alpha" and "beta" + const auto in_X = [&curve](size_t n) { return curve[2*n]; }; + const auto in_Y = [&curve](size_t n) { return curve[2*n + 1]; }; + const auto alpha_X = [&twoCurves](size_t n) -> double& { return twoCurves[2*n]; }; + const auto alpha_Y = [&twoCurves](size_t n) -> double& { return twoCurves[2*n + 1]; }; + const auto beta_X = [&twoCurves](size_t n) -> double& { return twoCurves[2*n + 6]; }; + const auto beta_Y = [&twoCurves](size_t n) -> double& { return twoCurves[2*n + 7]; }; + + alpha_X(0) = in_X(0); + alpha_Y(0) = in_Y(0); + + beta_X(3) = in_X(3); + beta_Y(3) = in_Y(3); + + double x01 = interpolate(in_X(0), in_X(1), t); + double y01 = interpolate(in_Y(0), in_Y(1), t); + double x12 = interpolate(in_X(1), in_X(2), t); + double y12 = interpolate(in_Y(1), in_Y(2), t); + double x23 = interpolate(in_X(2), in_X(3), t); + double y23 = interpolate(in_Y(2), in_Y(3), t); + + alpha_X(1) = x01; + alpha_Y(1) = y01; + + beta_X(2) = x23; + beta_Y(2) = y23; + + alpha_X(2) = interpolate(x01, x12, t); + alpha_Y(2) = interpolate(y01, y12, t); + + beta_X(1) = interpolate(x12, x23, t); + beta_Y(1) = interpolate(y12, y23, t); + + alpha_X(3) /*= beta_X(0) */ = interpolate(alpha_X(2), beta_X(1), t); + alpha_Y(3) /*= beta_Y(0) */ = interpolate(alpha_Y(2), beta_Y(1), t); +} + +std::array<double, 4> SkBezierCubic::ConvertToPolynomial(const double curve[8], bool yValues) { + const double* offset_curve = yValues ? curve + 1 : curve; + const auto P = [&offset_curve](size_t n) { return offset_curve[2*n]; }; + // A cubic Bézier curve is interpolated as follows: + // c(t) = (1 - t)^3 P_0 + 3t(1 - t)^2 P_1 + 3t^2 (1 - t) P_2 + t^3 P_3 + // = (-P_0 + 3P_1 + -3P_2 + P_3) t^3 + (3P_0 - 6P_1 + 3P_2) t^2 + + // (-3P_0 + 3P_1) t + P_0 + // Where P_N is the Nth point. The second step expands the polynomial and groups + // by powers of t. The desired output is a cubic formula, so we just need to + // combine the appropriate points to make the coefficients. + std::array<double, 4> results; + results[0] = -P(0) + 3*P(1) - 3*P(2) + P(3); + results[1] = 3*P(0) - 6*P(1) + 3*P(2); + results[2] = -3*P(0) + 3*P(1); + results[3] = P(0); + return results; +} + diff --git a/gfx/skia/skia/src/base/SkBezierCurves.h b/gfx/skia/skia/src/base/SkBezierCurves.h new file mode 100644 index 0000000000..772fee4bf7 --- /dev/null +++ b/gfx/skia/skia/src/base/SkBezierCurves.h @@ -0,0 +1,63 @@ +/* + * Copyright 2023 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ +#ifndef SkBezierCurves_DEFINED +#define SkBezierCurves_DEFINED + +#include <array> + +/** + * Utilities for dealing with cubic Bézier curves. These have a start XY + * point, an end XY point, and two control XY points in between. They take + * a parameter t which is between 0 and 1 (inclusive) which is used to + * interpolate between the start and end points, via a route dictated by + * the control points, and return a new XY point. + * + * We store a Bézier curve as an array of 8 floats or doubles, where + * the even indices are the X coordinates, and the odd indices are the Y + * coordinates. + */ +class SkBezierCubic { +public: + + /** + * Evaluates the cubic Bézier curve for a given t. It returns an X and Y coordinate + * following the formula, which does the interpolation mentioned above. 
+ * X(t) = X_0*(1-t)^3 + 3*X_1*t(1-t)^2 + 3*X_2*t^2(1-t) + X_3*t^3 + * Y(t) = Y_0*(1-t)^3 + 3*Y_1*t(1-t)^2 + 3*Y_2*t^2(1-t) + Y_3*t^3 + * + * t is typically in the range [0, 1], but this function will not assert that, + * as Bézier curves are well-defined for any real number input. + */ + static std::array<double, 2> EvalAt(const double curve[8], double t); + + /** + * Splits the provided Bézier curve at the location t, resulting in two + * Bézier curves that share a point (the end point from curve 1 + * and the start point from curve 2 are the same). + * + * t must be in the interval [0, 1]. + * + * The provided twoCurves array will be filled such that indices + * 0-7 are the first curve (representing the interval [0, t]), and + * indices 6-13 are the second curve (representing [t, 1]). + */ + static void Subdivide(const double curve[8], double t, + double twoCurves[14]); + + /** + * Converts the provided Bézier curve into the the equivalent cubic + * f(t) = A*t^3 + B*t^2 + C*t + D + * where f(t) will represent Y coordinates over time if yValues is + * true and the X coordinates if yValues is false. + * + * In effect, this turns the control points into an actual line, representing + * the x or y values. + */ + static std::array<double, 4> ConvertToPolynomial(const double curve[8], bool yValues); +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkBlockAllocator.cpp b/gfx/skia/skia/src/base/SkBlockAllocator.cpp new file mode 100644 index 0000000000..e62fc2078d --- /dev/null +++ b/gfx/skia/skia/src/base/SkBlockAllocator.cpp @@ -0,0 +1,302 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "src/base/SkBlockAllocator.h" + +#include "include/private/base/SkDebug.h" +#include "include/private/base/SkTo.h" + +#ifdef SK_DEBUG +#include <vector> +#endif + +SkBlockAllocator::SkBlockAllocator(GrowthPolicy policy, size_t blockIncrementBytes, + size_t additionalPreallocBytes) + : fTail(&fHead) + // Round up to the nearest max-aligned value, and then divide so that fBlockSizeIncrement + // can effectively fit higher byte counts in its 16 bits of storage + , fBlockIncrement(SkTo<uint16_t>( + std::min(SkAlignTo(blockIncrementBytes, kAddressAlign) / kAddressAlign, + (size_t) std::numeric_limits<uint16_t>::max()))) + , fGrowthPolicy(static_cast<uint64_t>(policy)) + , fN0((policy == GrowthPolicy::kLinear || policy == GrowthPolicy::kExponential) ? 1 : 0) + , fN1(1) + // The head block always fills remaining space from SkBlockAllocator's size, because it's + // inline, but can take over the specified number of bytes immediately after it. 
+ , fHead(/*prev=*/nullptr, additionalPreallocBytes + BaseHeadBlockSize()) { + SkASSERT(fBlockIncrement >= 1); + SkASSERT(additionalPreallocBytes <= kMaxAllocationSize); +} + +SkBlockAllocator::Block::Block(Block* prev, int allocationSize) + : fNext(nullptr) + , fPrev(prev) + , fSize(allocationSize) + , fCursor(kDataStart) + , fMetadata(0) + , fAllocatorMetadata(0) { + SkASSERT(allocationSize >= (int) sizeof(Block)); + SkDEBUGCODE(fSentinel = kAssignedMarker;) + + this->poisonRange(kDataStart, fSize); +} + +SkBlockAllocator::Block::~Block() { + this->unpoisonRange(kDataStart, fSize); + + SkASSERT(fSentinel == kAssignedMarker); + SkDEBUGCODE(fSentinel = kFreedMarker;) // FWIW +} + +size_t SkBlockAllocator::totalSize() const { + // Use size_t since the sum across all blocks could exceed 'int', even though each block won't + size_t size = offsetof(SkBlockAllocator, fHead) + this->scratchBlockSize(); + for (const Block* b : this->blocks()) { + size += b->fSize; + } + SkASSERT(size >= this->preallocSize()); + return size; +} + +size_t SkBlockAllocator::totalUsableSpace() const { + size_t size = this->scratchBlockSize(); + if (size > 0) { + size -= kDataStart; // scratchBlockSize reports total block size, not usable size + } + for (const Block* b : this->blocks()) { + size += (b->fSize - kDataStart); + } + SkASSERT(size >= this->preallocUsableSpace()); + return size; +} + +size_t SkBlockAllocator::totalSpaceInUse() const { + size_t size = 0; + for (const Block* b : this->blocks()) { + size += (b->fCursor - kDataStart); + } + SkASSERT(size <= this->totalUsableSpace()); + return size; +} + +SkBlockAllocator::Block* SkBlockAllocator::findOwningBlock(const void* p) { + // When in doubt, search in reverse to find an overlapping block. + uintptr_t ptr = reinterpret_cast<uintptr_t>(p); + for (Block* b : this->rblocks()) { + uintptr_t lowerBound = reinterpret_cast<uintptr_t>(b) + kDataStart; + uintptr_t upperBound = reinterpret_cast<uintptr_t>(b) + b->fSize; + if (lowerBound <= ptr && ptr < upperBound) { + SkASSERT(b->fSentinel == kAssignedMarker); + return b; + } + } + return nullptr; +} + +void SkBlockAllocator::releaseBlock(Block* block) { + if (block == &fHead) { + // Reset the cursor of the head block so that it can be reused if it becomes the new tail + block->fCursor = kDataStart; + block->fMetadata = 0; + block->poisonRange(kDataStart, block->fSize); + // Unlike in reset(), we don't set the head's next block to null because there are + // potentially heap-allocated blocks that are still connected to it. 
+ } else { + SkASSERT(block->fPrev); + block->fPrev->fNext = block->fNext; + if (block->fNext) { + SkASSERT(fTail != block); + block->fNext->fPrev = block->fPrev; + } else { + SkASSERT(fTail == block); + fTail = block->fPrev; + } + + // The released block becomes the new scratch block (if it's bigger), or delete it + if (this->scratchBlockSize() < block->fSize) { + SkASSERT(block != fHead.fPrev); // shouldn't already be the scratch block + if (fHead.fPrev) { + delete fHead.fPrev; + } + block->markAsScratch(); + fHead.fPrev = block; + } else { + delete block; + } + } + + // Decrement growth policy (opposite of addBlock()'s increment operations) + GrowthPolicy gp = static_cast<GrowthPolicy>(fGrowthPolicy); + if (fN0 > 0 && (fN1 > 1 || gp == GrowthPolicy::kFibonacci)) { + SkASSERT(gp != GrowthPolicy::kFixed); // fixed never needs undoing, fN0 always is 0 + if (gp == GrowthPolicy::kLinear) { + fN1 = fN1 - fN0; + } else if (gp == GrowthPolicy::kFibonacci) { + // Subtract n0 from n1 to get the prior 2 terms in the fibonacci sequence + int temp = fN1 - fN0; // yields prior fN0 + fN1 = fN1 - temp; // yields prior fN1 + fN0 = temp; + } else { + SkASSERT(gp == GrowthPolicy::kExponential); + // Divide by 2 to undo the 2N update from addBlock + fN1 = fN1 >> 1; + fN0 = fN1; + } + } + + SkASSERT(fN1 >= 1 && fN0 >= 0); +} + +void SkBlockAllocator::stealHeapBlocks(SkBlockAllocator* other) { + Block* toSteal = other->fHead.fNext; + if (toSteal) { + // The other's next block connects back to this allocator's current tail, and its new tail + // becomes the end of other's block linked list. + SkASSERT(other->fTail != &other->fHead); + toSteal->fPrev = fTail; + fTail->fNext = toSteal; + fTail = other->fTail; + // The other allocator becomes just its inline head block + other->fTail = &other->fHead; + other->fHead.fNext = nullptr; + } // else no block to steal +} + +void SkBlockAllocator::reset() { + for (Block* b : this->rblocks()) { + if (b == &fHead) { + // Reset metadata and cursor, tail points to the head block again + fTail = b; + b->fNext = nullptr; + b->fCursor = kDataStart; + b->fMetadata = 0; + // For reset(), but NOT releaseBlock(), the head allocatorMetadata and scratch block + // are reset/destroyed. + b->fAllocatorMetadata = 0; + b->poisonRange(kDataStart, b->fSize); + this->resetScratchSpace(); + } else { + delete b; + } + } + SkASSERT(fTail == &fHead && fHead.fNext == nullptr && fHead.fPrev == nullptr && + fHead.metadata() == 0 && fHead.fCursor == kDataStart); + + GrowthPolicy gp = static_cast<GrowthPolicy>(fGrowthPolicy); + fN0 = (gp == GrowthPolicy::kLinear || gp == GrowthPolicy::kExponential) ? 1 : 0; + fN1 = 1; +} + +void SkBlockAllocator::resetScratchSpace() { + if (fHead.fPrev) { + delete fHead.fPrev; + fHead.fPrev = nullptr; + } +} + +void SkBlockAllocator::addBlock(int minSize, int maxSize) { + SkASSERT(minSize > (int) sizeof(Block) && minSize <= maxSize); + + // Max positive value for uint:23 storage (decltype(fN0) picks up uint64_t, not uint:23). + static constexpr int kMaxN = (1 << 23) - 1; + static_assert(2 * kMaxN <= std::numeric_limits<int32_t>::max()); // Growth policy won't overflow + + auto alignAllocSize = [](int size) { + // Round to a nice boundary since the block isn't maxing out: + // if allocSize > 32K, aligns on 4K boundary otherwise aligns on max_align_t, to play + // nicely with jeMalloc (from SkArenaAlloc). + int mask = size > (1 << 15) ? 
((1 << 12) - 1) : (kAddressAlign - 1); + return (size + mask) & ~mask; + }; + + int allocSize; + void* mem = nullptr; + if (this->scratchBlockSize() >= minSize) { + // Activate the scratch block instead of making a new block + SkASSERT(fHead.fPrev->isScratch()); + allocSize = fHead.fPrev->fSize; + mem = fHead.fPrev; + fHead.fPrev = nullptr; + } else if (minSize < maxSize) { + // Calculate the 'next' size per growth policy sequence + GrowthPolicy gp = static_cast<GrowthPolicy>(fGrowthPolicy); + int nextN1 = fN0 + fN1; + int nextN0; + if (gp == GrowthPolicy::kFixed || gp == GrowthPolicy::kLinear) { + nextN0 = fN0; + } else if (gp == GrowthPolicy::kFibonacci) { + nextN0 = fN1; + } else { + SkASSERT(gp == GrowthPolicy::kExponential); + nextN0 = nextN1; + } + fN0 = std::min(kMaxN, nextN0); + fN1 = std::min(kMaxN, nextN1); + + // However, must guard against overflow here, since all the size-based asserts prevented + // alignment/addition overflows, while multiplication requires 2x bits instead of x+1. + int sizeIncrement = fBlockIncrement * kAddressAlign; + if (maxSize / sizeIncrement < nextN1) { + // The growth policy would overflow, so use the max. We've already confirmed that + // maxSize will be sufficient for the requested minimumSize + allocSize = maxSize; + } else { + allocSize = std::min(alignAllocSize(std::max(minSize, sizeIncrement * nextN1)), + maxSize); + } + } else { + SkASSERT(minSize == maxSize); + // Still align on a nice boundary, no max clamping since that would just undo the alignment + allocSize = alignAllocSize(minSize); + } + + // Create new block and append to the linked list of blocks in this allocator + if (!mem) { + mem = operator new(allocSize); + } + fTail->fNext = new (mem) Block(fTail, allocSize); + fTail = fTail->fNext; +} + +#ifdef SK_DEBUG +void SkBlockAllocator::validate() const { + std::vector<const Block*> blocks; + const Block* prev = nullptr; + for (const Block* block : this->blocks()) { + blocks.push_back(block); + + SkASSERT(kAssignedMarker == block->fSentinel); + if (block == &fHead) { + // The head blocks' fPrev may be non-null if it holds a scratch block, but that's not + // considered part of the linked list + SkASSERT(!prev && (!fHead.fPrev || fHead.fPrev->isScratch())); + } else { + SkASSERT(prev == block->fPrev); + } + if (prev) { + SkASSERT(prev->fNext == block); + } + + SkASSERT(block->fSize >= (int) sizeof(Block)); + SkASSERT(block->fCursor >= kDataStart); + SkASSERT(block->fCursor <= block->fSize); + + prev = block; + } + SkASSERT(prev == fTail); + SkASSERT(!blocks.empty()); + SkASSERT(blocks[0] == &fHead); + + // Confirm reverse iteration matches forward iteration + size_t j = blocks.size(); + for (const Block* b : this->rblocks()) { + SkASSERT(b == blocks[j - 1]); + j--; + } + SkASSERT(j == 0); +} +#endif diff --git a/gfx/skia/skia/src/base/SkBlockAllocator.h b/gfx/skia/skia/src/base/SkBlockAllocator.h new file mode 100644 index 0000000000..02201c17d4 --- /dev/null +++ b/gfx/skia/skia/src/base/SkBlockAllocator.h @@ -0,0 +1,754 @@ +/* + * Copyright 2020 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SkBlockAllocator_DEFINED +#define SkBlockAllocator_DEFINED + +#include "include/private/base/SkAlign.h" +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkDebug.h" +#include "include/private/base/SkMacros.h" +#include "include/private/base/SkMath.h" +#include "include/private/base/SkNoncopyable.h" +#include "src/base/SkASAN.h" + +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <limits> +#include <new> +#include <type_traits> + +/** + * SkBlockAllocator provides low-level support for a block allocated arena with a dynamic tail that + * tracks space reservations within each block. Its APIs provide the ability to reserve space, + * resize reservations, and release reservations. It will automatically create new blocks if needed + * and destroy all remaining blocks when it is destructed. It assumes that anything allocated within + * its blocks has its destructors called externally. It is recommended that SkBlockAllocator is + * wrapped by a higher-level allocator that uses the low-level APIs to implement a simpler, + * purpose-focused API w/o having to worry as much about byte-level concerns. + * + * SkBlockAllocator has no limit to its total size, but each allocation is limited to 512MB (which + * should be sufficient for Skia's use cases). This upper allocation limit allows all internal + * operations to be performed using 'int' and avoid many overflow checks. Static asserts are used + * to ensure that those operations would not overflow when using the largest possible values. + * + * Possible use modes: + * 1. No upfront allocation, either on the stack or as a field + * SkBlockAllocator allocator(policy, heapAllocSize); + * + * 2. In-place new'd + * void* mem = operator new(totalSize); + * SkBlockAllocator* allocator = new (mem) SkBlockAllocator(policy, heapAllocSize, + * totalSize- sizeof(SkBlockAllocator)); + * delete allocator; + * + * 3. Use SkSBlockAllocator to increase the preallocation size + * SkSBlockAllocator<1024> allocator(policy, heapAllocSize); + * sizeof(allocator) == 1024; + */ +// TODO(michaelludwig) - While API is different, this shares similarities to SkArenaAlloc and +// SkFibBlockSizes, so we should work to integrate them. +class SkBlockAllocator final : SkNoncopyable { +public: + // Largest size that can be requested from allocate(), chosen because it's the largest pow-2 + // that is less than int32_t::max()/2. + inline static constexpr int kMaxAllocationSize = 1 << 29; + + enum class GrowthPolicy : int { + kFixed, // Next block size = N + kLinear, // = #blocks * N + kFibonacci, // = fibonacci(#blocks) * N + kExponential, // = 2^#blocks * N + kLast = kExponential + }; + inline static constexpr int kGrowthPolicyCount = static_cast<int>(GrowthPolicy::kLast) + 1; + + class Block final { + public: + ~Block(); + void operator delete(void* p) { ::operator delete(p); } + + // Return the maximum allocation size with the given alignment that can fit in this block. + template <size_t Align = 1, size_t Padding = 0> + int avail() const { return std::max(0, fSize - this->cursor<Align, Padding>()); } + + // Return the aligned offset of the first allocation, assuming it was made with the + // specified Align, and Padding. The returned offset does not mean a valid allocation + // starts at that offset, this is a utility function for classes built on top to manage + // indexing into a block effectively. 
+ template <size_t Align = 1, size_t Padding = 0> + int firstAlignedOffset() const { return this->alignedOffset<Align, Padding>(kDataStart); } + + // Convert an offset into this block's storage into a usable pointer. + void* ptr(int offset) { + SkASSERT(offset >= kDataStart && offset < fSize); + return reinterpret_cast<char*>(this) + offset; + } + const void* ptr(int offset) const { return const_cast<Block*>(this)->ptr(offset); } + + // Every block has an extra 'int' for clients to use however they want. It will start + // at 0 when a new block is made, or when the head block is reset. + int metadata() const { return fMetadata; } + void setMetadata(int value) { fMetadata = value; } + + /** + * Release the byte range between offset 'start' (inclusive) and 'end' (exclusive). This + * will return true if those bytes were successfully reclaimed, i.e. a subsequent allocation + * request could occupy the space. Regardless of return value, the provided byte range that + * [start, end) represents should not be used until it's re-allocated with allocate<...>(). + */ + inline bool release(int start, int end); + + /** + * Resize a previously reserved byte range of offset 'start' (inclusive) to 'end' + * (exclusive). 'deltaBytes' is the SIGNED change to length of the reservation. + * + * When negative this means the reservation is shrunk and the new length is (end - start - + * |deltaBytes|). If this new length would be 0, the byte range can no longer be used (as if + * it were released instead). Asserts that it would not shrink the reservation below 0. + * + * If 'deltaBytes' is positive, the allocator attempts to increase the length of the + * reservation. If 'deltaBytes' is less than or equal to avail() and it was the last + * allocation in the block, it can be resized. If there is not enough available bytes to + * accommodate the increase in size, or another allocation is blocking the increase in size, + * then false will be returned and the reserved byte range is unmodified. + */ + inline bool resize(int start, int end, int deltaBytes); + + private: + friend class SkBlockAllocator; + + Block(Block* prev, int allocationSize); + + // We poison the unallocated space in a Block to allow ASAN to catch invalid writes. + void poisonRange(int start, int end) { + sk_asan_poison_memory_region(reinterpret_cast<char*>(this) + start, end - start); + } + void unpoisonRange(int start, int end) { + sk_asan_unpoison_memory_region(reinterpret_cast<char*>(this) + start, end - start); + } + + // Get fCursor, but aligned such that ptr(rval) satisfies Align. + template <size_t Align, size_t Padding> + int cursor() const { return this->alignedOffset<Align, Padding>(fCursor); } + + template <size_t Align, size_t Padding> + int alignedOffset(int offset) const; + + bool isScratch() const { return fCursor < 0; } + void markAsScratch() { + fCursor = -1; + this->poisonRange(kDataStart, fSize); + } + + SkDEBUGCODE(uint32_t fSentinel;) // known value to check for bad back pointers to blocks + + Block* fNext; // doubly-linked list of blocks + Block* fPrev; + + // Each block tracks its own cursor because as later blocks are released, an older block + // may become the active tail again. + int fSize; // includes the size of the BlockHeader and requested metadata + int fCursor; // (this + fCursor) points to next available allocation + int fMetadata; + + // On release builds, a Block's other 2 pointers and 3 int fields leaves 4 bytes of padding + // for 8 and 16 aligned systems. 
Currently this is only manipulated in the head block for + // an allocator-level metadata and is explicitly not reset when the head block is "released" + // Down the road we could instead choose to offer multiple metadata slots per block. + int fAllocatorMetadata; + }; + + // Tuple representing a range of bytes, marking the unaligned start, the first aligned point + // after any padding, and the upper limit depending on requested size. + struct ByteRange { + Block* fBlock; // Owning block + int fStart; // Inclusive byte lower limit of byte range + int fAlignedOffset; // >= start, matching alignment requirement (i.e. first real byte) + int fEnd; // Exclusive upper limit of byte range + }; + + // The size of the head block is determined by 'additionalPreallocBytes'. Subsequent heap blocks + // are determined by 'policy' and 'blockIncrementBytes', although 'blockIncrementBytes' will be + // aligned to std::max_align_t. + // + // When 'additionalPreallocBytes' > 0, the allocator assumes that many extra bytes immediately + // after the allocator can be used by its inline head block. This is useful when the allocator + // is in-place new'ed into a larger block of memory, but it should remain set to 0 if stack + // allocated or if the class layout does not guarantee that space is present. + SkBlockAllocator(GrowthPolicy policy, size_t blockIncrementBytes, + size_t additionalPreallocBytes = 0); + + ~SkBlockAllocator() { this->reset(); } + void operator delete(void* p) { ::operator delete(p); } + + /** + * Helper to calculate the minimum number of bytes needed for heap block size, under the + * assumption that Align will be the requested alignment of the first call to allocate(). + * Ex. To store N instances of T in a heap block, the 'blockIncrementBytes' should be set to + * BlockOverhead<alignof(T)>() + N * sizeof(T) when making the SkBlockAllocator. + */ + template<size_t Align = 1, size_t Padding = 0> + static constexpr size_t BlockOverhead(); + + /** + * Helper to calculate the minimum number of bytes needed for a preallocation, under the + * assumption that Align will be the requested alignment of the first call to allocate(). + * Ex. To preallocate a SkSBlockAllocator to hold N instances of T, its arge should be + * Overhead<alignof(T)>() + N * sizeof(T) + */ + template<size_t Align = 1, size_t Padding = 0> + static constexpr size_t Overhead(); + + /** + * Return the total number of bytes of the allocator, including its instance overhead, per-block + * overhead and space used for allocations. + */ + size_t totalSize() const; + /** + * Return the total number of bytes usable for allocations. This includes bytes that have + * been reserved already by a call to allocate() and bytes that are still available. It is + * totalSize() minus all allocator and block-level overhead. + */ + size_t totalUsableSpace() const; + /** + * Return the total number of usable bytes that have been reserved by allocations. This will + * be less than or equal to totalUsableSpace(). + */ + size_t totalSpaceInUse() const; + + /** + * Return the total number of bytes that were pre-allocated for the SkBlockAllocator. This will + * include 'additionalPreallocBytes' passed to the constructor, and represents what the total + * size would become after a call to reset(). + */ + size_t preallocSize() const { + // Don't double count fHead's Block overhead in both sizeof(SkBlockAllocator) and fSize. 
+ return sizeof(SkBlockAllocator) + fHead.fSize - BaseHeadBlockSize(); + } + /** + * Return the usable size of the inline head block; this will be equal to + * 'additionalPreallocBytes' plus any alignment padding that the system had to add to Block. + * The returned value represents what could be allocated before a heap block is be created. + */ + size_t preallocUsableSpace() const { + return fHead.fSize - kDataStart; + } + + /** + * Get the current value of the allocator-level metadata (a user-oriented slot). This is + * separate from any block-level metadata, but can serve a similar purpose to compactly support + * data collections on top of SkBlockAllocator. + */ + int metadata() const { return fHead.fAllocatorMetadata; } + + /** + * Set the current value of the allocator-level metadata. + */ + void setMetadata(int value) { fHead.fAllocatorMetadata = value; } + + /** + * Reserve space that will hold 'size' bytes. This will automatically allocate a new block if + * there is not enough available space in the current block to provide 'size' bytes. The + * returned ByteRange tuple specifies the Block owning the reserved memory, the full byte range, + * and the aligned offset within that range to use for the user-facing pointer. The following + * invariants hold: + * + * 1. block->ptr(alignedOffset) is aligned to Align + * 2. end - alignedOffset == size + * 3. Padding <= alignedOffset - start <= Padding + Align - 1 + * + * Invariant #3, when Padding > 0, allows intermediate allocators to embed metadata along with + * the allocations. If the Padding bytes are used for some 'struct Meta', then + * ptr(alignedOffset - sizeof(Meta)) can be safely used as a Meta* if Meta's alignment + * requirements are less than or equal to the alignment specified in allocate<>. This can be + * easily guaranteed by using the pattern: + * + * allocate<max(UserAlign, alignof(Meta)), sizeof(Meta)>(userSize); + * + * This ensures that ptr(alignedOffset) will always satisfy UserAlign and + * ptr(alignedOffset - sizeof(Meta)) will always satisfy alignof(Meta). Alternatively, memcpy + * can be used to read and write values between start and alignedOffset without worrying about + * alignment requirements of the metadata. + * + * For over-aligned allocations, the alignedOffset (as an int) may not be a multiple of Align, + * but the result of ptr(alignedOffset) will be a multiple of Align. + */ + template <size_t Align, size_t Padding = 0> + ByteRange allocate(size_t size); + + enum ReserveFlags : unsigned { + // If provided to reserve(), the input 'size' will be rounded up to the next size determined + // by the growth policy of the SkBlockAllocator. If not, 'size' will be aligned to max_align + kIgnoreGrowthPolicy_Flag = 0b01, + // If provided to reserve(), the number of available bytes of the current block will not + // be used to satisfy the reservation (assuming the contiguous range was long enough to + // begin with). + kIgnoreExistingBytes_Flag = 0b10, + + kNo_ReserveFlags = 0b00 + }; + + /** + * Ensure the block allocator has 'size' contiguous available bytes. After calling this + * function, currentBlock()->avail<Align, Padding>() may still report less than 'size' if the + * reserved space was added as a scratch block. This is done so that anything remaining in + * the current block can still be used if a smaller-than-size allocation is requested. If 'size' + * is requested by a subsequent allocation, the scratch block will automatically be activated + * and the request will not itself trigger any malloc. 
+ * + * The optional 'flags' controls how the input size is allocated; by default it will attempt + * to use available contiguous bytes in the current block and will respect the growth policy + * of the allocator. + */ + template <size_t Align = 1, size_t Padding = 0> + void reserve(size_t size, ReserveFlags flags = kNo_ReserveFlags); + + /** + * Return a pointer to the start of the current block. This will never be null. + */ + const Block* currentBlock() const { return fTail; } + Block* currentBlock() { return fTail; } + + const Block* headBlock() const { return &fHead; } + Block* headBlock() { return &fHead; } + + /** + * Return the block that owns the allocated 'ptr'. Assuming that earlier, an allocation was + * returned as {b, start, alignedOffset, end}, and 'p = b->ptr(alignedOffset)', then a call + * to 'owningBlock<Align, Padding>(p, start) == b'. + * + * If calling code has already made a pointer to their metadata, i.e. 'm = p - Padding', then + * 'owningBlock<Align, 0>(m, start)' will also return b, allowing you to recover the block from + * the metadata pointer. + * + * If calling code has access to the original alignedOffset, this function should not be used + * since the owning block is just 'p - alignedOffset', regardless of original Align or Padding. + */ + template <size_t Align, size_t Padding = 0> + Block* owningBlock(const void* ptr, int start); + + template <size_t Align, size_t Padding = 0> + const Block* owningBlock(const void* ptr, int start) const { + return const_cast<SkBlockAllocator*>(this)->owningBlock<Align, Padding>(ptr, start); + } + + /** + * Find the owning block of the allocated pointer, 'p'. Without any additional information this + * is O(N) on the number of allocated blocks. + */ + Block* findOwningBlock(const void* ptr); + const Block* findOwningBlock(const void* ptr) const { + return const_cast<SkBlockAllocator*>(this)->findOwningBlock(ptr); + } + + /** + * Explicitly free an entire block, invalidating any remaining allocations from the block. + * SkBlockAllocator will release all alive blocks automatically when it is destroyed, but this + * function can be used to reclaim memory over the lifetime of the allocator. The provided + * 'block' pointer must have previously come from a call to currentBlock() or allocate(). + * + * If 'block' represents the inline-allocated head block, its cursor and metadata are instead + * reset to their defaults. + * + * If the block is not the head block, it may be kept as a scratch block to be reused for + * subsequent allocation requests, instead of making an entirely new block. A scratch block is + * not visible when iterating over blocks but is reported in the total size of the allocator. + */ + void releaseBlock(Block* block); + + /** + * Detach every heap-allocated block owned by 'other' and concatenate them to this allocator's + * list of blocks. This memory is now managed by this allocator. Since this only transfers + * ownership of a Block, and a Block itself does not move, any previous allocations remain + * valid and associated with their original Block instances. SkBlockAllocator-level functions + * that accept allocated pointers (e.g. findOwningBlock), must now use this allocator and not + * 'other' for these allocations. + * + * The head block of 'other' cannot be stolen, so higher-level allocators and memory structures + * must handle that data differently. 
+ */ + void stealHeapBlocks(SkBlockAllocator* other); + + /** + * Explicitly free all blocks (invalidating all allocations), and resets the head block to its + * default state. The allocator-level metadata is reset to 0 as well. + */ + void reset(); + + /** + * Remove any reserved scratch space, either from calling reserve() or releaseBlock(). + */ + void resetScratchSpace(); + + template <bool Forward, bool Const> class BlockIter; + + /** + * Clients can iterate over all active Blocks in the SkBlockAllocator using for loops: + * + * Forward iteration from head to tail block (or non-const variant): + * for (const Block* b : this->blocks()) { } + * Reverse iteration from tail to head block: + * for (const Block* b : this->rblocks()) { } + * + * It is safe to call releaseBlock() on the active block while looping. + */ + inline BlockIter<true, false> blocks(); + inline BlockIter<true, true> blocks() const; + inline BlockIter<false, false> rblocks(); + inline BlockIter<false, true> rblocks() const; + +#ifdef SK_DEBUG + inline static constexpr uint32_t kAssignedMarker = 0xBEEFFACE; + inline static constexpr uint32_t kFreedMarker = 0xCAFEBABE; + + void validate() const; +#endif + +private: + friend class BlockAllocatorTestAccess; + friend class TBlockListTestAccess; + + inline static constexpr int kDataStart = sizeof(Block); + #ifdef SK_FORCE_8_BYTE_ALIGNMENT + // This is an issue for WASM builds using emscripten, which had std::max_align_t = 16, but + // was returning pointers only aligned to 8 bytes. + // https://github.com/emscripten-core/emscripten/issues/10072 + // + // Setting this to 8 will let SkBlockAllocator properly correct for the pointer address if + // a 16-byte aligned allocation is requested in wasm (unlikely since we don't use long + // doubles). + inline static constexpr size_t kAddressAlign = 8; + #else + // The alignment Block addresses will be at when created using operator new + // (spec-compliant is pointers are aligned to max_align_t). + inline static constexpr size_t kAddressAlign = alignof(std::max_align_t); + #endif + + // Calculates the size of a new Block required to store a kMaxAllocationSize request for the + // given alignment and padding bytes. Also represents maximum valid fCursor value in a Block. + template<size_t Align, size_t Padding> + static constexpr size_t MaxBlockSize(); + + static constexpr int BaseHeadBlockSize() { + return sizeof(SkBlockAllocator) - offsetof(SkBlockAllocator, fHead); + } + + // Append a new block to the end of the block linked list, updating fTail. 'minSize' must + // have enough room for sizeof(Block). 'maxSize' is the upper limit of fSize for the new block + // that will preserve the static guarantees SkBlockAllocator makes. + void addBlock(int minSize, int maxSize); + + int scratchBlockSize() const { return fHead.fPrev ? fHead.fPrev->fSize : 0; } + + Block* fTail; // All non-head blocks are heap allocated; tail will never be null. + + // All remaining state is packed into 64 bits to keep SkBlockAllocator at 16 bytes + head block + // (on a 64-bit system). + + // Growth of the block size is controlled by four factors: BlockIncrement, N0 and N1, and a + // policy defining how N0 is updated. When a new block is needed, we calculate N1' = N0 + N1. + // Depending on the policy, N0' = N0 (no growth or linear growth), or N0' = N1 (Fibonacci), or + // N0' = N1' (exponential). The size of the new block is N1' * BlockIncrement * MaxAlign, + // after which fN0 and fN1 store N0' and N1' clamped into 23 bits. 
With current bit allocations, + // N1' is limited to 2^24, and assuming MaxAlign=16, then BlockIncrement must be '2' in order to + // eventually reach the hard 2^29 size limit of SkBlockAllocator. + + // Next heap block size = (fBlockIncrement * alignof(std::max_align_t) * (fN0 + fN1)) + uint64_t fBlockIncrement : 16; + uint64_t fGrowthPolicy : 2; // GrowthPolicy + uint64_t fN0 : 23; // = 1 for linear/exp.; = 0 for fixed/fibonacci, initially + uint64_t fN1 : 23; // = 1 initially + + // Inline head block, must be at the end so that it can utilize any additional reserved space + // from the initial allocation. + // The head block's prev pointer may be non-null, which signifies a scratch block that may be + // reused instead of allocating an entirely new block (this helps when allocate+release calls + // bounce back and forth across the capacity of a block). + alignas(kAddressAlign) Block fHead; + + static_assert(kGrowthPolicyCount <= 4); +}; + +// A wrapper around SkBlockAllocator that includes preallocated storage for the head block. +// N will be the preallocSize() reported by the allocator. +template<size_t N> +class SkSBlockAllocator : SkNoncopyable { +public: + using GrowthPolicy = SkBlockAllocator::GrowthPolicy; + + SkSBlockAllocator() { + new (fStorage) SkBlockAllocator(GrowthPolicy::kFixed, N, N - sizeof(SkBlockAllocator)); + } + explicit SkSBlockAllocator(GrowthPolicy policy) { + new (fStorage) SkBlockAllocator(policy, N, N - sizeof(SkBlockAllocator)); + } + + SkSBlockAllocator(GrowthPolicy policy, size_t blockIncrementBytes) { + new (fStorage) SkBlockAllocator(policy, blockIncrementBytes, N - sizeof(SkBlockAllocator)); + } + + ~SkSBlockAllocator() { + this->allocator()->~SkBlockAllocator(); + } + + SkBlockAllocator* operator->() { return this->allocator(); } + const SkBlockAllocator* operator->() const { return this->allocator(); } + + SkBlockAllocator* allocator() { return reinterpret_cast<SkBlockAllocator*>(fStorage); } + const SkBlockAllocator* allocator() const { + return reinterpret_cast<const SkBlockAllocator*>(fStorage); + } + +private: + static_assert(N >= sizeof(SkBlockAllocator)); + + // Will be used to placement new the allocator + alignas(SkBlockAllocator) char fStorage[N]; +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// Template and inline implementations + +SK_MAKE_BITFIELD_OPS(SkBlockAllocator::ReserveFlags) + +template<size_t Align, size_t Padding> +constexpr size_t SkBlockAllocator::BlockOverhead() { + static_assert(SkAlignTo(kDataStart + Padding, Align) >= sizeof(Block)); + return SkAlignTo(kDataStart + Padding, Align); +} + +template<size_t Align, size_t Padding> +constexpr size_t SkBlockAllocator::Overhead() { + // NOTE: On most platforms, SkBlockAllocator is packed; this is not the case on debug builds + // due to extra fields, or on WASM due to 4byte pointers but 16byte max align. + return std::max(sizeof(SkBlockAllocator), + offsetof(SkBlockAllocator, fHead) + BlockOverhead<Align, Padding>()); +} + +template<size_t Align, size_t Padding> +constexpr size_t SkBlockAllocator::MaxBlockSize() { + // Without loss of generality, assumes 'align' will be the largest encountered alignment for the + // allocator (if it's not, the largest align will be encountered by the compiler and pass/fail + // the same set of static asserts). 
+ return BlockOverhead<Align, Padding>() + kMaxAllocationSize; +} + +template<size_t Align, size_t Padding> +void SkBlockAllocator::reserve(size_t size, ReserveFlags flags) { + if (size > kMaxAllocationSize) { + SK_ABORT("Allocation too large (%zu bytes requested)", size); + } + int iSize = (int) size; + if ((flags & kIgnoreExistingBytes_Flag) || + this->currentBlock()->avail<Align, Padding>() < iSize) { + + int blockSize = BlockOverhead<Align, Padding>() + iSize; + int maxSize = (flags & kIgnoreGrowthPolicy_Flag) ? blockSize + : MaxBlockSize<Align, Padding>(); + SkASSERT((size_t) maxSize <= (MaxBlockSize<Align, Padding>())); + + SkDEBUGCODE(auto oldTail = fTail;) + this->addBlock(blockSize, maxSize); + SkASSERT(fTail != oldTail); + // Releasing the just added block will move it into scratch space, allowing the original + // tail's bytes to be used first before the scratch block is activated. + this->releaseBlock(fTail); + } +} + +template <size_t Align, size_t Padding> +SkBlockAllocator::ByteRange SkBlockAllocator::allocate(size_t size) { + // Amount of extra space for a new block to make sure the allocation can succeed. + static constexpr int kBlockOverhead = (int) BlockOverhead<Align, Padding>(); + + // Ensures 'offset' and 'end' calculations will be valid + static_assert((kMaxAllocationSize + SkAlignTo(MaxBlockSize<Align, Padding>(), Align)) + <= (size_t) std::numeric_limits<int32_t>::max()); + // Ensures size + blockOverhead + addBlock's alignment operations will be valid + static_assert(kMaxAllocationSize + kBlockOverhead + ((1 << 12) - 1) // 4K align for large blocks + <= std::numeric_limits<int32_t>::max()); + + if (size > kMaxAllocationSize) { + SK_ABORT("Allocation too large (%zu bytes requested)", size); + } + + int iSize = (int) size; + int offset = fTail->cursor<Align, Padding>(); + int end = offset + iSize; + if (end > fTail->fSize) { + this->addBlock(iSize + kBlockOverhead, MaxBlockSize<Align, Padding>()); + offset = fTail->cursor<Align, Padding>(); + end = offset + iSize; + } + + // Check invariants + SkASSERT(end <= fTail->fSize); + SkASSERT(end - offset == iSize); + SkASSERT(offset - fTail->fCursor >= (int) Padding && + offset - fTail->fCursor <= (int) (Padding + Align - 1)); + SkASSERT(reinterpret_cast<uintptr_t>(fTail->ptr(offset)) % Align == 0); + + int start = fTail->fCursor; + fTail->fCursor = end; + + fTail->unpoisonRange(offset - Padding, end); + + return {fTail, start, offset, end}; +} + +template <size_t Align, size_t Padding> +SkBlockAllocator::Block* SkBlockAllocator::owningBlock(const void* p, int start) { + // 'p' was originally formed by aligning 'block + start + Padding', producing the inequality: + // block + start + Padding <= p <= block + start + Padding + Align-1 + // Rearranging this yields: + // block <= p - start - Padding <= block + Align-1 + // Masking these terms by ~(Align-1) reconstructs 'block' if the alignment of the block is + // greater than or equal to Align (since block & ~(Align-1) == (block + Align-1) & ~(Align-1) + // in that case). Overalignment does not reduce to inequality unfortunately. + if /* constexpr */ (Align <= kAddressAlign) { + Block* block = reinterpret_cast<Block*>( + (reinterpret_cast<uintptr_t>(p) - start - Padding) & ~(Align - 1)); + SkASSERT(block->fSentinel == kAssignedMarker); + return block; + } else { + // There's not a constant-time expression available to reconstruct the block from 'p', + // but this is unlikely to happen frequently. 
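+        // (Editorial illustration: with Align=32 but kAddressAlign=16, the block may start at
+        // an address that is 16- but not 32-byte aligned, so masking 'p - start - Padding' by
+        // ~31 could land 16 bytes below the true block start; hence the linear-search fallback.)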
+ return this->findOwningBlock(p); + } +} + +template <size_t Align, size_t Padding> +int SkBlockAllocator::Block::alignedOffset(int offset) const { + static_assert(SkIsPow2(Align)); + // Aligning adds (Padding + Align - 1) as an intermediate step, so ensure that can't overflow + static_assert(MaxBlockSize<Align, Padding>() + Padding + Align - 1 + <= (size_t) std::numeric_limits<int32_t>::max()); + + if /* constexpr */ (Align <= kAddressAlign) { + // Same as SkAlignTo, but operates on ints instead of size_t + return (offset + Padding + Align - 1) & ~(Align - 1); + } else { + // Must take into account that 'this' may be starting at a pointer that doesn't satisfy the + // larger alignment request, so must align the entire pointer, not just offset + uintptr_t blockPtr = reinterpret_cast<uintptr_t>(this); + uintptr_t alignedPtr = (blockPtr + offset + Padding + Align - 1) & ~(Align - 1); + SkASSERT(alignedPtr - blockPtr <= (uintptr_t) std::numeric_limits<int32_t>::max()); + return (int) (alignedPtr - blockPtr); + } +} + +bool SkBlockAllocator::Block::resize(int start, int end, int deltaBytes) { + SkASSERT(fSentinel == kAssignedMarker); + SkASSERT(start >= kDataStart && end <= fSize && start < end); + + if (deltaBytes > kMaxAllocationSize || deltaBytes < -kMaxAllocationSize) { + // Cannot possibly satisfy the resize and could overflow subsequent math + return false; + } + if (fCursor == end) { + int nextCursor = end + deltaBytes; + SkASSERT(nextCursor >= start); + // We still check nextCursor >= start for release builds that wouldn't assert. + if (nextCursor <= fSize && nextCursor >= start) { + if (nextCursor < fCursor) { + // The allocation got smaller; poison the space that can no longer be used. + this->poisonRange(nextCursor + 1, end); + } else { + // The allocation got larger; unpoison the space that can now be used. + this->unpoisonRange(end, nextCursor); + } + + fCursor = nextCursor; + return true; + } + } + return false; +} + +// NOTE: release is equivalent to resize(start, end, start - end), and the compiler can optimize +// most of the operations away, but it wasn't able to remove the unnecessary branch comparing the +// new cursor to the block size or old start, so release() gets a specialization. +bool SkBlockAllocator::Block::release(int start, int end) { + SkASSERT(fSentinel == kAssignedMarker); + SkASSERT(start >= kDataStart && end <= fSize && start < end); + + this->poisonRange(start, end); + + if (fCursor == end) { + fCursor = start; + return true; + } else { + return false; + } +} + +///////// Block iteration +template <bool Forward, bool Const> +class SkBlockAllocator::BlockIter { +private: + using BlockT = typename std::conditional<Const, const Block, Block>::type; + using AllocatorT = + typename std::conditional<Const, const SkBlockAllocator, SkBlockAllocator>::type; + +public: + BlockIter(AllocatorT* allocator) : fAllocator(allocator) {} + + class Item { + public: + bool operator!=(const Item& other) const { return fBlock != other.fBlock; } + + BlockT* operator*() const { return fBlock; } + + Item& operator++() { + this->advance(fNext); + return *this; + } + + private: + friend BlockIter; + + Item(BlockT* block) { this->advance(block); } + + void advance(BlockT* block) { + fBlock = block; + fNext = block ? (Forward ? block->fNext : block->fPrev) : nullptr; + if (!Forward && fNext && fNext->isScratch()) { + // For reverse-iteration only, we need to stop at the head, not the scratch block + // possibly stashed in head->prev. 
+ fNext = nullptr; + } + SkASSERT(!fNext || !fNext->isScratch()); + } + + BlockT* fBlock; + // Cache this before operator++ so that fBlock can be released during iteration + BlockT* fNext; + }; + + Item begin() const { return Item(Forward ? &fAllocator->fHead : fAllocator->fTail); } + Item end() const { return Item(nullptr); } + +private: + AllocatorT* fAllocator; +}; + +SkBlockAllocator::BlockIter<true, false> SkBlockAllocator::blocks() { + return BlockIter<true, false>(this); +} +SkBlockAllocator::BlockIter<true, true> SkBlockAllocator::blocks() const { + return BlockIter<true, true>(this); +} +SkBlockAllocator::BlockIter<false, false> SkBlockAllocator::rblocks() { + return BlockIter<false, false>(this); +} +SkBlockAllocator::BlockIter<false, true> SkBlockAllocator::rblocks() const { + return BlockIter<false, true>(this); +} + +#endif // SkBlockAllocator_DEFINED diff --git a/gfx/skia/skia/src/base/SkBuffer.cpp b/gfx/skia/skia/src/base/SkBuffer.cpp new file mode 100644 index 0000000000..bb39782215 --- /dev/null +++ b/gfx/skia/skia/src/base/SkBuffer.cpp @@ -0,0 +1,90 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "src/base/SkBuffer.h" + +#include "include/private/base/SkAlign.h" +#include "include/private/base/SkMalloc.h" + +#include <cstdint> + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +const void* SkRBuffer::skip(size_t size) { + if (fValid && size <= this->available()) { + const void* pos = fPos; + fPos += size; + return pos; + } + fValid = false; + return nullptr; +} + +bool SkRBuffer::read(void* buffer, size_t size) { + if (const void* src = this->skip(size)) { + sk_careful_memcpy(buffer, src, size); + return true; + } + return false; +} + +bool SkRBuffer::skipToAlign4() { + intptr_t pos = reinterpret_cast<intptr_t>(fPos); + size_t n = SkAlign4(pos) - pos; + if (fValid && n <= this->available()) { + fPos += n; + return true; + } else { + fValid = false; + return false; + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +void* SkWBuffer::skip(size_t size) { + void* result = fPos; + writeNoSizeCheck(nullptr, size); + return fData == nullptr ? nullptr : result; +} + +void SkWBuffer::writeNoSizeCheck(const void* buffer, size_t size) { + SkASSERT(fData == nullptr || fStop == nullptr || fPos + size <= fStop); + if (fData && buffer) { + sk_careful_memcpy(fPos, buffer, size); + } + fPos += size; +} + +size_t SkWBuffer::padToAlign4() { + size_t pos = this->pos(); + size_t n = SkAlign4(pos) - pos; + + if (n && fData) + { + char* p = fPos; + char* stop = p + n; + do { + *p++ = 0; + } while (p < stop); + } + fPos += n; + return n; +} + +#if 0 +#ifdef SK_DEBUG + static void AssertBuffer32(const void* buffer) + { + SkASSERT(buffer); + SkASSERT(((size_t)buffer & 3) == 0); + } +#else + #define AssertBuffer32(buffer) +#endif + +#endif diff --git a/gfx/skia/skia/src/base/SkBuffer.h b/gfx/skia/skia/src/base/SkBuffer.h new file mode 100644 index 0000000000..b30fda499d --- /dev/null +++ b/gfx/skia/skia/src/base/SkBuffer.h @@ -0,0 +1,134 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SkBuffer_DEFINED +#define SkBuffer_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkNoncopyable.h" +#include "src/base/SkSafeMath.h" + +#include <cstddef> +#include <cstdint> + +typedef float SkScalar; + +/** \class SkRBuffer + + Light weight class for reading data from a memory block. + The RBuffer is given the buffer to read from, with either a specified size + or no size (in which case no range checking is performed). It is iillegal + to attempt to read a value from an empty RBuffer (data == null). +*/ +class SkRBuffer : SkNoncopyable { +public: + SkRBuffer() : fData(nullptr), fPos(nullptr), fStop(nullptr) {} + + /** Initialize RBuffer with a data point and length. + */ + SkRBuffer(const void* data, size_t size) { + SkASSERT(data != nullptr || size == 0); + fData = (const char*)data; + fPos = (const char*)data; + fStop = (const char*)data + size; + } + + /** Return the number of bytes that have been read from the beginning + of the data pointer. + */ + size_t pos() const { return fPos - fData; } + /** Return the total size of the data pointer. Only defined if the length was + specified in the constructor or in a call to reset(). + */ + size_t size() const { return fStop - fData; } + /** Return true if the buffer has read to the end of the data pointer. + Only defined if the length was specified in the constructor or in a call + to reset(). Always returns true if the length was not specified. + */ + bool eof() const { return fPos >= fStop; } + + size_t available() const { return fStop - fPos; } + + bool isValid() const { return fValid; } + + /** Read the specified number of bytes from the data pointer. If buffer is not + null, copy those bytes into buffer. + */ + bool read(void* buffer, size_t size); + bool skipToAlign4(); + + bool readU8(uint8_t* x) { return this->read(x, 1); } + bool readS32(int32_t* x) { return this->read(x, 4); } + bool readU32(uint32_t* x) { return this->read(x, 4); } + + // returns nullptr on failure + const void* skip(size_t bytes); + template <typename T> const T* skipCount(size_t count) { + return static_cast<const T*>(this->skip(SkSafeMath::Mul(count, sizeof(T)))); + } + +private: + const char* fData; + const char* fPos; + const char* fStop; + bool fValid = true; +}; + +/** \class SkWBuffer + + Light weight class for writing data to a memory block. + The WBuffer is given the buffer to write into, with either a specified size + or no size, in which case no range checking is performed. An empty WBuffer + is legal, in which case no data is ever written, but the relative pos() + is updated. 
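+
+    Illustrative two-pass sketch (editorial, not part of the original header): measure with
+    a storage-less SkWBuffer, then write for real once storage of that size exists.
+
+        SkWBuffer sizer;                       // null storage: only advances pos()
+        sizer.write32(17);
+        sizer.writeBool(true);
+        sizer.padToAlign4();
+        size_t needed = sizer.pos();           // bytes required for the real write
+
+        SkWBuffer writer(realStorage, needed); // 'realStorage' is any buffer of 'needed' bytes
+        writer.write32(17);
+        writer.writeBool(true);
+        writer.padToAlign4();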
+*/ +class SkWBuffer : SkNoncopyable { +public: + SkWBuffer() : fData(nullptr), fPos(nullptr), fStop(nullptr) {} + SkWBuffer(void* data) { reset(data); } + SkWBuffer(void* data, size_t size) { reset(data, size); } + + void reset(void* data) { + fData = (char*)data; + fPos = (char*)data; + fStop = nullptr; // no bounds checking + } + + void reset(void* data, size_t size) { + SkASSERT(data != nullptr || size == 0); + fData = (char*)data; + fPos = (char*)data; + fStop = (char*)data + size; + } + + size_t pos() const { return fPos - fData; } + void* skip(size_t size); // return start of skipped data + + void write(const void* buffer, size_t size) { + if (size) { + this->writeNoSizeCheck(buffer, size); + } + } + + size_t padToAlign4(); + + void writePtr(const void* x) { this->writeNoSizeCheck(&x, sizeof(x)); } + void writeScalar(SkScalar x) { this->writeNoSizeCheck(&x, 4); } + void write32(int32_t x) { this->writeNoSizeCheck(&x, 4); } + void write16(int16_t x) { this->writeNoSizeCheck(&x, 2); } + void write8(int8_t x) { this->writeNoSizeCheck(&x, 1); } + void writeBool(bool x) { this->write8(x); } + +private: + void writeNoSizeCheck(const void* buffer, size_t size); + + char* fData; + char* fPos; + char* fStop; +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkContainers.cpp b/gfx/skia/skia/src/base/SkContainers.cpp new file mode 100644 index 0000000000..1e36a76ec4 --- /dev/null +++ b/gfx/skia/skia/src/base/SkContainers.cpp @@ -0,0 +1,107 @@ +// Copyright 2019 Google LLC. +// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. + +#include "include/private/base/SkContainers.h" + +#include "include/private/base/SkAlign.h" +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkFeatures.h" +#include "include/private/base/SkMalloc.h" +#include "include/private/base/SkTo.h" + +#include <algorithm> +#include <cstddef> + +#if defined(SK_BUILD_FOR_MAC) || defined(SK_BUILD_FOR_IOS) +#include <malloc/malloc.h> +#elif defined(SK_BUILD_FOR_ANDROID) || (defined(SK_BUILD_FOR_UNIX) && !defined(__OpenBSD__)) +#include <malloc.h> +#elif defined(SK_BUILD_FOR_WIN) +#include <malloc.h> +#endif + +namespace { +// Return at least as many bytes to keep malloc aligned. +constexpr size_t kMinBytes = alignof(max_align_t); + +SkSpan<std::byte> complete_size(void* ptr, size_t size) { + if (ptr == nullptr) { + return {}; + } + + size_t completeSize = size; + + // Use the OS specific calls to find the actual capacity. + #if defined(SK_BUILD_FOR_MAC) || defined(SK_BUILD_FOR_IOS) + // TODO: remove the max, when the chrome implementation of malloc_size doesn't return 0. 
+ completeSize = std::max(malloc_size(ptr), size); + #elif defined(SK_BUILD_FOR_ANDROID) && __ANDROID_API__ >= 17 + completeSize = malloc_usable_size(ptr); + SkASSERT(completeSize >= size); + #elif defined(SK_BUILD_FOR_UNIX) && !defined(__OpenBSD__) + completeSize = malloc_usable_size(ptr); + SkASSERT(completeSize >= size); + #elif defined(SK_BUILD_FOR_WIN) + completeSize = _msize(ptr); + SkASSERT(completeSize >= size); + #endif + + return {static_cast<std::byte*>(ptr), completeSize}; +} +} // namespace + +SkSpan<std::byte> SkContainerAllocator::allocate(int capacity, double growthFactor) { + SkASSERT(capacity >= 0); + SkASSERT(growthFactor >= 1.0); + SkASSERT_RELEASE(capacity <= fMaxCapacity); + + if (growthFactor > 1.0 && capacity > 0) { + capacity = this->growthFactorCapacity(capacity, growthFactor); + } + + return sk_allocate_throw(capacity * fSizeOfT); +} + +size_t SkContainerAllocator::roundUpCapacity(int64_t capacity) const { + SkASSERT(capacity >= 0); + + // If round will not go above fMaxCapacity return rounded capacity. + if (capacity < fMaxCapacity - kCapacityMultiple) { + return SkAlignTo(capacity, kCapacityMultiple); + } + + return SkToSizeT(fMaxCapacity); +} + +size_t SkContainerAllocator::growthFactorCapacity(int capacity, double growthFactor) const { + SkASSERT(capacity >= 0); + SkASSERT(growthFactor >= 1.0); + // Multiply by the growthFactor. Remember this must be done in 64-bit ints and not + // size_t because size_t changes. + const int64_t capacityGrowth = static_cast<int64_t>(capacity * growthFactor); + + // Notice that for small values of capacity, rounding up will provide most of the growth. + return this->roundUpCapacity(capacityGrowth); +} + + +SkSpan<std::byte> sk_allocate_canfail(size_t size) { + // Make sure to ask for at least the minimum number of bytes. + const size_t adjustedSize = std::max(size, kMinBytes); + void* ptr = sk_malloc_canfail(adjustedSize); + return complete_size(ptr, adjustedSize); +} + +SkSpan<std::byte> sk_allocate_throw(size_t size) { + if (size == 0) { + return {}; + } + // Make sure to ask for at least the minimum number of bytes. + const size_t adjustedSize = std::max(size, kMinBytes); + void* ptr = sk_malloc_throw(adjustedSize); + return complete_size(ptr, adjustedSize); +} + +void sk_report_container_overflow_and_die() { + SK_ABORT("Requested capacity is too large."); +} diff --git a/gfx/skia/skia/src/base/SkCubics.cpp b/gfx/skia/skia/src/base/SkCubics.cpp new file mode 100644 index 0000000000..64a4beb007 --- /dev/null +++ b/gfx/skia/skia/src/base/SkCubics.cpp @@ -0,0 +1,241 @@ +/* + * Copyright 2023 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "src/base/SkCubics.h" + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkFloatingPoint.h" +#include "include/private/base/SkTPin.h" +#include "src/base/SkQuads.h" + +#include <algorithm> +#include <cmath> + +static constexpr double PI = 3.141592653589793; + +static bool nearly_equal(double x, double y) { + if (sk_double_nearly_zero(x)) { + return sk_double_nearly_zero(y); + } + return sk_doubles_nearly_equal_ulps(x, y); +} + +// When the A coefficient of a cubic is close to 0, there can be floating point error +// that arises from computing a very large root. In those cases, we would rather be +// precise about the smaller 2 roots, so we have this arbitrary cutoff for when A is +// really small or small compared to B. 
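+// (Editorial illustration: for A = 1e-9, B = 1, the cubic has one root near -B/A = -1e9;
+// dividing through by A to run the cubic formula would amplify rounding error in the two
+// roots near the origin, so treating the polynomial as the quadratic B*t^2 + C*t + D is
+// the better-conditioned choice.)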
+static bool close_to_a_quadratic(double A, double B) { + if (sk_double_nearly_zero(B)) { + return sk_double_nearly_zero(A); + } + return std::abs(A / B) < 1.0e-7; +} + +int SkCubics::RootsReal(double A, double B, double C, double D, double solution[3]) { + if (close_to_a_quadratic(A, B)) { + return SkQuads::RootsReal(B, C, D, solution); + } + if (sk_double_nearly_zero(D)) { // 0 is one root + int num = SkQuads::RootsReal(A, B, C, solution); + for (int i = 0; i < num; ++i) { + if (sk_double_nearly_zero(solution[i])) { + return num; + } + } + solution[num++] = 0; + return num; + } + if (sk_double_nearly_zero(A + B + C + D)) { // 1 is one root + int num = SkQuads::RootsReal(A, A + B, -D, solution); + for (int i = 0; i < num; ++i) { + if (sk_doubles_nearly_equal_ulps(solution[i], 1)) { + return num; + } + } + solution[num++] = 1; + return num; + } + double a, b, c; + { + // If A is zero (e.g. B was nan and thus close_to_a_quadratic was false), we will + // temporarily have infinities rolling about, but will catch that when checking + // R2MinusQ3. + double invA = sk_ieee_double_divide(1, A); + a = B * invA; + b = C * invA; + c = D * invA; + } + double a2 = a * a; + double Q = (a2 - b * 3) / 9; + double R = (2 * a2 * a - 9 * a * b + 27 * c) / 54; + double R2 = R * R; + double Q3 = Q * Q * Q; + double R2MinusQ3 = R2 - Q3; + // If one of R2 Q3 is infinite or nan, subtracting them will also be infinite/nan. + // If both are infinite or nan, the subtraction will be nan. + // In either case, we have no finite roots. + if (!std::isfinite(R2MinusQ3)) { + return 0; + } + double adiv3 = a / 3; + double r; + double* roots = solution; + if (R2MinusQ3 < 0) { // we have 3 real roots + // the divide/root can, due to finite precisions, be slightly outside of -1...1 + const double theta = acos(SkTPin(R / std::sqrt(Q3), -1., 1.)); + const double neg2RootQ = -2 * std::sqrt(Q); + + r = neg2RootQ * cos(theta / 3) - adiv3; + *roots++ = r; + + r = neg2RootQ * cos((theta + 2 * PI) / 3) - adiv3; + if (!nearly_equal(solution[0], r)) { + *roots++ = r; + } + r = neg2RootQ * cos((theta - 2 * PI) / 3) - adiv3; + if (!nearly_equal(solution[0], r) && + (roots - solution == 1 || !nearly_equal(solution[1], r))) { + *roots++ = r; + } + } else { // we have 1 real root + const double sqrtR2MinusQ3 = std::sqrt(R2MinusQ3); + A = fabs(R) + sqrtR2MinusQ3; + A = std::cbrt(A); // cube root + if (R > 0) { + A = -A; + } + if (!sk_double_nearly_zero(A)) { + A += Q / A; + } + r = A - adiv3; + *roots++ = r; + if (!sk_double_nearly_zero(R2) && + sk_doubles_nearly_equal_ulps(R2, Q3)) { + r = -A / 2 - adiv3; + if (!nearly_equal(solution[0], r)) { + *roots++ = r; + } + } + } + return static_cast<int>(roots - solution); +} + +int SkCubics::RootsValidT(double A, double B, double C, double D, + double solution[3]) { + double allRoots[3] = {0, 0, 0}; + int realRoots = SkCubics::RootsReal(A, B, C, D, allRoots); + int foundRoots = 0; + for (int index = 0; index < realRoots; ++index) { + double tValue = allRoots[index]; + if (tValue >= 1.0 && tValue <= 1.00005) { + // Make sure we do not already have 1 (or something very close) in the list of roots. + if ((foundRoots < 1 || !sk_doubles_nearly_equal_ulps(solution[0], 1)) && + (foundRoots < 2 || !sk_doubles_nearly_equal_ulps(solution[1], 1))) { + solution[foundRoots++] = 1; + } + } else if (tValue >= -0.00005 && (tValue <= 0.0 || sk_double_nearly_zero(tValue))) { + // Make sure we do not already have 0 (or something very close) in the list of roots. 
+ if ((foundRoots < 1 || !sk_double_nearly_zero(solution[0])) && + (foundRoots < 2 || !sk_double_nearly_zero(solution[1]))) { + solution[foundRoots++] = 0; + } + } else if (tValue > 0.0 && tValue < 1.0) { + solution[foundRoots++] = tValue; + } + } + return foundRoots; +} + +static bool approximately_zero(double x) { + // This cutoff for our binary search hopefully strikes a good balance between + // performance and accuracy. + return std::abs(x) < 0.00000001; +} + +static int find_extrema_valid_t(double A, double B, double C, + double t[2]) { + // To find the local min and max of a cubic, we take the derivative and + // solve when that is equal to 0. + // d/dt (A*t^3 + B*t^2 + C*t + D) = 3A*t^2 + 2B*t + C + double roots[2] = {0, 0}; + int numRoots = SkQuads::RootsReal(3*A, 2*B, C, roots); + int validRoots = 0; + for (int i = 0; i < numRoots; i++) { + double tValue = roots[i]; + if (tValue >= 0 && tValue <= 1.0) { + t[validRoots++] = tValue; + } + } + return validRoots; +} + +static double binary_search(double A, double B, double C, double D, double start, double stop) { + SkASSERT(start <= stop); + double left = SkCubics::EvalAt(A, B, C, D, start); + if (approximately_zero(left)) { + return start; + } + double right = SkCubics::EvalAt(A, B, C, D, stop); + if (!std::isfinite(left) || !std::isfinite(right)) { + return -1; // Not going to deal with one or more endpoints being non-finite. + } + if ((left > 0 && right > 0) || (left < 0 && right < 0)) { + return -1; // We can only have a root if one is above 0 and the other is below 0. + } + + constexpr int maxIterations = 1000; // prevent infinite loop + for (int i = 0; i < maxIterations; i++) { + double step = (start + stop) / 2; + double curr = SkCubics::EvalAt(A, B, C, D, step); + if (approximately_zero(curr)) { + return step; + } + if ((curr < 0 && left < 0) || (curr > 0 && left > 0)) { + // go right + start = step; + } else { + // go left + stop = step; + } + } + return -1; +} + +int SkCubics::BinarySearchRootsValidT(double A, double B, double C, double D, + double solution[3]) { + if (!std::isfinite(A) || !std::isfinite(B) || !std::isfinite(C) || !std::isfinite(D)) { + return 0; + } + double regions[4] = {0, 0, 0, 1}; + // Find local minima and maxima + double minMax[2] = {0, 0}; + int extremaCount = find_extrema_valid_t(A, B, C, minMax); + int startIndex = 2 - extremaCount; + if (extremaCount == 1) { + regions[startIndex + 1] = minMax[0]; + } + if (extremaCount == 2) { + // While the roots will be in the range 0 to 1 inclusive, they might not be sorted. + regions[startIndex + 1] = std::min(minMax[0], minMax[1]); + regions[startIndex + 2] = std::max(minMax[0], minMax[1]); + } + // Starting at regions[startIndex] and going up through regions[3], we have + // an ascending list of numbers in the range 0 to 1.0, between which are the possible + // locations of a root. 
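+    // (Editorial illustration: two extrema at t=0.3 and t=0.7 give regions {0, 0.3, 0.7, 1}
+    // with startIndex=0; a single extremum at t=0.5 gives regions {0, 0, 0.5, 1} with
+    // startIndex=1, so only [0, 0.5] and [0.5, 1] are searched; no extrema leaves just [0, 1].)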
+ int foundRoots = 0; + for (;startIndex < 3; startIndex++) { + double root = binary_search(A, B, C, D, regions[startIndex], regions[startIndex + 1]); + if (root >= 0) { + // Check for duplicates + if ((foundRoots < 1 || !approximately_zero(solution[0] - root)) && + (foundRoots < 2 || !approximately_zero(solution[1] - root))) { + solution[foundRoots++] = root; + } + } + } + return foundRoots; +} diff --git a/gfx/skia/skia/src/base/SkCubics.h b/gfx/skia/skia/src/base/SkCubics.h new file mode 100644 index 0000000000..7e3cbbb567 --- /dev/null +++ b/gfx/skia/skia/src/base/SkCubics.h @@ -0,0 +1,61 @@ +/* + * Copyright 2023 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ +#ifndef SkCubics_DEFINED +#define SkCubics_DEFINED + +/** + * Utilities for dealing with cubic formulas with one variable: + * f(t) = A*t^3 + B*t^2 + C*t + d + */ +class SkCubics { +public: + /** + * Puts up to 3 real solutions to the equation + * A*t^3 + B*t^2 + C*t + d = 0 + * in the provided array and returns how many roots that was. + */ + static int RootsReal(double A, double B, double C, double D, + double solution[3]); + + /** + * Puts up to 3 real solutions to the equation + * A*t^3 + B*t^2 + C*t + D = 0 + * in the provided array, with the constraint that t is in the range [0.0, 1.0], + * and returns how many roots that was. + */ + static int RootsValidT(double A, double B, double C, double D, + double solution[3]); + + + /** + * Puts up to 3 real solutions to the equation + * A*t^3 + B*t^2 + C*t + D = 0 + * in the provided array, with the constraint that t is in the range [0.0, 1.0], + * and returns how many roots that was. + * This is a slower method than RootsValidT, but more accurate in circumstances + * where floating point error gets too big. + */ + static int BinarySearchRootsValidT(double A, double B, double C, double D, + double solution[3]); + + /** + * Evaluates the cubic function with the 4 provided coefficients and the + * provided variable. + */ + static double EvalAt(double A, double B, double C, double D, double t) { + return A * t * t * t + + B * t * t + + C * t + + D; + } + + static double EvalAt(double coefficients[4], double t) { + return EvalAt(coefficients[0], coefficients[1], coefficients[2], coefficients[3], t); + } +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkDeque.cpp b/gfx/skia/skia/src/base/SkDeque.cpp new file mode 100644 index 0000000000..ffff336f90 --- /dev/null +++ b/gfx/skia/skia/src/base/SkDeque.cpp @@ -0,0 +1,310 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkDeque.h" +#include "include/private/base/SkMalloc.h" + +#include <cstddef> + +struct SkDeque::Block { + Block* fNext; + Block* fPrev; + char* fBegin; // start of used section in this chunk + char* fEnd; // end of used section in this chunk + char* fStop; // end of the allocated chunk + + char* start() { return (char*)(this + 1); } + const char* start() const { return (const char*)(this + 1); } + + void init(size_t size) { + fNext = fPrev = nullptr; + fBegin = fEnd = nullptr; + fStop = (char*)this + size; + } +}; + +SkDeque::SkDeque(size_t elemSize, int allocCount) + : fElemSize(elemSize) + , fInitialStorage(nullptr) + , fCount(0) + , fAllocCount(allocCount) { + SkASSERT(allocCount >= 1); + fFrontBlock = fBackBlock = nullptr; + fFront = fBack = nullptr; +} + +SkDeque::SkDeque(size_t elemSize, void* storage, size_t storageSize, int allocCount) + : fElemSize(elemSize) + , fInitialStorage(storage) + , fCount(0) + , fAllocCount(allocCount) { + SkASSERT(storageSize == 0 || storage != nullptr); + SkASSERT(allocCount >= 1); + + if (storageSize >= sizeof(Block) + elemSize) { + fFrontBlock = (Block*)storage; + fFrontBlock->init(storageSize); + } else { + fFrontBlock = nullptr; + } + fBackBlock = fFrontBlock; + fFront = fBack = nullptr; +} + +SkDeque::~SkDeque() { + Block* head = fFrontBlock; + Block* initialHead = (Block*)fInitialStorage; + + while (head) { + Block* next = head->fNext; + if (head != initialHead) { + this->freeBlock(head); + } + head = next; + } +} + +void* SkDeque::push_front() { + fCount += 1; + + if (nullptr == fFrontBlock) { + fFrontBlock = this->allocateBlock(fAllocCount); + fBackBlock = fFrontBlock; // update our linklist + } + + Block* first = fFrontBlock; + char* begin; + + if (nullptr == first->fBegin) { + INIT_CHUNK: + first->fEnd = first->fStop; + begin = first->fStop - fElemSize; + } else { + begin = first->fBegin - fElemSize; + if (begin < first->start()) { // no more room in this chunk + // should we alloc more as we accumulate more elements? + first = this->allocateBlock(fAllocCount); + first->fNext = fFrontBlock; + fFrontBlock->fPrev = first; + fFrontBlock = first; + goto INIT_CHUNK; + } + } + + first->fBegin = begin; + + if (nullptr == fFront) { + SkASSERT(nullptr == fBack); + fFront = fBack = begin; + } else { + SkASSERT(fBack); + fFront = begin; + } + + return begin; +} + +void* SkDeque::push_back() { + fCount += 1; + + if (nullptr == fBackBlock) { + fBackBlock = this->allocateBlock(fAllocCount); + fFrontBlock = fBackBlock; // update our linklist + } + + Block* last = fBackBlock; + char* end; + + if (nullptr == last->fBegin) { + INIT_CHUNK: + last->fBegin = last->start(); + end = last->fBegin + fElemSize; + } else { + end = last->fEnd + fElemSize; + if (end > last->fStop) { // no more room in this chunk + // should we alloc more as we accumulate more elements? 
+ last = this->allocateBlock(fAllocCount); + last->fPrev = fBackBlock; + fBackBlock->fNext = last; + fBackBlock = last; + goto INIT_CHUNK; + } + } + + last->fEnd = end; + end -= fElemSize; + + if (nullptr == fBack) { + SkASSERT(nullptr == fFront); + fFront = fBack = end; + } else { + SkASSERT(fFront); + fBack = end; + } + + return end; +} + +void SkDeque::pop_front() { + SkASSERT(fCount > 0); + fCount -= 1; + + Block* first = fFrontBlock; + + SkASSERT(first != nullptr); + + if (first->fBegin == nullptr) { // we were marked empty from before + first = first->fNext; + SkASSERT(first != nullptr); // else we popped too far + first->fPrev = nullptr; + this->freeBlock(fFrontBlock); + fFrontBlock = first; + } + + char* begin = first->fBegin + fElemSize; + SkASSERT(begin <= first->fEnd); + + if (begin < fFrontBlock->fEnd) { + first->fBegin = begin; + SkASSERT(first->fBegin); + fFront = first->fBegin; + } else { + first->fBegin = first->fEnd = nullptr; // mark as empty + if (nullptr == first->fNext) { + fFront = fBack = nullptr; + } else { + SkASSERT(first->fNext->fBegin); + fFront = first->fNext->fBegin; + } + } +} + +void SkDeque::pop_back() { + SkASSERT(fCount > 0); + fCount -= 1; + + Block* last = fBackBlock; + + SkASSERT(last != nullptr); + + if (last->fEnd == nullptr) { // we were marked empty from before + last = last->fPrev; + SkASSERT(last != nullptr); // else we popped too far + last->fNext = nullptr; + this->freeBlock(fBackBlock); + fBackBlock = last; + } + + char* end = last->fEnd - fElemSize; + SkASSERT(end >= last->fBegin); + + if (end > last->fBegin) { + last->fEnd = end; + SkASSERT(last->fEnd); + fBack = last->fEnd - fElemSize; + } else { + last->fBegin = last->fEnd = nullptr; // mark as empty + if (nullptr == last->fPrev) { + fFront = fBack = nullptr; + } else { + SkASSERT(last->fPrev->fEnd); + fBack = last->fPrev->fEnd - fElemSize; + } + } +} + +int SkDeque::numBlocksAllocated() const { + int numBlocks = 0; + + for (const Block* temp = fFrontBlock; temp; temp = temp->fNext) { + ++numBlocks; + } + + return numBlocks; +} + +SkDeque::Block* SkDeque::allocateBlock(int allocCount) { + Block* newBlock = (Block*)sk_malloc_throw(sizeof(Block) + allocCount * fElemSize); + newBlock->init(sizeof(Block) + allocCount * fElemSize); + return newBlock; +} + +void SkDeque::freeBlock(Block* block) { + sk_free(block); +} + +/////////////////////////////////////////////////////////////////////////////// + +SkDeque::Iter::Iter() : fCurBlock(nullptr), fPos(nullptr), fElemSize(0) {} + +SkDeque::Iter::Iter(const SkDeque& d, IterStart startLoc) { + this->reset(d, startLoc); +} + +// Due to how reset and next work, next actually returns the current element +// pointed to by fPos and then updates fPos to point to the next one. +void* SkDeque::Iter::next() { + char* pos = fPos; + + if (pos) { // if we were valid, try to move to the next setting + char* next = pos + fElemSize; + SkASSERT(next <= fCurBlock->fEnd); + if (next == fCurBlock->fEnd) { // exhausted this chunk, move to next + do { + fCurBlock = fCurBlock->fNext; + } while (fCurBlock != nullptr && fCurBlock->fBegin == nullptr); + next = fCurBlock ? fCurBlock->fBegin : nullptr; + } + fPos = next; + } + return pos; +} + +// Like next, prev actually returns the current element pointed to by fPos and +// then makes fPos point to the previous element. 
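+//
+// (Editorial aside: a typical forward traversal with next() looks like the sketch below;
+// a backward traversal is symmetric, constructing the Iter with kBack_IterStart and
+// calling prev().)
+//
+//     SkDeque::Iter iter(deque, SkDeque::kFront_IterStart);
+//     while (void* elem = iter.next()) {
+//         // 'elem' points at one element of fElemSize bytes
+//     }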
+void* SkDeque::Iter::prev() { + char* pos = fPos; + + if (pos) { // if we were valid, try to move to the prior setting + char* prev = pos - fElemSize; + SkASSERT(prev >= fCurBlock->fBegin - fElemSize); + if (prev < fCurBlock->fBegin) { // exhausted this chunk, move to prior + do { + fCurBlock = fCurBlock->fPrev; + } while (fCurBlock != nullptr && fCurBlock->fEnd == nullptr); + prev = fCurBlock ? fCurBlock->fEnd - fElemSize : nullptr; + } + fPos = prev; + } + return pos; +} + +// reset works by skipping through the spare blocks at the start (or end) +// of the doubly linked list until a non-empty one is found. The fPos +// member is then set to the first (or last) element in the block. If +// there are no elements in the deque both fCurBlock and fPos will come +// out of this routine nullptr. +void SkDeque::Iter::reset(const SkDeque& d, IterStart startLoc) { + fElemSize = d.fElemSize; + + if (kFront_IterStart == startLoc) { + // initialize the iterator to start at the front + fCurBlock = d.fFrontBlock; + while (fCurBlock && nullptr == fCurBlock->fBegin) { + fCurBlock = fCurBlock->fNext; + } + fPos = fCurBlock ? fCurBlock->fBegin : nullptr; + } else { + // initialize the iterator to start at the back + fCurBlock = d.fBackBlock; + while (fCurBlock && nullptr == fCurBlock->fEnd) { + fCurBlock = fCurBlock->fPrev; + } + fPos = fCurBlock ? fCurBlock->fEnd - fElemSize : nullptr; + } +} diff --git a/gfx/skia/skia/src/base/SkEndian.h b/gfx/skia/skia/src/base/SkEndian.h new file mode 100644 index 0000000000..732c248802 --- /dev/null +++ b/gfx/skia/skia/src/base/SkEndian.h @@ -0,0 +1,197 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkEndian_DEFINED +#define SkEndian_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkFeatures.h" + +#include <cstdint> + +/** \file SkEndian.h + + Macros and helper functions for handling 16 and 32 bit values in + big and little endian formats. +*/ + +#if defined(SK_CPU_LENDIAN) && defined(SK_CPU_BENDIAN) + #error "can't have both LENDIAN and BENDIAN defined" +#endif + +#if !defined(SK_CPU_LENDIAN) && !defined(SK_CPU_BENDIAN) + #error "need either LENDIAN or BENDIAN defined" +#endif + +/** Swap the two bytes in the low 16bits of the parameters. + e.g. 0x1234 -> 0x3412 +*/ +static inline uint16_t SkEndianSwap16(uint16_t value) { + return static_cast<uint16_t>((value >> 8) | ((value & 0xFF) << 8)); +} + +template<uint16_t N> struct SkTEndianSwap16 { + static const uint16_t value = static_cast<uint16_t>((N >> 8) | ((N & 0xFF) << 8)); +}; + +/** Vector version of SkEndianSwap16(), which swaps the + low two bytes of each value in the array. +*/ +static inline void SkEndianSwap16s(uint16_t array[], int count) { + SkASSERT(count == 0 || array != nullptr); + + while (--count >= 0) { + *array = SkEndianSwap16(*array); + array += 1; + } +} + +/** Reverse all 4 bytes in a 32bit value. + e.g. 0x12345678 -> 0x78563412 +*/ +static constexpr uint32_t SkEndianSwap32(uint32_t value) { + return ((value & 0xFF) << 24) | + ((value & 0xFF00) << 8) | + ((value & 0xFF0000) >> 8) | + (value >> 24); +} + +template<uint32_t N> struct SkTEndianSwap32 { + static const uint32_t value = ((N & 0xFF) << 24) | + ((N & 0xFF00) << 8) | + ((N & 0xFF0000) >> 8) | + (N >> 24); +}; + +/** Vector version of SkEndianSwap32(), which swaps the + bytes of each value in the array. 
+*/ +static inline void SkEndianSwap32s(uint32_t array[], int count) { + SkASSERT(count == 0 || array != nullptr); + + while (--count >= 0) { + *array = SkEndianSwap32(*array); + array += 1; + } +} + +/** Reverse all 8 bytes in a 64bit value. + e.g. 0x1122334455667788 -> 0x8877665544332211 +*/ +static inline uint64_t SkEndianSwap64(uint64_t value) { + return (((value & 0x00000000000000FFULL) << (8*7)) | + ((value & 0x000000000000FF00ULL) << (8*5)) | + ((value & 0x0000000000FF0000ULL) << (8*3)) | + ((value & 0x00000000FF000000ULL) << (8*1)) | + ((value & 0x000000FF00000000ULL) >> (8*1)) | + ((value & 0x0000FF0000000000ULL) >> (8*3)) | + ((value & 0x00FF000000000000ULL) >> (8*5)) | + ((value) >> (8*7))); +} +template<uint64_t N> struct SkTEndianSwap64 { + static const uint64_t value = (((N & 0x00000000000000FFULL) << (8*7)) | + ((N & 0x000000000000FF00ULL) << (8*5)) | + ((N & 0x0000000000FF0000ULL) << (8*3)) | + ((N & 0x00000000FF000000ULL) << (8*1)) | + ((N & 0x000000FF00000000ULL) >> (8*1)) | + ((N & 0x0000FF0000000000ULL) >> (8*3)) | + ((N & 0x00FF000000000000ULL) >> (8*5)) | + ((N) >> (8*7))); +}; + +/** Vector version of SkEndianSwap64(), which swaps the + bytes of each value in the array. +*/ +static inline void SkEndianSwap64s(uint64_t array[], int count) { + SkASSERT(count == 0 || array != nullptr); + + while (--count >= 0) { + *array = SkEndianSwap64(*array); + array += 1; + } +} + +#ifdef SK_CPU_LENDIAN + #define SkEndian_SwapBE16(n) SkEndianSwap16(n) + #define SkEndian_SwapBE32(n) SkEndianSwap32(n) + #define SkEndian_SwapBE64(n) SkEndianSwap64(n) + #define SkEndian_SwapLE16(n) (n) + #define SkEndian_SwapLE32(n) (n) + #define SkEndian_SwapLE64(n) (n) + + #define SkTEndian_SwapBE16(n) SkTEndianSwap16<n>::value + #define SkTEndian_SwapBE32(n) SkTEndianSwap32<n>::value + #define SkTEndian_SwapBE64(n) SkTEndianSwap64<n>::value + #define SkTEndian_SwapLE16(n) (n) + #define SkTEndian_SwapLE32(n) (n) + #define SkTEndian_SwapLE64(n) (n) +#else // SK_CPU_BENDIAN + #define SkEndian_SwapBE16(n) (n) + #define SkEndian_SwapBE32(n) (n) + #define SkEndian_SwapBE64(n) (n) + #define SkEndian_SwapLE16(n) SkEndianSwap16(n) + #define SkEndian_SwapLE32(n) SkEndianSwap32(n) + #define SkEndian_SwapLE64(n) SkEndianSwap64(n) + + #define SkTEndian_SwapBE16(n) (n) + #define SkTEndian_SwapBE32(n) (n) + #define SkTEndian_SwapBE64(n) (n) + #define SkTEndian_SwapLE16(n) SkTEndianSwap16<n>::value + #define SkTEndian_SwapLE32(n) SkTEndianSwap32<n>::value + #define SkTEndian_SwapLE64(n) SkTEndianSwap64<n>::value +#endif + +// When a bytestream is embedded in a 32-bit word, how far we need to +// shift the word to extract each byte from the low 8 bits by anding with 0xff. 
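+//
+// For example (editorial note): (word >> SkEndian_Byte1Shift) & 0xff yields byte 1 of the
+// embedded bytestream on either endianness, because the shift amounts below are reversed to
+// match the CPU's byte order.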
+#ifdef SK_CPU_LENDIAN + #define SkEndian_Byte0Shift 0 + #define SkEndian_Byte1Shift 8 + #define SkEndian_Byte2Shift 16 + #define SkEndian_Byte3Shift 24 +#else // SK_CPU_BENDIAN + #define SkEndian_Byte0Shift 24 + #define SkEndian_Byte1Shift 16 + #define SkEndian_Byte2Shift 8 + #define SkEndian_Byte3Shift 0 +#endif + + +#if defined(SK_UINT8_BITFIELD_LENDIAN) && defined(SK_UINT8_BITFIELD_BENDIAN) + #error "can't have both bitfield LENDIAN and BENDIAN defined" +#endif + +#if !defined(SK_UINT8_BITFIELD_LENDIAN) && !defined(SK_UINT8_BITFIELD_BENDIAN) + #ifdef SK_CPU_LENDIAN + #define SK_UINT8_BITFIELD_LENDIAN + #else + #define SK_UINT8_BITFIELD_BENDIAN + #endif +#endif + +#ifdef SK_UINT8_BITFIELD_LENDIAN + #define SK_UINT8_BITFIELD(f0, f1, f2, f3, f4, f5, f6, f7) \ + SK_OT_BYTE f0 : 1; \ + SK_OT_BYTE f1 : 1; \ + SK_OT_BYTE f2 : 1; \ + SK_OT_BYTE f3 : 1; \ + SK_OT_BYTE f4 : 1; \ + SK_OT_BYTE f5 : 1; \ + SK_OT_BYTE f6 : 1; \ + SK_OT_BYTE f7 : 1; +#else + #define SK_UINT8_BITFIELD(f0, f1, f2, f3, f4, f5, f6, f7) \ + SK_OT_BYTE f7 : 1; \ + SK_OT_BYTE f6 : 1; \ + SK_OT_BYTE f5 : 1; \ + SK_OT_BYTE f4 : 1; \ + SK_OT_BYTE f3 : 1; \ + SK_OT_BYTE f2 : 1; \ + SK_OT_BYTE f1 : 1; \ + SK_OT_BYTE f0 : 1; +#endif + +#endif diff --git a/gfx/skia/skia/src/base/SkFloatingPoint.cpp b/gfx/skia/skia/src/base/SkFloatingPoint.cpp new file mode 100644 index 0000000000..3e3d91d6e5 --- /dev/null +++ b/gfx/skia/skia/src/base/SkFloatingPoint.cpp @@ -0,0 +1,51 @@ +/* + * Copyright 2023 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "include/private/base/SkFloatingPoint.h" + +#include "include/private/base/SkAssert.h" + +#include <cmath> + +static inline int64_t double_to_twos_complement_bits(double x) { + // Convert a double to its bit pattern + int64_t bits = 0; + static_assert(sizeof(x) == sizeof(bits)); + std::memcpy(&bits, &x, sizeof(bits)); + // Convert a sign-bit int (i.e. double interpreted as int) into a 2s complement + // int. This also converts -0 (0x8000000000000000) to 0. Doing this to a double allows + // it to be compared using normal C operators (<, <=, etc.) + if (bits < 0) { + bits &= 0x7FFFFFFFFFFFFFFF; + bits = -bits; + } + return bits; +} + +// Arbitrarily chosen. +constexpr static double sk_double_epsilon = 0.0000000001; + +bool sk_doubles_nearly_equal_ulps(double a, double b, uint8_t max_ulps_diff) { + // If both of these are zero (or very close), then using Units of Least Precision + // will not be accurate and we should use sk_double_nearly_zero instead. + SkASSERT(!(fabs(a) < sk_double_epsilon && fabs(b) < sk_double_epsilon)); + // This algorithm does not work if both inputs are NaN. + SkASSERT(!(std::isnan(a) && std::isnan(b))); + // If both inputs are infinity (or actually equal), this catches it. + if (a == b) { + return true; + } + int64_t aBits = double_to_twos_complement_bits(a); + int64_t bBits = double_to_twos_complement_bits(b); + + // Find the difference in Units of Least Precision (ULPs). + return aBits < bBits + max_ulps_diff && bBits < aBits + max_ulps_diff; +} + +bool sk_double_nearly_zero(double a) { + return a == 0 || fabs(a) < sk_double_epsilon; +} diff --git a/gfx/skia/skia/src/base/SkHalf.cpp b/gfx/skia/skia/src/base/SkHalf.cpp new file mode 100644 index 0000000000..024daa29b8 --- /dev/null +++ b/gfx/skia/skia/src/base/SkHalf.cpp @@ -0,0 +1,97 @@ +/* + * Copyright 2014 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#include "include/private/base/SkFloatBits.h" +#include "src/base/SkHalf.h" + +uint16_t halfMantissa(SkHalf h) { + return h & 0x03ff; +} + +uint16_t halfExponent(SkHalf h) { + return (h >> 10) & 0x001f; +} + +uint16_t halfSign(SkHalf h) { + return h >> 15; +} + +union FloatUIntUnion { + uint32_t fUInt; // this must come first for the initializations below to work + float fFloat; +}; + +// based on Fabien Giesen's float_to_half_fast3() +// see https://gist.github.com/rygorous/2156668 +SkHalf SkFloatToHalf(float f) { + static const uint32_t f32infty = { 255 << 23 }; + static const uint32_t f16infty = { 31 << 23 }; + static const FloatUIntUnion magic = { 15 << 23 }; + static const uint32_t sign_mask = 0x80000000u; + static const uint32_t round_mask = ~0xfffu; + SkHalf o = 0; + + FloatUIntUnion floatUnion; + floatUnion.fFloat = f; + + uint32_t sign = floatUnion.fUInt & sign_mask; + floatUnion.fUInt ^= sign; + + // NOTE all the integer compares in this function can be safely + // compiled into signed compares since all operands are below + // 0x80000000. Important if you want fast straight SSE2 code + // (since there's no unsigned PCMPGTD). + + // Inf or NaN (all exponent bits set) + if (floatUnion.fUInt >= f32infty) + // NaN->qNaN and Inf->Inf + o = (floatUnion.fUInt > f32infty) ? 0x7e00 : 0x7c00; + // (De)normalized number or zero + else { + floatUnion.fUInt &= round_mask; + floatUnion.fFloat *= magic.fFloat; + floatUnion.fUInt -= round_mask; + // Clamp to signed infinity if overflowed + if (floatUnion.fUInt > f16infty) { + floatUnion.fUInt = f16infty; + } + + o = floatUnion.fUInt >> 13; // Take the bits! + } + + o |= sign >> 16; + return o; +} + +// based on Fabien Giesen's half_to_float_fast2() +// see https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ +float SkHalfToFloat(SkHalf h) { + static const FloatUIntUnion magic = { 126 << 23 }; + FloatUIntUnion o; + + if (halfExponent(h) == 0) + { + // Zero / Denormal + o.fUInt = magic.fUInt + halfMantissa(h); + o.fFloat -= magic.fFloat; + } + else + { + // Set mantissa + o.fUInt = halfMantissa(h) << 13; + // Set exponent + if (halfExponent(h) == 0x1f) + // Inf/NaN + o.fUInt |= (255 << 23); + else + o.fUInt |= ((127 - 15 + halfExponent(h)) << 23); + } + + // Set sign + o.fUInt |= (halfSign(h) << 31); + return o.fFloat; +} diff --git a/gfx/skia/skia/src/base/SkHalf.h b/gfx/skia/skia/src/base/SkHalf.h new file mode 100644 index 0000000000..d88c80d9db --- /dev/null +++ b/gfx/skia/skia/src/base/SkHalf.h @@ -0,0 +1,37 @@ +/* + * Copyright 2014 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkHalf_DEFINED +#define SkHalf_DEFINED + +#include "src/base/SkVx.h" + +// 16-bit floating point value +// format is 1 bit sign, 5 bits exponent, 10 bits mantissa +// only used for storage +typedef uint16_t SkHalf; + +static constexpr uint16_t SK_HalfMin = 0x0400; // 2^-14 (minimum positive normal value) +static constexpr uint16_t SK_HalfMax = 0x7bff; // 65504 +static constexpr uint16_t SK_HalfEpsilon = 0x1400; // 2^-10 +static constexpr uint16_t SK_Half1 = 0x3C00; // 1 + +// convert between half and single precision floating point +float SkHalfToFloat(SkHalf h); +SkHalf SkFloatToHalf(float f); + +// Convert between half and single precision floating point, +// assuming inputs and outputs are both finite, and may +// flush values which would be denormal half floats to zero. 
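+//
+// (Editorial worked example of the layout above: SK_Half1 == 0x3C00 is sign 0, biased
+// exponent 0b01111 == 15, mantissa 0, i.e. 1.0 * 2^(15-15) == 1.0f; SK_HalfMax == 0x7bff
+// decodes to 65504, the largest finite half.)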
+static inline skvx::float4 SkHalfToFloat_finite_ftz(uint64_t rgba) { + return skvx::from_half(skvx::half4::Load(&rgba)); +} +static inline skvx::half4 SkFloatToHalf_finite_ftz(const skvx::float4& c) { + return skvx::to_half(c); +} + +#endif diff --git a/gfx/skia/skia/src/base/SkLeanWindows.h b/gfx/skia/skia/src/base/SkLeanWindows.h new file mode 100644 index 0000000000..d43150db76 --- /dev/null +++ b/gfx/skia/skia/src/base/SkLeanWindows.h @@ -0,0 +1,35 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ +#ifndef SkLeanWindows_DEFINED +#define SkLeanWindows_DEFINED + +#include "include/private/base/SkFeatures.h" // IWYU pragma: keep + +#ifdef SK_BUILD_FOR_WIN +// https://devblogs.microsoft.com/oldnewthing/20091130-00/?p=15863 +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# define WIN32_IS_MEAN_WAS_LOCALLY_DEFINED +# endif +# ifndef NOMINMAX +# define NOMINMAX +# define NOMINMAX_WAS_LOCALLY_DEFINED +# endif +# +# include <windows.h> +# +# ifdef WIN32_IS_MEAN_WAS_LOCALLY_DEFINED +# undef WIN32_IS_MEAN_WAS_LOCALLY_DEFINED +# undef WIN32_LEAN_AND_MEAN +# endif +# ifdef NOMINMAX_WAS_LOCALLY_DEFINED +# undef NOMINMAX_WAS_LOCALLY_DEFINED +# undef NOMINMAX +# endif +#endif + +#endif // SkLeanWindows_DEFINED diff --git a/gfx/skia/skia/src/base/SkMSAN.h b/gfx/skia/skia/src/base/SkMSAN.h new file mode 100644 index 0000000000..85fa2fce4b --- /dev/null +++ b/gfx/skia/skia/src/base/SkMSAN.h @@ -0,0 +1,43 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkMSAN_DEFINED +#define SkMSAN_DEFINED + +#include "include/private/base/SkAssert.h" + +#include <cstddef> +#include <string.h> + +// Typically declared in LLVM's msan_interface.h. Easier for us to just re-declare. +extern "C" { + void __msan_check_mem_is_initialized(const volatile void*, size_t); + void __msan_unpoison (const volatile void*, size_t); +} + +// Code that requires initialized inputs can call this to make it clear that +// the blame for use of uninitialized data belongs further up the call stack. +static inline void sk_msan_assert_initialized(const void* begin, const void* end) { +#if defined(__has_feature) + #if __has_feature(memory_sanitizer) + __msan_check_mem_is_initialized(begin, (const char*)end - (const char*)begin); + #endif +#endif +} + +// Lie to MSAN that this range of memory is initialized. +// This can hide serious problems if overused. Every use of this should refer to a bug. +static inline void sk_msan_mark_initialized(const void* begin, const void* end, const char* skbug) { + SkASSERT(skbug && 0 != strcmp(skbug, "")); +#if defined(__has_feature) + #if __has_feature(memory_sanitizer) + __msan_unpoison(begin, (const char*)end - (const char*)begin); + #endif +#endif +} + +#endif//SkMSAN_DEFINED diff --git a/gfx/skia/skia/src/base/SkMalloc.cpp b/gfx/skia/skia/src/base/SkMalloc.cpp new file mode 100644 index 0000000000..944b4847b7 --- /dev/null +++ b/gfx/skia/skia/src/base/SkMalloc.cpp @@ -0,0 +1,22 @@ +// Copyright 2019 Google LLC. +// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. 
+ +#include "include/private/base/SkMalloc.h" + +#include "src/base/SkSafeMath.h" + +void* sk_calloc_throw(size_t count, size_t elemSize) { + return sk_calloc_throw(SkSafeMath::Mul(count, elemSize)); +} + +void* sk_malloc_throw(size_t count, size_t elemSize) { + return sk_malloc_throw(SkSafeMath::Mul(count, elemSize)); +} + +void* sk_realloc_throw(void* buffer, size_t count, size_t elemSize) { + return sk_realloc_throw(buffer, SkSafeMath::Mul(count, elemSize)); +} + +void* sk_malloc_canfail(size_t count, size_t elemSize) { + return sk_malloc_canfail(SkSafeMath::Mul(count, elemSize)); +} diff --git a/gfx/skia/skia/src/base/SkMathPriv.cpp b/gfx/skia/skia/src/base/SkMathPriv.cpp new file mode 100644 index 0000000000..2674e69886 --- /dev/null +++ b/gfx/skia/skia/src/base/SkMathPriv.cpp @@ -0,0 +1,73 @@ +/* + * Copyright 2008 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "src/base/SkMathPriv.h" + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkFloatingPoint.h" + +#include <cstddef> +#include <cstdint> + +/////////////////////////////////////////////////////////////////////////////// + +/* www.worldserver.com/turk/computergraphics/FixedSqrt.pdf +*/ +int32_t SkSqrtBits(int32_t x, int count) { + SkASSERT(x >= 0 && count > 0 && (unsigned)count <= 30); + + uint32_t root = 0; + uint32_t remHi = 0; + uint32_t remLo = x; + + do { + root <<= 1; + + remHi = (remHi<<2) | (remLo>>30); + remLo <<= 2; + + uint32_t testDiv = (root << 1) + 1; + if (remHi >= testDiv) { + remHi -= testDiv; + root++; + } + } while (--count >= 0); + + return root; +} + +// Kernighan's method +int SkPopCount_portable(uint32_t n) { + int count = 0; + + while (n) { + n &= (n - 1); // Remove the lowest bit in the integer. + count++; + } + return count; +} + +// Here we strip off the unwanted bits and then return the number of trailing zero bits +int SkNthSet(uint32_t target, int n) { + SkASSERT(n < SkPopCount(target)); + + for (int i = 0; i < n; ++i) { + target &= (target - 1); // Remove the lowest bit in the integer. + } + + return SkCTZ(target); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +bool sk_floats_are_unit(const float array[], size_t count) { + bool is_unit = true; + for (size_t i = 0; i < count; ++i) { + is_unit &= (array[i] >= 0) & (array[i] <= 1); + } + return is_unit; +} diff --git a/gfx/skia/skia/src/base/SkMathPriv.h b/gfx/skia/skia/src/base/SkMathPriv.h new file mode 100644 index 0000000000..0bcb113b6d --- /dev/null +++ b/gfx/skia/skia/src/base/SkMathPriv.h @@ -0,0 +1,346 @@ +/* + * Copyright 2012 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkMathPriv_DEFINED +#define SkMathPriv_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkCPUTypes.h" +#include "include/private/base/SkTemplates.h" + +#include <cstddef> +#include <cstdint> + +/** + * Return the integer square root of value, with a bias of bitBias + */ +int32_t SkSqrtBits(int32_t value, int bitBias); + +/** Return the integer square root of n, treated as a SkFixed (16.16) + */ +static inline int32_t SkSqrt32(int32_t n) { return SkSqrtBits(n, 15); } + +/** + * Returns (value < 0 ? 0 : value) efficiently (i.e. 
no compares or branches) + */ +static inline int SkClampPos(int value) { + return value & ~(value >> 31); +} + +/** + * Stores numer/denom and numer%denom into div and mod respectively. + */ +template <typename In, typename Out> +inline void SkTDivMod(In numer, In denom, Out* div, Out* mod) { +#ifdef SK_CPU_ARM32 + // If we wrote this as in the else branch, GCC won't fuse the two into one + // divmod call, but rather a div call followed by a divmod. Silly! This + // version is just as fast as calling __aeabi_[u]idivmod manually, but with + // prettier code. + // + // This benches as around 2x faster than the code in the else branch. + const In d = numer/denom; + *div = static_cast<Out>(d); + *mod = static_cast<Out>(numer-d*denom); +#else + // On x86 this will just be a single idiv. + *div = static_cast<Out>(numer/denom); + *mod = static_cast<Out>(numer%denom); +#endif +} + +/** Returns -1 if n < 0, else returns 0 + */ +#define SkExtractSign(n) ((int32_t)(n) >> 31) + +/** If sign == -1, returns -n, else sign must be 0, and returns n. + Typically used in conjunction with SkExtractSign(). + */ +static inline int32_t SkApplySign(int32_t n, int32_t sign) { + SkASSERT(sign == 0 || sign == -1); + return (n ^ sign) - sign; +} + +/** Return x with the sign of y */ +static inline int32_t SkCopySign32(int32_t x, int32_t y) { + return SkApplySign(x, SkExtractSign(x ^ y)); +} + +/** Given a positive value and a positive max, return the value + pinned against max. + Note: only works as long as max - value doesn't wrap around + @return max if value >= max, else value + */ +static inline unsigned SkClampUMax(unsigned value, unsigned max) { + if (value > max) { + value = max; + } + return value; +} + +// If a signed int holds min_int (e.g. 0x80000000) it is undefined what happens when +// we negate it (even though we *know* we're 2's complement and we'll get the same +// value back). So we create this helper function that casts to size_t (unsigned) first, +// to avoid the complaint. +static inline size_t sk_negate_to_size_t(int32_t value) { +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4146) // Thanks MSVC, we know what we're negating an unsigned +#endif + return -static_cast<size_t>(value); +#if defined(_MSC_VER) +#pragma warning(pop) +#endif +} + +/////////////////////////////////////////////////////////////////////////////// + +/** Return a*b/255, truncating away any fractional bits. Only valid if both + a and b are 0..255 + */ +static inline U8CPU SkMulDiv255Trunc(U8CPU a, U8CPU b) { + SkASSERT((uint8_t)a == a); + SkASSERT((uint8_t)b == b); + unsigned prod = a*b + 1; + return (prod + (prod >> 8)) >> 8; +} + +/** Return (a*b)/255, taking the ceiling of any fractional bits. Only valid if + both a and b are 0..255. The expected result equals (a * b + 254) / 255. + */ +static inline U8CPU SkMulDiv255Ceiling(U8CPU a, U8CPU b) { + SkASSERT((uint8_t)a == a); + SkASSERT((uint8_t)b == b); + unsigned prod = a*b + 255; + return (prod + (prod >> 8)) >> 8; +} + +/** Just the rounding step in SkDiv255Round: round(value / 255) + */ +static inline unsigned SkDiv255Round(unsigned prod) { + prod += 128; + return (prod + (prod >> 8)) >> 8; +} + +/** + * Swap byte order of a 4-byte value, e.g. 0xaarrggbb -> 0xbbggrraa. 
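+ * For example, SkBSwap32(0x11223344) returns 0x44332211.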
+ */ +#if defined(_MSC_VER) + #include <stdlib.h> + static inline uint32_t SkBSwap32(uint32_t v) { return _byteswap_ulong(v); } +#else + static inline uint32_t SkBSwap32(uint32_t v) { return __builtin_bswap32(v); } +#endif + +/* + * Return the number of set bits (i.e., the population count) in the provided uint32_t. + */ +int SkPopCount_portable(uint32_t n); + +#if defined(__GNUC__) || defined(__clang__) + static inline int SkPopCount(uint32_t n) { + return __builtin_popcount(n); + } +#else + static inline int SkPopCount(uint32_t n) { + return SkPopCount_portable(n); + } +#endif + +/* + * Return the 0-based index of the nth bit set in target + * Returns 32 if there is no nth bit set. + */ +int SkNthSet(uint32_t target, int n); + +//! Returns the number of leading zero bits (0...32) +// From Hacker's Delight 2nd Edition +constexpr int SkCLZ_portable(uint32_t x) { + int n = 32; + uint32_t y = x >> 16; if (y != 0) {n -= 16; x = y;} + y = x >> 8; if (y != 0) {n -= 8; x = y;} + y = x >> 4; if (y != 0) {n -= 4; x = y;} + y = x >> 2; if (y != 0) {n -= 2; x = y;} + y = x >> 1; if (y != 0) {return n - 2;} + return n - static_cast<int>(x); +} + +static_assert(32 == SkCLZ_portable(0)); +static_assert(31 == SkCLZ_portable(1)); +static_assert( 1 == SkCLZ_portable(1 << 30)); +static_assert( 1 == SkCLZ_portable((1 << 30) | (1 << 24) | 1)); +static_assert( 0 == SkCLZ_portable(~0U)); + +#if defined(SK_BUILD_FOR_WIN) + #include <intrin.h> + + static inline int SkCLZ(uint32_t mask) { + if (mask) { + unsigned long index = 0; + _BitScanReverse(&index, mask); + // Suppress this bogus /analyze warning. The check for non-zero + // guarantees that _BitScanReverse will succeed. + #pragma warning(suppress : 6102) // Using 'index' from failed function call + return index ^ 0x1F; + } else { + return 32; + } + } +#elif defined(SK_CPU_ARM32) || defined(__GNUC__) || defined(__clang__) + static inline int SkCLZ(uint32_t mask) { + // __builtin_clz(0) is undefined, so we have to detect that case. + return mask ? __builtin_clz(mask) : 32; + } +#else + static inline int SkCLZ(uint32_t mask) { + return SkCLZ_portable(mask); + } +#endif + +//! Returns the number of trailing zero bits (0...32) +// From Hacker's Delight 2nd Edition +constexpr int SkCTZ_portable(uint32_t x) { + return 32 - SkCLZ_portable(~x & (x - 1)); +} + +static_assert(32 == SkCTZ_portable(0)); +static_assert( 0 == SkCTZ_portable(1)); +static_assert(30 == SkCTZ_portable(1 << 30)); +static_assert( 2 == SkCTZ_portable((1 << 30) | (1 << 24) | (1 << 2))); +static_assert( 0 == SkCTZ_portable(~0U)); + +#if defined(SK_BUILD_FOR_WIN) + #include <intrin.h> + + static inline int SkCTZ(uint32_t mask) { + if (mask) { + unsigned long index = 0; + _BitScanForward(&index, mask); + // Suppress this bogus /analyze warning. The check for non-zero + // guarantees that _BitScanReverse will succeed. + #pragma warning(suppress : 6102) // Using 'index' from failed function call + return index; + } else { + return 32; + } + } +#elif defined(SK_CPU_ARM32) || defined(__GNUC__) || defined(__clang__) + static inline int SkCTZ(uint32_t mask) { + // __builtin_ctz(0) is undefined, so we have to detect that case. + return mask ? __builtin_ctz(mask) : 32; + } +#else + static inline int SkCTZ(uint32_t mask) { + return SkCTZ_portable(mask); + } +#endif + +/** + * Returns the log2 of the specified value, were that value to be rounded up + * to the next power of 2. It is undefined to pass 0. 
Examples: + * SkNextLog2(1) -> 0 + * SkNextLog2(2) -> 1 + * SkNextLog2(3) -> 2 + * SkNextLog2(4) -> 2 + * SkNextLog2(5) -> 3 + */ +static inline int SkNextLog2(uint32_t value) { + SkASSERT(value != 0); + return 32 - SkCLZ(value - 1); +} + +constexpr int SkNextLog2_portable(uint32_t value) { + SkASSERT(value != 0); + return 32 - SkCLZ_portable(value - 1); +} + +/** +* Returns the log2 of the specified value, were that value to be rounded down +* to the previous power of 2. It is undefined to pass 0. Examples: +* SkPrevLog2(1) -> 0 +* SkPrevLog2(2) -> 1 +* SkPrevLog2(3) -> 1 +* SkPrevLog2(4) -> 2 +* SkPrevLog2(5) -> 2 +*/ +static inline int SkPrevLog2(uint32_t value) { + SkASSERT(value != 0); + return 32 - SkCLZ(value >> 1); +} + +constexpr int SkPrevLog2_portable(uint32_t value) { + SkASSERT(value != 0); + return 32 - SkCLZ_portable(value >> 1); +} + +/** + * Returns the smallest power-of-2 that is >= the specified value. If value + * is already a power of 2, then it is returned unchanged. It is undefined + * if value is <= 0. + */ +static inline int SkNextPow2(int value) { + SkASSERT(value > 0); + return 1 << SkNextLog2(static_cast<uint32_t>(value)); +} + +constexpr int SkNextPow2_portable(int value) { + SkASSERT(value > 0); + return 1 << SkNextLog2_portable(static_cast<uint32_t>(value)); +} + +/** +* Returns the largest power-of-2 that is <= the specified value. If value +* is already a power of 2, then it is returned unchanged. It is undefined +* if value is <= 0. +*/ +static inline int SkPrevPow2(int value) { + SkASSERT(value > 0); + return 1 << SkPrevLog2(static_cast<uint32_t>(value)); +} + +constexpr int SkPrevPow2_portable(int value) { + SkASSERT(value > 0); + return 1 << SkPrevLog2_portable(static_cast<uint32_t>(value)); +} + +/////////////////////////////////////////////////////////////////////////////// + +/** + * Return the smallest power-of-2 >= n. + */ +static inline uint32_t GrNextPow2(uint32_t n) { + return n ? (1 << (32 - SkCLZ(n - 1))) : 1; +} + +/** + * Returns the next power of 2 >= n or n if the next power of 2 can't be represented by size_t. + */ +static inline size_t GrNextSizePow2(size_t n) { + constexpr int kNumSizeTBits = 8 * sizeof(size_t); + constexpr size_t kHighBitSet = size_t(1) << (kNumSizeTBits - 1); + + if (!n) { + return 1; + } else if (n >= kHighBitSet) { + return n; + } + + n--; + uint32_t shift = 1; + while (shift < kNumSizeTBits) { + n |= n >> shift; + shift <<= 1; + } + return n + 1; +} + +// conservative check. will return false for very large values that "could" fit +template <typename T> static inline bool SkFitsInFixed(T x) { + return SkTAbs(x) <= 32767.0f; +} + +#endif diff --git a/gfx/skia/skia/src/base/SkQuads.cpp b/gfx/skia/skia/src/base/SkQuads.cpp new file mode 100644 index 0000000000..a77837932c --- /dev/null +++ b/gfx/skia/skia/src/base/SkQuads.cpp @@ -0,0 +1,69 @@ +/* + * Copyright 2023 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ +#include "src/base/SkQuads.h" + +#include "include/private/base/SkFloatingPoint.h" + +#include <cmath> + +// Solve 0 = M * x + B. If M is 0, there are no solutions, unless B is also 0, +// in which case there are infinite solutions, so we just return 1 of them. 
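+// For example, M = 2, B = 6 yields the single root x = -3.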
+static int solve_linear(const double M, const double B, double solution[2]) { + if (sk_double_nearly_zero(M)) { + solution[0] = 0; + if (sk_double_nearly_zero(B)) { + return 1; + } + return 0; + } + solution[0] = -B / M; + if (!std::isfinite(solution[0])) { + return 0; + } + return 1; +} + +// When the A coefficient of a quadratic is close to 0, there can be floating point error +// that arises from computing a very large root. In those cases, we would rather be +// precise about the one smaller root, so we have this arbitrary cutoff for when A is +// really small or small compared to B. +static bool close_to_linear(double A, double B) { + if (sk_double_nearly_zero(B)) { + return sk_double_nearly_zero(A); + } + // This is a different threshold (tighter) than the close_to_a_quadratic in SkCubics.cpp + // because the SkQuads::RootsReal gives better answers for longer as A/B -> 0. + return std::abs(A / B) < 1.0e-16; +} + +int SkQuads::RootsReal(const double A, const double B, const double C, double solution[2]) { + if (close_to_linear(A, B)) { + return solve_linear(B, C, solution); + } + // If A is zero (e.g. B was nan and thus close_to_linear was false), we will + // temporarily have infinities rolling about, but will catch that when checking + // p2 - q. + const double p = sk_ieee_double_divide(B, 2 * A); + const double q = sk_ieee_double_divide(C, A); + /* normal form: x^2 + px + q = 0 */ + const double p2 = p * p; + if (!std::isfinite(p2 - q) || + (!sk_double_nearly_zero(p2 - q) && p2 < q)) { + return 0; + } + double sqrt_D = 0; + if (p2 > q) { + sqrt_D = sqrt(p2 - q); + } + solution[0] = sqrt_D - p; + solution[1] = -sqrt_D - p; + if (sk_double_nearly_zero(sqrt_D) || + sk_doubles_nearly_equal_ulps(solution[0], solution[1])) { + return 1; + } + return 2; +} diff --git a/gfx/skia/skia/src/base/SkQuads.h b/gfx/skia/skia/src/base/SkQuads.h new file mode 100644 index 0000000000..645d43bcd4 --- /dev/null +++ b/gfx/skia/skia/src/base/SkQuads.h @@ -0,0 +1,36 @@ +/* + * Copyright 2023 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkQuads_DEFINED +#define SkQuads_DEFINED + +/** + * Utilities for dealing with quadratic formulas with one variable: + * f(t) = A*t^2 + B*t + C + */ +class SkQuads { +public: + /** + * Puts up to 2 real solutions to the equation + * A*t^2 + B*t + C = 0 + * in the provided array. + */ + static int RootsReal(double A, double B, double C, + double solution[2]); + + /** + * Evaluates the quadratic function with the 3 provided coefficients and the + * provided variable. + */ + static double EvalAt(double A, double B, double C, double t) { + return A * t * t + + B * t + + C; + } +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkRandom.h b/gfx/skia/skia/src/base/SkRandom.h new file mode 100644 index 0000000000..96b3824896 --- /dev/null +++ b/gfx/skia/skia/src/base/SkRandom.h @@ -0,0 +1,173 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkRandom_DEFINED +#define SkRandom_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkFixed.h" +#include "include/private/base/SkFloatBits.h" + +#include <cstdint> + +typedef float SkScalar; + +/** \class SkRandom + + Utility class that implements pseudo random 32bit numbers using Marsaglia's + multiply-with-carry "mother of all" algorithm. 
Unlike rand(), this class holds + its own state, so that multiple instances can be used with no side-effects. + + Has a large period and all bits are well-randomized. + */ +class SkRandom { +public: + SkRandom() { init(0); } + SkRandom(uint32_t seed) { init(seed); } + SkRandom(const SkRandom& rand) : fK(rand.fK), fJ(rand.fJ) {} + + SkRandom& operator=(const SkRandom& rand) { + fK = rand.fK; + fJ = rand.fJ; + + return *this; + } + + /** Return the next pseudo random number as an unsigned 32bit value. + */ + uint32_t nextU() { + fK = kKMul*(fK & 0xffff) + (fK >> 16); + fJ = kJMul*(fJ & 0xffff) + (fJ >> 16); + return (((fK << 16) | (fK >> 16)) + fJ); + } + + /** Return the next pseudo random number as a signed 32bit value. + */ + int32_t nextS() { return (int32_t)this->nextU(); } + + /** + * Returns value [0...1) as an IEEE float + */ + float nextF() { + int floatint = 0x3f800000 | (int)(this->nextU() >> 9); + float f = SkBits2Float(floatint) - 1.0f; + return f; + } + + /** + * Returns value [min...max) as a float + */ + float nextRangeF(float min, float max) { + return min + this->nextF() * (max - min); + } + + /** Return the next pseudo random number, as an unsigned value of + at most bitCount bits. + @param bitCount The maximum number of bits to be returned + */ + uint32_t nextBits(unsigned bitCount) { + SkASSERT(bitCount > 0 && bitCount <= 32); + return this->nextU() >> (32 - bitCount); + } + + /** Return the next pseudo random unsigned number, mapped to lie within + [min, max] inclusive. + */ + uint32_t nextRangeU(uint32_t min, uint32_t max) { + SkASSERT(min <= max); + uint32_t range = max - min + 1; + if (0 == range) { + return this->nextU(); + } else { + return min + this->nextU() % range; + } + } + + /** Return the next pseudo random unsigned number, mapped to lie within + [0, count). + */ + uint32_t nextULessThan(uint32_t count) { + SkASSERT(count > 0); + return this->nextRangeU(0, count - 1); + } + + /** Return the next pseudo random number expressed as a SkScalar + in the range [0..SK_Scalar1). + */ + SkScalar nextUScalar1() { return SkFixedToScalar(this->nextUFixed1()); } + + /** Return the next pseudo random number expressed as a SkScalar + in the range [min..max). + */ + SkScalar nextRangeScalar(SkScalar min, SkScalar max) { + return this->nextUScalar1() * (max - min) + min; + } + + /** Return the next pseudo random number expressed as a SkScalar + in the range [-SK_Scalar1..SK_Scalar1). + */ + SkScalar nextSScalar1() { return SkFixedToScalar(this->nextSFixed1()); } + + /** Return the next pseudo random number as a bool. + */ + bool nextBool() { return this->nextU() >= 0x80000000; } + + /** A biased version of nextBool(). + */ + bool nextBiasedBool(SkScalar fractionTrue) { + SkASSERT(fractionTrue >= 0 && fractionTrue <= 1); + return this->nextUScalar1() <= fractionTrue; + } + + /** Reset the random object. + */ + void setSeed(uint32_t seed) { init(seed); } + +private: + // Initialize state variables with LCG. + // We must ensure that both J and K are non-zero, otherwise the + // multiply-with-carry step will forevermore return zero. + void init(uint32_t seed) { + fK = NextLCG(seed); + if (0 == fK) { + fK = NextLCG(fK); + } + fJ = NextLCG(fK); + if (0 == fJ) { + fJ = NextLCG(fJ); + } + SkASSERT(0 != fK && 0 != fJ); + } + static uint32_t NextLCG(uint32_t seed) { return kMul*seed + kAdd; } + + /** Return the next pseudo random number expressed as an unsigned SkFixed + in the range [0..SK_Fixed1). 
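+ (SK_Fixed1 is 1 << 16, so the top 16 bits of nextU() fall in this range.)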
+ */ + SkFixed nextUFixed1() { return this->nextU() >> 16; } + + /** Return the next pseudo random number expressed as a signed SkFixed + in the range [-SK_Fixed1..SK_Fixed1). + */ + SkFixed nextSFixed1() { return this->nextS() >> 15; } + + // See "Numerical Recipes in C", 1992 page 284 for these constants + // For the LCG that sets the initial state from a seed + enum { + kMul = 1664525, + kAdd = 1013904223 + }; + // Constants for the multiply-with-carry steps + enum { + kKMul = 30345, + kJMul = 18000, + }; + + uint32_t fK; + uint32_t fJ; +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkRectMemcpy.h b/gfx/skia/skia/src/base/SkRectMemcpy.h new file mode 100644 index 0000000000..07ba0f0c65 --- /dev/null +++ b/gfx/skia/skia/src/base/SkRectMemcpy.h @@ -0,0 +1,32 @@ +/* + * Copyright 2023 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkRectMemcpy_DEFINED +#define SkRectMemcpy_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkTemplates.h" + +#include <cstring> + +static inline void SkRectMemcpy(void* dst, size_t dstRB, const void* src, size_t srcRB, + size_t trimRowBytes, int rowCount) { + SkASSERT(trimRowBytes <= dstRB); + SkASSERT(trimRowBytes <= srcRB); + if (trimRowBytes == dstRB && trimRowBytes == srcRB) { + memcpy(dst, src, trimRowBytes * rowCount); + return; + } + + for (int i = 0; i < rowCount; ++i) { + memcpy(dst, src, trimRowBytes); + dst = SkTAddOffset<void>(dst, dstRB); + src = SkTAddOffset<const void>(src, srcRB); + } +} + +#endif diff --git a/gfx/skia/skia/src/base/SkSafeMath.cpp b/gfx/skia/skia/src/base/SkSafeMath.cpp new file mode 100644 index 0000000000..cb69125edb --- /dev/null +++ b/gfx/skia/skia/src/base/SkSafeMath.cpp @@ -0,0 +1,20 @@ +/* + * Copyright 2023 Google LLC + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "src/base/SkSafeMath.h" + +size_t SkSafeMath::Add(size_t x, size_t y) { + SkSafeMath tmp; + size_t sum = tmp.add(x, y); + return tmp.ok() ? sum : SIZE_MAX; +} + +size_t SkSafeMath::Mul(size_t x, size_t y) { + SkSafeMath tmp; + size_t prod = tmp.mul(x, y); + return tmp.ok() ? prod : SIZE_MAX; +} diff --git a/gfx/skia/skia/src/base/SkSafeMath.h b/gfx/skia/skia/src/base/SkSafeMath.h new file mode 100644 index 0000000000..8ca44749f4 --- /dev/null +++ b/gfx/skia/skia/src/base/SkSafeMath.h @@ -0,0 +1,113 @@ +/* + * Copyright 2017 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkSafeMath_DEFINED +#define SkSafeMath_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkDebug.h" // IWYU pragma: keep +#include "include/private/base/SkTFitsIn.h" + +#include <cstddef> +#include <cstdint> +#include <limits> + +// SkSafeMath always check that a series of operations do not overflow. +// This must be correct for all platforms, because this is a check for safety at runtime. + +class SkSafeMath { +public: + SkSafeMath() = default; + + bool ok() const { return fOK; } + explicit operator bool() const { return fOK; } + + size_t mul(size_t x, size_t y) { + return sizeof(size_t) == sizeof(uint64_t) ? mul64(x, y) : mul32(x, y); + } + + size_t add(size_t x, size_t y) { + size_t result = x + y; + fOK &= result >= x; + return result; + } + + /** + * Return a + b, unless this result is an overflow/underflow. 
In those cases, fOK will + * be set to false, and it is undefined what this returns. + */ + int addInt(int a, int b) { + if (b < 0 && a < std::numeric_limits<int>::min() - b) { + fOK = false; + return a; + } else if (b > 0 && a > std::numeric_limits<int>::max() - b) { + fOK = false; + return a; + } + return a + b; + } + + size_t alignUp(size_t x, size_t alignment) { + SkASSERT(alignment && !(alignment & (alignment - 1))); + return add(x, alignment - 1) & ~(alignment - 1); + } + + template <typename T> T castTo(size_t value) { + if (!SkTFitsIn<T>(value)) { + fOK = false; + } + return static_cast<T>(value); + } + + // These saturate to their results + static size_t Add(size_t x, size_t y); + static size_t Mul(size_t x, size_t y); + static size_t Align4(size_t x) { + SkSafeMath safe; + return safe.alignUp(x, 4); + } + +private: + uint32_t mul32(uint32_t x, uint32_t y) { + uint64_t bx = x; + uint64_t by = y; + uint64_t result = bx * by; + fOK &= result >> 32 == 0; + // Overflow information is capture in fOK. Return the result modulo 2^32. + return (uint32_t)result; + } + + uint64_t mul64(uint64_t x, uint64_t y) { + if (x <= std::numeric_limits<uint64_t>::max() >> 32 + && y <= std::numeric_limits<uint64_t>::max() >> 32) { + return x * y; + } else { + auto hi = [](uint64_t x) { return x >> 32; }; + auto lo = [](uint64_t x) { return x & 0xFFFFFFFF; }; + + uint64_t lx_ly = lo(x) * lo(y); + uint64_t hx_ly = hi(x) * lo(y); + uint64_t lx_hy = lo(x) * hi(y); + uint64_t hx_hy = hi(x) * hi(y); + uint64_t result = 0; + result = this->add(lx_ly, (hx_ly << 32)); + result = this->add(result, (lx_hy << 32)); + fOK &= (hx_hy + (hx_ly >> 32) + (lx_hy >> 32)) == 0; + + #if defined(SK_DEBUG) && defined(__clang__) && defined(__x86_64__) + auto double_check = (unsigned __int128)x * y; + SkASSERT(result == (double_check & 0xFFFFFFFFFFFFFFFF)); + SkASSERT(!fOK || (double_check >> 64 == 0)); + #endif + + return result; + } + } + bool fOK = true; +}; + +#endif//SkSafeMath_DEFINED diff --git a/gfx/skia/skia/src/base/SkScopeExit.h b/gfx/skia/skia/src/base/SkScopeExit.h new file mode 100644 index 0000000000..9c3581b464 --- /dev/null +++ b/gfx/skia/skia/src/base/SkScopeExit.h @@ -0,0 +1,59 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkScopeExit_DEFINED +#define SkScopeExit_DEFINED + +#include "include/private/base/SkMacros.h" + +#include <functional> +#include <utility> + +/** SkScopeExit calls a std:::function<void()> in its destructor. */ +class SkScopeExit { +public: + SkScopeExit() = default; + SkScopeExit(std::function<void()> f) : fFn(std::move(f)) {} + SkScopeExit(SkScopeExit&& that) : fFn(std::move(that.fFn)) {} + + ~SkScopeExit() { + if (fFn) { + fFn(); + } + } + + void clear() { fFn = {}; } + + SkScopeExit& operator=(SkScopeExit&& that) { + fFn = std::move(that.fFn); + return *this; + } + +private: + std::function<void()> fFn; + + SkScopeExit( const SkScopeExit& ) = delete; + SkScopeExit& operator=(const SkScopeExit& ) = delete; +}; + +/** + * SK_AT_SCOPE_EXIT(stmt) evaluates stmt when the current scope ends. + * + * E.g. 
+ * { + * int x = 5; + * { + * SK_AT_SCOPE_EXIT(x--); + * SkASSERT(x == 5); + * } + * SkASSERT(x == 4); + * } + */ +#define SK_AT_SCOPE_EXIT(stmt) \ + SkScopeExit SK_MACRO_APPEND_LINE(at_scope_exit_)([&]() { stmt; }) + +#endif // SkScopeExit_DEFINED diff --git a/gfx/skia/skia/src/base/SkSemaphore.cpp b/gfx/skia/skia/src/base/SkSemaphore.cpp new file mode 100644 index 0000000000..cb85fa9745 --- /dev/null +++ b/gfx/skia/skia/src/base/SkSemaphore.cpp @@ -0,0 +1,83 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "include/private/base/SkSemaphore.h" + +#include "include/private/base/SkFeatures.h" // IWYU pragma: keep + +#if defined(SK_BUILD_FOR_MAC) || defined(SK_BUILD_FOR_IOS) + #include <dispatch/dispatch.h> + + struct SkSemaphore::OSSemaphore { + dispatch_semaphore_t fSemaphore; + + OSSemaphore() { fSemaphore = dispatch_semaphore_create(0/*initial count*/); } + ~OSSemaphore() { dispatch_release(fSemaphore); } + + void signal(int n) { while (n --> 0) { dispatch_semaphore_signal(fSemaphore); } } + void wait() { dispatch_semaphore_wait(fSemaphore, DISPATCH_TIME_FOREVER); } + }; +#elif defined(SK_BUILD_FOR_WIN) +#include "src/base/SkLeanWindows.h" + + struct SkSemaphore::OSSemaphore { + HANDLE fSemaphore; + + OSSemaphore() { + fSemaphore = CreateSemaphore(nullptr /*security attributes, optional*/, + 0 /*initial count*/, + MAXLONG /*max count*/, + nullptr /*name, optional*/); + } + ~OSSemaphore() { CloseHandle(fSemaphore); } + + void signal(int n) { + ReleaseSemaphore(fSemaphore, n, nullptr/*returns previous count, optional*/); + } + void wait() { WaitForSingleObject(fSemaphore, INFINITE/*timeout in ms*/); } + }; +#else + // It's important we test for Mach before this. This code will compile but not work there. + #include <errno.h> + #include <semaphore.h> + struct SkSemaphore::OSSemaphore { + sem_t fSemaphore; + + OSSemaphore() { sem_init(&fSemaphore, 0/*cross process?*/, 0/*initial count*/); } + ~OSSemaphore() { sem_destroy(&fSemaphore); } + + void signal(int n) { while (n --> 0) { sem_post(&fSemaphore); } } + void wait() { + // Try until we're not interrupted. + while(sem_wait(&fSemaphore) == -1 && errno == EINTR); + } + }; +#endif + +/////////////////////////////////////////////////////////////////////////////// + +SkSemaphore::~SkSemaphore() { + delete fOSSemaphore; +} + +void SkSemaphore::osSignal(int n) { + fOSSemaphoreOnce([this] { fOSSemaphore = new OSSemaphore; }); + fOSSemaphore->signal(n); +} + +void SkSemaphore::osWait() { + fOSSemaphoreOnce([this] { fOSSemaphore = new OSSemaphore; }); + fOSSemaphore->wait(); +} + +bool SkSemaphore::try_wait() { + int count = fCount.load(std::memory_order_relaxed); + if (count > 0) { + return fCount.compare_exchange_weak(count, count-1, std::memory_order_acquire); + } + return false; +} diff --git a/gfx/skia/skia/src/base/SkStringView.h b/gfx/skia/skia/src/base/SkStringView.h new file mode 100644 index 0000000000..f8f83ae77e --- /dev/null +++ b/gfx/skia/skia/src/base/SkStringView.h @@ -0,0 +1,51 @@ +/* + * Copyright 2021 Google LLC. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SkStringView_DEFINED +#define SkStringView_DEFINED + +#include <cstring> +#include <string_view> + +namespace skstd { + +// C++20 additions +inline constexpr bool starts_with(std::string_view str, std::string_view prefix) { + if (prefix.length() > str.length()) { + return false; + } + return prefix.length() == 0 || !memcmp(str.data(), prefix.data(), prefix.length()); +} + +inline constexpr bool starts_with(std::string_view str, std::string_view::value_type c) { + return !str.empty() && str.front() == c; +} + +inline constexpr bool ends_with(std::string_view str, std::string_view suffix) { + if (suffix.length() > str.length()) { + return false; + } + return suffix.length() == 0 || !memcmp(str.data() + str.length() - suffix.length(), + suffix.data(), suffix.length()); +} + +inline constexpr bool ends_with(std::string_view str, std::string_view::value_type c) { + return !str.empty() && str.back() == c; +} + +// C++23 additions +inline constexpr bool contains(std::string_view str, std::string_view needle) { + return str.find(needle) != std::string_view::npos; +} + +inline constexpr bool contains(std::string_view str, std::string_view::value_type c) { + return str.find(c) != std::string_view::npos; +} + +} // namespace skstd + +#endif diff --git a/gfx/skia/skia/src/base/SkTBlockList.h b/gfx/skia/skia/src/base/SkTBlockList.h new file mode 100644 index 0000000000..88e91a92bb --- /dev/null +++ b/gfx/skia/skia/src/base/SkTBlockList.h @@ -0,0 +1,448 @@ +/* + * Copyright 2010 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkTBlockList_DEFINED +#define SkTBlockList_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkDebug.h" +#include "include/private/base/SkTo.h" +#include "src/base/SkBlockAllocator.h" + +#include <algorithm> +#include <cstring> +#include <type_traits> +#include <utility> + +// Forward declarations for the iterators used by SkTBlockList +using IndexFn = int (*)(const SkBlockAllocator::Block*); +using NextFn = int (*)(const SkBlockAllocator::Block*, int); +template<typename T, typename B> using ItemFn = T (*)(B*, int); +template <typename T, bool Forward, bool Const, IndexFn Start, IndexFn End, NextFn Next, + ItemFn<T, typename std::conditional<Const, const SkBlockAllocator::Block, + SkBlockAllocator::Block>::type> Resolve> +class BlockIndexIterator; + +/** + * SkTBlockList manages dynamic storage for instances of T, reserving fixed blocks such that + * allocation is amortized across every N instances. In this way it is a hybrid of an array-based + * vector and a linked-list. T can be any type and non-trivial destructors are automatically + * invoked when the SkTBlockList is destructed. The addresses of instances are guaranteed + * not to move except when a list is concatenated to another. + * + * The collection supports storing a templated number of elements inline before heap-allocated + * blocks are made to hold additional instances. By default, the heap blocks are sized to hold the + * same number of items as the inline block. A common pattern is to have the inline size hold only + * a small number of items for the common case and then allocate larger blocks when needed. 
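+ *
+ * For example (illustrative), SkTBlockList<int, 4> list(16); keeps the first four ints in the
+ * inline block and then allocates heap blocks sized to hold 16 ints each.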
+ * + * If the size of a collection is N, and its block size is B, the complexity of the common + * operations are: + * - push_back()/emplace_back(): O(1), with malloc O(B) + * - pop_back(): O(1), with free O(B) + * - front()/back(): O(1) + * - reset(): O(N) for non-trivial types, O(N/B) for trivial types + * - concat(): O(B) + * - random access: O(N/B) + * - iteration: O(1) at each step + * + * These characteristics make it well suited for allocating items in a LIFO ordering, or otherwise + * acting as a stack, or simply using it as a typed allocator. + */ +template <typename T, int StartingItems = 1> +class SkTBlockList { +public: + /** + * Create an allocator that defaults to using StartingItems as heap increment. + */ + SkTBlockList() : SkTBlockList(StartingItems) {} + + /** + * Create an allocator + * + * @param itemsPerBlock the number of items to allocate at once + */ + explicit SkTBlockList(int itemsPerBlock, + SkBlockAllocator::GrowthPolicy policy = + SkBlockAllocator::GrowthPolicy::kFixed) + : fAllocator(policy, + SkBlockAllocator::BlockOverhead<alignof(T)>() + sizeof(T)*itemsPerBlock) {} + + ~SkTBlockList() { this->reset(); } + + /** + * Adds an item and returns it. + * + * @return the added item. + */ + T& push_back() { + return *new (this->pushItem()) T; + } + T& push_back(const T& t) { + return *new (this->pushItem()) T(t); + } + T& push_back(T&& t) { + return *new (this->pushItem()) T(std::move(t)); + } + + template <typename... Args> + T& emplace_back(Args&&... args) { + return *new (this->pushItem()) T(std::forward<Args>(args)...); + } + + /** + * Move all items from 'other' to the end of this collection. When this returns, 'other' will + * be empty. Items in 'other' may be moved as part of compacting the pre-allocated start of + * 'other' into this list (using T's move constructor or memcpy if T is trivially copyable), but + * this is O(StartingItems) and not O(N). All other items are concatenated in O(1). + */ + template <int SI> + void concat(SkTBlockList<T, SI>&& other); + + /** + * Allocate, if needed, space to hold N more Ts before another malloc will occur. + */ + void reserve(int n) { + int avail = fAllocator->currentBlock()->template avail<alignof(T)>() / sizeof(T); + if (n > avail) { + int reserved = n - avail; + // Don't consider existing bytes since we've already determined how to split the N items + fAllocator->template reserve<alignof(T)>( + reserved * sizeof(T), SkBlockAllocator::kIgnoreExistingBytes_Flag); + } + } + + /** + * Remove the last item, only call if count() != 0 + */ + void pop_back() { + SkASSERT(this->count() > 0); + + SkBlockAllocator::Block* block = fAllocator->currentBlock(); + + // Run dtor for the popped item + int releaseIndex = Last(block); + GetItem(block, releaseIndex).~T(); + + if (releaseIndex == First(block)) { + fAllocator->releaseBlock(block); + } else { + // Since this always follows LIFO, the block should always be able to release the memory + SkAssertResult(block->release(releaseIndex, releaseIndex + sizeof(T))); + block->setMetadata(Decrement(block, releaseIndex)); + } + + fAllocator->setMetadata(fAllocator->metadata() - 1); + } + + /** + * Removes all added items. + */ + void reset() { + // Invoke destructors in reverse order if not trivially destructible + if constexpr (!std::is_trivially_destructible<T>::value) { + for (T& t : this->ritems()) { + t.~T(); + } + } + + fAllocator->reset(); + } + + /** + * Returns the item count. 
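+ * (The running total is tracked in the allocator's metadata, so this is O(1) apart from the
+ * debug-only verification below.)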
+ */ + int count() const { +#ifdef SK_DEBUG + // Confirm total count matches sum of block counts + int count = 0; + for (const auto* b :fAllocator->blocks()) { + if (b->metadata() == 0) { + continue; // skip empty + } + count += (sizeof(T) + Last(b) - First(b)) / sizeof(T); + } + SkASSERT(count == fAllocator->metadata()); +#endif + return fAllocator->metadata(); + } + + /** + * Is the count 0? + */ + bool empty() const { return this->count() == 0; } + + /** + * Access first item, only call if count() != 0 + */ + T& front() { + // This assumes that the head block actually have room to store the first item. + static_assert(StartingItems >= 1); + SkASSERT(this->count() > 0 && fAllocator->headBlock()->metadata() > 0); + return GetItem(fAllocator->headBlock(), First(fAllocator->headBlock())); + } + const T& front() const { + SkASSERT(this->count() > 0 && fAllocator->headBlock()->metadata() > 0); + return GetItem(fAllocator->headBlock(), First(fAllocator->headBlock())); + } + + /** + * Access last item, only call if count() != 0 + */ + T& back() { + SkASSERT(this->count() > 0 && fAllocator->currentBlock()->metadata() > 0); + return GetItem(fAllocator->currentBlock(), Last(fAllocator->currentBlock())); + } + const T& back() const { + SkASSERT(this->count() > 0 && fAllocator->currentBlock()->metadata() > 0); + return GetItem(fAllocator->currentBlock(), Last(fAllocator->currentBlock())); + } + + /** + * Access item by index. Not an operator[] since it should not be considered constant time. + * Use for-range loops by calling items() or ritems() instead to access all added items in order + */ + T& item(int i) { + SkASSERT(i >= 0 && i < this->count()); + + // Iterate over blocks until we find the one that contains i. + for (auto* b : fAllocator->blocks()) { + if (b->metadata() == 0) { + continue; // skip empty + } + + int start = First(b); + int end = Last(b) + sizeof(T); // exclusive + int index = start + i * sizeof(T); + if (index < end) { + return GetItem(b, index); + } else { + i -= (end - start) / sizeof(T); + } + } + SkUNREACHABLE; + } + const T& item(int i) const { + return const_cast<SkTBlockList*>(this)->item(i); + } + +private: + // Let other SkTBlockLists have access (only ever used when T and S are the same but you + // cannot have partial specializations declared as a friend...) 
+ template<typename S, int N> friend class SkTBlockList; + friend class TBlockListTestAccess; // for fAllocator + + inline static constexpr size_t StartingSize = + SkBlockAllocator::Overhead<alignof(T)>() + StartingItems * sizeof(T); + + static T& GetItem(SkBlockAllocator::Block* block, int index) { + return *static_cast<T*>(block->ptr(index)); + } + static const T& GetItem(const SkBlockAllocator::Block* block, int index) { + return *static_cast<const T*>(block->ptr(index)); + } + static int First(const SkBlockAllocator::Block* b) { + return b->firstAlignedOffset<alignof(T)>(); + } + static int Last(const SkBlockAllocator::Block* b) { + return b->metadata(); + } + static int Increment(const SkBlockAllocator::Block* b, int index) { + return index + sizeof(T); + } + static int Decrement(const SkBlockAllocator::Block* b, int index) { + return index - sizeof(T); + } + + void* pushItem() { + // 'template' required because fAllocator is a template, calling a template member + auto br = fAllocator->template allocate<alignof(T)>(sizeof(T)); + SkASSERT(br.fStart == br.fAlignedOffset || + br.fAlignedOffset == First(fAllocator->currentBlock())); + br.fBlock->setMetadata(br.fAlignedOffset); + fAllocator->setMetadata(fAllocator->metadata() + 1); + return br.fBlock->ptr(br.fAlignedOffset); + } + + // N represents the number of items, whereas SkSBlockAllocator takes total bytes, so must + // account for the block allocator's size too. + // + // This class uses the SkBlockAllocator's metadata to track total count of items, and per-block + // metadata to track the index of the last allocated item within each block. + SkSBlockAllocator<StartingSize> fAllocator; + +public: + using Iter = BlockIndexIterator<T&, true, false, &First, &Last, &Increment, &GetItem>; + using CIter = BlockIndexIterator<const T&, true, true, &First, &Last, &Increment, &GetItem>; + using RIter = BlockIndexIterator<T&, false, false, &Last, &First, &Decrement, &GetItem>; + using CRIter = BlockIndexIterator<const T&, false, true, &Last, &First, &Decrement, &GetItem>; + + /** + * Iterate over all items in allocation order (oldest to newest) using a for-range loop: + * + * for (auto&& T : this->items()) {} + */ + Iter items() { return Iter(fAllocator.allocator()); } + CIter items() const { return CIter(fAllocator.allocator()); } + + // Iterate from newest to oldest using a for-range loop. + RIter ritems() { return RIter(fAllocator.allocator()); } + CRIter ritems() const { return CRIter(fAllocator.allocator()); } +}; + +template <typename T, int SI1> +template <int SI2> +void SkTBlockList<T, SI1>::concat(SkTBlockList<T, SI2>&& other) { + // Optimize the common case where the list to append only has a single item + if (other.empty()) { + return; + } else if (other.count() == 1) { + this->push_back(other.back()); + other.pop_back(); + return; + } + + // Manually move all items in other's head block into this list; all heap blocks from 'other' + // will be appended to the block linked list (no per-item moves needed then). + int headItemCount = 0; + SkBlockAllocator::Block* headBlock = other.fAllocator->headBlock(); + SkDEBUGCODE(int oldCount = this->count();) + if (headBlock->metadata() > 0) { + int headStart = First(headBlock); + int headEnd = Last(headBlock) + sizeof(T); // exclusive + headItemCount = (headEnd - headStart) / sizeof(T); + int avail = fAllocator->currentBlock()->template avail<alignof(T)>() / sizeof(T); + if (headItemCount > avail) { + // Make sure there is extra room for the items beyond what's already avail. 
Use the + // kIgnoreGrowthPolicy_Flag to make this reservation as tight as possible since + // 'other's heap blocks will be appended after it and any extra space is wasted. + fAllocator->template reserve<alignof(T)>((headItemCount - avail) * sizeof(T), + SkBlockAllocator::kIgnoreExistingBytes_Flag | + SkBlockAllocator::kIgnoreGrowthPolicy_Flag); + } + + if constexpr (std::is_trivially_copy_constructible<T>::value) { + // memcpy all items at once (or twice between current and reserved space). + SkASSERT(std::is_trivially_destructible<T>::value); + auto copy = [](SkBlockAllocator::Block* src, int start, SkBlockAllocator* dst, int n) { + auto target = dst->template allocate<alignof(T)>(n * sizeof(T)); + memcpy(target.fBlock->ptr(target.fAlignedOffset), src->ptr(start), n * sizeof(T)); + target.fBlock->setMetadata(target.fAlignedOffset + (n - 1) * sizeof(T)); + }; + + if (avail > 0) { + // Copy 0 to avail items into existing tail block + copy(headBlock, headStart, fAllocator.allocator(), std::min(headItemCount, avail)); + } + if (headItemCount > avail) { + // Copy (head count - avail) into the extra reserved space + copy(headBlock, headStart + avail * sizeof(T), + fAllocator.allocator(), headItemCount - avail); + } + fAllocator->setMetadata(fAllocator->metadata() + headItemCount); + } else { + // Move every item over one at a time + for (int i = headStart; i < headEnd; i += sizeof(T)) { + T& toMove = GetItem(headBlock, i); + this->push_back(std::move(toMove)); + // Anything of interest should have been moved, but run this since T isn't + // a trusted type. + toMove.~T(); // NOLINT(bugprone-use-after-move): calling dtor always allowed + } + } + + other.fAllocator->releaseBlock(headBlock); + } + + // other's head block must have been fully copied since it cannot be stolen + SkASSERT(other.fAllocator->headBlock()->metadata() == 0 && + fAllocator->metadata() == oldCount + headItemCount); + fAllocator->stealHeapBlocks(other.fAllocator.allocator()); + fAllocator->setMetadata(fAllocator->metadata() + + (other.fAllocator->metadata() - headItemCount)); + other.fAllocator->setMetadata(0); +} + +/** + * BlockIndexIterator provides a reusable iterator template for collections built on top of a + * SkBlockAllocator, where each item is of the same type, and the index to an item can be iterated + * over in a known manner. It supports const and non-const, and forward and reverse, assuming it's + * provided with proper functions for starting, ending, and advancing. + */ +template <typename T, // The element type (including any modifiers) + bool Forward, // Are indices within a block increasing or decreasing with iteration? 
+ bool Const, // Whether or not T is const + IndexFn Start, // Returns the index of the first valid item in a block + IndexFn End, // Returns the index of the last valid item (so it is inclusive) + NextFn Next, // Returns the next index given the current index + ItemFn<T, typename std::conditional<Const, const SkBlockAllocator::Block, + SkBlockAllocator::Block>::type> Resolve> +class BlockIndexIterator { + using BlockIter = typename SkBlockAllocator::BlockIter<Forward, Const>; +public: + BlockIndexIterator(BlockIter iter) : fBlockIter(iter) {} + + class Item { + public: + bool operator!=(const Item& other) const { + return other.fBlock != fBlock || (SkToBool(*fBlock) && other.fIndex != fIndex); + } + + T operator*() const { + SkASSERT(*fBlock); + return Resolve(*fBlock, fIndex); + } + + Item& operator++() { + const auto* block = *fBlock; + SkASSERT(block && block->metadata() > 0); + SkASSERT((Forward && Next(block, fIndex) > fIndex) || + (!Forward && Next(block, fIndex) < fIndex)); + fIndex = Next(block, fIndex); + if ((Forward && fIndex > fEndIndex) || (!Forward && fIndex < fEndIndex)) { + ++fBlock; + this->setIndices(); + } + return *this; + } + + private: + friend BlockIndexIterator; + using BlockItem = typename BlockIter::Item; + + Item(BlockItem block) : fBlock(block) { + this->setIndices(); + } + + void setIndices() { + // Skip empty blocks + while(*fBlock && (*fBlock)->metadata() == 0) { + ++fBlock; + } + if (*fBlock) { + fIndex = Start(*fBlock); + fEndIndex = End(*fBlock); + } else { + fIndex = 0; + fEndIndex = 0; + } + + SkASSERT((Forward && fIndex <= fEndIndex) || (!Forward && fIndex >= fEndIndex)); + } + + BlockItem fBlock; + int fIndex; + int fEndIndex; + }; + + Item begin() const { return Item(fBlockIter.begin()); } + Item end() const { return Item(fBlockIter.end()); } + +private: + BlockIter fBlockIter; +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkTDArray.cpp b/gfx/skia/skia/src/base/SkTDArray.cpp new file mode 100644 index 0000000000..2cf7780f95 --- /dev/null +++ b/gfx/skia/skia/src/base/SkTDArray.cpp @@ -0,0 +1,240 @@ +/* + * Copyright 2018 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#include "include/private/base/SkTDArray.h" + +#include "include/private/base/SkMalloc.h" +#include "include/private/base/SkTFitsIn.h" +#include "include/private/base/SkTo.h" + +#include <climits> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <new> +#include <utility> + +SkTDStorage::SkTDStorage(int sizeOfT) : fSizeOfT{sizeOfT} {} + +SkTDStorage::SkTDStorage(const void* src, int size, int sizeOfT) + : fSizeOfT{sizeOfT} + , fCapacity{size} + , fSize{size} { + if (size > 0) { + SkASSERT(src != nullptr); + size_t storageSize = this->bytes(size); + fStorage = static_cast<std::byte*>(sk_malloc_throw(storageSize)); + memcpy(fStorage, src, storageSize); + } +} + +SkTDStorage::SkTDStorage(const SkTDStorage& that) + : SkTDStorage{that.fStorage, that.fSize, that.fSizeOfT} {} + +SkTDStorage& SkTDStorage::operator=(const SkTDStorage& that) { + if (this != &that) { + if (that.fSize <= fCapacity) { + fSize = that.fSize; + if (fSize > 0) { + memcpy(fStorage, that.data(), that.size_bytes()); + } + } else { + *this = SkTDStorage{that.data(), that.size(), that.fSizeOfT}; + } + } + return *this; +} + +SkTDStorage::SkTDStorage(SkTDStorage&& that) + : fSizeOfT{that.fSizeOfT} + , fStorage(std::exchange(that.fStorage, nullptr)) + , fCapacity{that.fCapacity} + , fSize{that.fSize} {} + +SkTDStorage& SkTDStorage::operator=(SkTDStorage&& that) { + if (this != &that) { + this->~SkTDStorage(); + new (this) SkTDStorage{std::move(that)}; + } + return *this; +} + +SkTDStorage::~SkTDStorage() { + sk_free(fStorage); +} + +void SkTDStorage::reset() { + const int sizeOfT = fSizeOfT; + this->~SkTDStorage(); + new (this) SkTDStorage{sizeOfT}; +} + +void SkTDStorage::swap(SkTDStorage& that) { + SkASSERT(fSizeOfT == that.fSizeOfT); + using std::swap; + swap(fStorage, that.fStorage); + swap(fCapacity, that.fCapacity); + swap(fSize, that.fSize); +} + +void SkTDStorage::resize(int newSize) { + SkASSERT(newSize >= 0); + if (newSize > fCapacity) { + this->reserve(newSize); + } + fSize = newSize; +} + +void SkTDStorage::reserve(int newCapacity) { + SkASSERT(newCapacity >= 0); + if (newCapacity > fCapacity) { + // Establish the maximum number of elements that includes a valid count for end. In the + // largest case end() = &fArray[INT_MAX] which is 1 after the last indexable element. + static constexpr int kMaxCount = INT_MAX; + + // Assume that the array will max out. + int expandedReserve = kMaxCount; + if (kMaxCount - newCapacity > 4) { + // Add 1/4 more than we need. Add 4 to ensure this grows by at least 1. Pin to + // kMaxCount if no room for 1/4 growth. + int growth = 4 + ((newCapacity + 4) >> 2); + // Read this line as: if (count + growth < kMaxCount) { ... } + // It's rewritten to avoid signed integer overflow. + if (kMaxCount - newCapacity > growth) { + expandedReserve = newCapacity + growth; + } + } + + + // With a T size of 1, the above allocator produces the progression of 7, 15, ... Since, + // the sizeof max_align_t is often 16, there is no reason to allocate anything less than + // 16 bytes. This eliminates a realloc when pushing back bytes to an SkTDArray. + if (fSizeOfT == 1) { + // Round up to the multiple of 16. 
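+ // e.g. 7 -> 16, 15 -> 16, 23 -> 32.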
+ expandedReserve = (expandedReserve + 15) & ~15; + } + + fCapacity = expandedReserve; + size_t newStorageSize = this->bytes(fCapacity); + fStorage = static_cast<std::byte*>(sk_realloc_throw(fStorage, newStorageSize)); + } +} + +void SkTDStorage::shrink_to_fit() { + if (fCapacity != fSize) { + fCapacity = fSize; + // Because calling realloc with size of 0 is implementation defined, force to a good state + // by freeing fStorage. + if (fCapacity > 0) { + fStorage = static_cast<std::byte*>(sk_realloc_throw(fStorage, this->bytes(fCapacity))); + } else { + sk_free(fStorage); + fStorage = nullptr; + } + } +} + +void SkTDStorage::erase(int index, int count) { + SkASSERT(count >= 0); + SkASSERT(fSize >= count); + SkASSERT(0 <= index && index <= fSize); + + if (count > 0) { + // Check that the resulting size fits in an int. This will abort if not. + const int newCount = this->calculateSizeOrDie(-count); + this->moveTail(index, index + count, fSize); + this->resize(newCount); + } +} + +void SkTDStorage::removeShuffle(int index) { + SkASSERT(fSize > 0); + SkASSERT(0 <= index && index < fSize); + // Check that the new count is valid. + const int newCount = this->calculateSizeOrDie(-1); + this->moveTail(index, fSize - 1, fSize); + this->resize(newCount); +} + +void* SkTDStorage::prepend() { + return this->insert(/*index=*/0); +} + +void SkTDStorage::append() { + if (fSize < fCapacity) { + fSize++; + } else { + this->insert(fSize); + } +} + +void SkTDStorage::append(int count) { + SkASSERT(count >= 0); + // Read as: if (fSize + count <= fCapacity) {...}. This is a UB safe way to avoid the add. + if (fCapacity - fSize >= count) { + fSize += count; + } else { + this->insert(fSize, count, nullptr); + } +} + +void* SkTDStorage::append(const void* src, int count) { + return this->insert(fSize, count, src); +} + +void* SkTDStorage::insert(int index) { + return this->insert(index, /*count=*/1, nullptr); +} + +void* SkTDStorage::insert(int index, int count, const void* src) { + SkASSERT(0 <= index && index <= fSize); + SkASSERT(count >= 0); + + if (count > 0) { + const int oldCount = fSize; + const int newCount = this->calculateSizeOrDie(count); + this->resize(newCount); + this->moveTail(index + count, index, oldCount); + + if (src != nullptr) { + this->copySrc(index, src, count); + } + } + + return this->address(index); +} + +bool operator==(const SkTDStorage& a, const SkTDStorage& b) { + return a.size() == b.size() && + (a.size() == 0 || !memcmp(a.data(), b.data(), a.bytes(a.size()))); +} + +int SkTDStorage::calculateSizeOrDie(int delta) { + // Check that count will not go negative. + SkASSERT_RELEASE(-fSize <= delta); + + // We take care to avoid overflow here. + // Because count and delta are both signed 32-bit ints, the sum of count and delta is at + // most 4294967294, which fits fine in uint32_t. Proof follows in assert. 
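+ // (INT_MAX + INT_MAX == 4294967294, which is UINT32_MAX - 1.)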
+ static_assert(UINT32_MAX >= (uint32_t)INT_MAX + (uint32_t)INT_MAX); + uint32_t testCount = (uint32_t)fSize + (uint32_t)delta; + SkASSERT_RELEASE(SkTFitsIn<int>(testCount)); + return SkToInt(testCount); +} + +void SkTDStorage::moveTail(int to, int tailStart, int tailEnd) { + SkASSERT(0 <= to && to <= fSize); + SkASSERT(0 <= tailStart && tailStart <= tailEnd && tailEnd <= fSize); + if (to != tailStart && tailStart != tailEnd) { + this->copySrc(to, this->address(tailStart), tailEnd - tailStart); + } +} + +void SkTDStorage::copySrc(int dstIndex, const void* src, int count) { + SkASSERT(count > 0); + memmove(this->address(dstIndex), src, this->bytes(count)); +} diff --git a/gfx/skia/skia/src/base/SkTDPQueue.h b/gfx/skia/skia/src/base/SkTDPQueue.h new file mode 100644 index 0000000000..3a897130f2 --- /dev/null +++ b/gfx/skia/skia/src/base/SkTDPQueue.h @@ -0,0 +1,222 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkTDPQueue_DEFINED +#define SkTDPQueue_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkDebug.h" +#include "include/private/base/SkTDArray.h" +#include "include/private/base/SkTo.h" +#include "src/base/SkTSort.h" + +#include <utility> + +/** + * This class implements a priority queue. T is the type of the elements in the queue. LESS is a + * function that compares two Ts and returns true if the first is higher priority than the second. + * + * Optionally objects may know their index into the priority queue. The queue will update the index + * as the objects move through the queue. This is enabled by using a non-nullptr function for INDEX. + * When an INDEX function is provided random deletes from the queue are allowed using remove(). + * Additionally, the * priority is allowed to change as long as priorityDidChange() is called + * afterwards. In debug builds the index will be set to -1 before an element is removed from the + * queue. + */ +template <typename T, + bool (*LESS)(const T&, const T&), + int* (*INDEX)(const T&) = (int* (*)(const T&))nullptr> +class SkTDPQueue { +public: + SkTDPQueue() {} + SkTDPQueue(int reserve) { fArray.reserve(reserve); } + + SkTDPQueue(SkTDPQueue&&) = default; + SkTDPQueue& operator =(SkTDPQueue&&) = default; + + SkTDPQueue(const SkTDPQueue&) = delete; + SkTDPQueue& operator=(const SkTDPQueue&) = delete; + + /** Number of items in the queue. */ + int count() const { return fArray.size(); } + + /** Gets the next item in the queue without popping it. */ + const T& peek() const { return fArray[0]; } + T& peek() { return fArray[0]; } + + /** Removes the next item. */ + void pop() { + this->validate(); + SkDEBUGCODE(if (SkToBool(INDEX)) { *INDEX(fArray[0]) = -1; }) + if (1 == fArray.size()) { + fArray.pop_back(); + return; + } + + fArray[0] = fArray[fArray.size() - 1]; + this->setIndex(0); + fArray.pop_back(); + this->percolateDownIfNecessary(0); + + this->validate(); + } + + /** Inserts a new item in the queue based on its priority. */ + void insert(T entry) { + this->validate(); + int index = fArray.size(); + *fArray.append() = entry; + this->setIndex(fArray.size() - 1); + this->percolateUpIfNecessary(index); + this->validate(); + } + + /** Random access removal. This requires that the INDEX function is non-nullptr. 
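+        For example (an illustrative sketch; Entry, EntryLess, and EntryIndex are hypothetical
+        and not part of this header), an element can expose its heap index like so:
+
+            struct Entry { int fPQIndex = -1; ... };
+            bool EntryLess(Entry* const& a, Entry* const& b);
+            int* EntryIndex(Entry* const& e) { return &e->fPQIndex; }
+
+            SkTDPQueue<Entry*, EntryLess, EntryIndex> queue;
+            queue.remove(entry);  // permitted because EntryIndex is non-nullptr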
*/ + void remove(T entry) { + SkASSERT(nullptr != INDEX); + int index = *INDEX(entry); + SkASSERT(index >= 0 && index < fArray.size()); + this->validate(); + SkDEBUGCODE(*INDEX(fArray[index]) = -1;) + if (index == fArray.size() - 1) { + fArray.pop_back(); + return; + } + fArray[index] = fArray[fArray.size() - 1]; + fArray.pop_back(); + this->setIndex(index); + this->percolateUpOrDown(index); + this->validate(); + } + + /** Notification that the priority of an entry has changed. This must be called after an + item's priority is changed to maintain correct ordering. Changing the priority is only + allowed if an INDEX function is provided. */ + void priorityDidChange(T entry) { + SkASSERT(nullptr != INDEX); + int index = *INDEX(entry); + SkASSERT(index >= 0 && index < fArray.size()); + this->validate(index); + this->percolateUpOrDown(index); + this->validate(); + } + + /** Gets the item at index i in the priority queue (for i < this->count()). at(0) is equivalent + to peek(). Otherwise, there is no guarantee about ordering of elements in the queue. */ + T at(int i) const { return fArray[i]; } + + /** Sorts the queue into priority order. The queue is only guarenteed to remain in sorted order + * until any other operation, other than at(), is performed. + */ + void sort() { + if (fArray.size() > 1) { + SkTQSort<T>(fArray.begin(), fArray.end(), LESS); + for (int i = 0; i < fArray.size(); i++) { + this->setIndex(i); + } + this->validate(); + } + } + +private: + static int LeftOf(int x) { SkASSERT(x >= 0); return 2 * x + 1; } + static int ParentOf(int x) { SkASSERT(x > 0); return (x - 1) >> 1; } + + void percolateUpOrDown(int index) { + SkASSERT(index >= 0); + if (!percolateUpIfNecessary(index)) { + this->validate(index); + this->percolateDownIfNecessary(index); + } + } + + bool percolateUpIfNecessary(int index) { + SkASSERT(index >= 0); + bool percolated = false; + do { + if (0 == index) { + this->setIndex(index); + return percolated; + } + int p = ParentOf(index); + if (LESS(fArray[index], fArray[p])) { + using std::swap; + swap(fArray[index], fArray[p]); + this->setIndex(index); + index = p; + percolated = true; + } else { + this->setIndex(index); + return percolated; + } + this->validate(index); + } while (true); + } + + void percolateDownIfNecessary(int index) { + SkASSERT(index >= 0); + do { + int child = LeftOf(index); + + if (child >= fArray.size()) { + // We're a leaf. + this->setIndex(index); + return; + } + + if (child + 1 >= fArray.size()) { + // We only have a left child. + if (LESS(fArray[child], fArray[index])) { + using std::swap; + swap(fArray[child], fArray[index]); + this->setIndex(child); + this->setIndex(index); + return; + } + } else if (LESS(fArray[child + 1], fArray[child])) { + // The right child is the one we should swap with, if we swap. + child++; + } + + // Check if we need to swap. + if (LESS(fArray[child], fArray[index])) { + using std::swap; + swap(fArray[child], fArray[index]); + this->setIndex(index); + index = child; + } else { + // We're less than both our children. 
+ this->setIndex(index); + return; + } + this->validate(index); + } while (true); + } + + void setIndex(int index) { + SkASSERT(index < fArray.size()); + if (SkToBool(INDEX)) { + *INDEX(fArray[index]) = index; + } + } + + void validate(int excludedIndex = -1) const { +#ifdef SK_DEBUG + for (int i = 1; i < fArray.size(); ++i) { + int p = ParentOf(i); + if (excludedIndex != p && excludedIndex != i) { + SkASSERT(!(LESS(fArray[i], fArray[p]))); + SkASSERT(!SkToBool(INDEX) || *INDEX(fArray[i]) == i); + } + } +#endif + } + + SkTDArray<T> fArray; +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkTInternalLList.h b/gfx/skia/skia/src/base/SkTInternalLList.h new file mode 100644 index 0000000000..5b655a35eb --- /dev/null +++ b/gfx/skia/skia/src/base/SkTInternalLList.h @@ -0,0 +1,304 @@ +/* + * Copyright 2012 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkTInternalLList_DEFINED +#define SkTInternalLList_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkDebug.h" +#include "include/private/base/SkTo.h" + +/** + * This macro creates the member variables required by the SkTInternalLList class. It should be + * placed in the private section of any class that will be stored in a double linked list. + */ +#define SK_DECLARE_INTERNAL_LLIST_INTERFACE(ClassName) \ + friend class SkTInternalLList<ClassName>; \ + /* back pointer to the owning list - for debugging */ \ + SkDEBUGCODE(SkTInternalLList<ClassName>* fList = nullptr;) \ + ClassName* fPrev = nullptr; \ + ClassName* fNext = nullptr + +/** + * This class implements a templated internal doubly linked list data structure. + */ +template <class T> class SkTInternalLList { +public: + SkTInternalLList() {} + + void reset() { + fHead = nullptr; + fTail = nullptr; + } + + void remove(T* entry) { + SkASSERT(fHead && fTail); + SkASSERT(this->isInList(entry)); + + T* prev = entry->fPrev; + T* next = entry->fNext; + + if (prev) { + prev->fNext = next; + } else { + fHead = next; + } + if (next) { + next->fPrev = prev; + } else { + fTail = prev; + } + + entry->fPrev = nullptr; + entry->fNext = nullptr; + +#ifdef SK_DEBUG + entry->fList = nullptr; +#endif + } + + void addToHead(T* entry) { + SkASSERT(nullptr == entry->fPrev && nullptr == entry->fNext); + SkASSERT(nullptr == entry->fList); + + entry->fPrev = nullptr; + entry->fNext = fHead; + if (fHead) { + fHead->fPrev = entry; + } + fHead = entry; + if (nullptr == fTail) { + fTail = entry; + } + +#ifdef SK_DEBUG + entry->fList = this; +#endif + } + + void addToTail(T* entry) { + SkASSERT(nullptr == entry->fPrev && nullptr == entry->fNext); + SkASSERT(nullptr == entry->fList); + + entry->fPrev = fTail; + entry->fNext = nullptr; + if (fTail) { + fTail->fNext = entry; + } + fTail = entry; + if (nullptr == fHead) { + fHead = entry; + } + +#ifdef SK_DEBUG + entry->fList = this; +#endif + } + + /** + * Inserts a new list entry before an existing list entry. The new entry must not already be + * a member of this or any other list. If existingEntry is NULL then the new entry is added + * at the tail. 
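+     *
+     * For example (sketch; Node is a hypothetical class that declares
+     * SK_DECLARE_INTERNAL_LLIST_INTERFACE(Node)):
+     *
+     *     SkTInternalLList<Node> list;
+     *     list.addToTail(&a);          // list: a
+     *     list.addBefore(&b, &a);      // list: b, a
+     *     list.addBefore(&c, nullptr); // nullptr existingEntry appends: b, a, c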
+ */ + void addBefore(T* newEntry, T* existingEntry) { + SkASSERT(newEntry); + + if (nullptr == existingEntry) { + this->addToTail(newEntry); + return; + } + + SkASSERT(this->isInList(existingEntry)); + newEntry->fNext = existingEntry; + T* prev = existingEntry->fPrev; + existingEntry->fPrev = newEntry; + newEntry->fPrev = prev; + if (nullptr == prev) { + SkASSERT(fHead == existingEntry); + fHead = newEntry; + } else { + prev->fNext = newEntry; + } +#ifdef SK_DEBUG + newEntry->fList = this; +#endif + } + + /** + * Inserts a new list entry after an existing list entry. The new entry must not already be + * a member of this or any other list. If existingEntry is NULL then the new entry is added + * at the head. + */ + void addAfter(T* newEntry, T* existingEntry) { + SkASSERT(newEntry); + + if (nullptr == existingEntry) { + this->addToHead(newEntry); + return; + } + + SkASSERT(this->isInList(existingEntry)); + newEntry->fPrev = existingEntry; + T* next = existingEntry->fNext; + existingEntry->fNext = newEntry; + newEntry->fNext = next; + if (nullptr == next) { + SkASSERT(fTail == existingEntry); + fTail = newEntry; + } else { + next->fPrev = newEntry; + } +#ifdef SK_DEBUG + newEntry->fList = this; +#endif + } + + void concat(SkTInternalLList&& list) { + if (list.isEmpty()) { + return; + } + + list.fHead->fPrev = fTail; + if (!fHead) { + SkASSERT(!list.fHead->fPrev); + fHead = list.fHead; + } else { + SkASSERT(fTail); + fTail->fNext = list.fHead; + } + fTail = list.fTail; + +#ifdef SK_DEBUG + for (T* node = list.fHead; node; node = node->fNext) { + SkASSERT(node->fList == &list); + node->fList = this; + } +#endif + + list.fHead = list.fTail = nullptr; + } + + bool isEmpty() const { + SkASSERT(SkToBool(fHead) == SkToBool(fTail)); + return !fHead; + } + + T* head() const { return fHead; } + T* tail() const { return fTail; } + + class Iter { + public: + enum IterStart { + kHead_IterStart, + kTail_IterStart + }; + + Iter() : fCurr(nullptr) {} + Iter(const Iter& iter) : fCurr(iter.fCurr) {} + Iter& operator= (const Iter& iter) { fCurr = iter.fCurr; return *this; } + + T* init(const SkTInternalLList& list, IterStart startLoc) { + if (kHead_IterStart == startLoc) { + fCurr = list.fHead; + } else { + SkASSERT(kTail_IterStart == startLoc); + fCurr = list.fTail; + } + + return fCurr; + } + + T* get() { return fCurr; } + + /** + * Return the next/previous element in the list or NULL if at the end. + */ + T* next() { + if (nullptr == fCurr) { + return nullptr; + } + + fCurr = fCurr->fNext; + return fCurr; + } + + T* prev() { + if (nullptr == fCurr) { + return nullptr; + } + + fCurr = fCurr->fPrev; + return fCurr; + } + + /** + * C++11 range-for interface. 
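+         *
+         * For example:
+         *
+         *     for (T* item : list) {
+         *         // visits entries from head to tail
+         *     }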
+ */ + bool operator!=(const Iter& that) { return fCurr != that.fCurr; } + T* operator*() { return this->get(); } + void operator++() { this->next(); } + + private: + T* fCurr; + }; + + Iter begin() const { + Iter iter; + iter.init(*this, Iter::kHead_IterStart); + return iter; + } + + Iter end() const { return Iter(); } + +#ifdef SK_DEBUG + void validate() const { + SkASSERT(!fHead == !fTail); + Iter iter; + for (T* item = iter.init(*this, Iter::kHead_IterStart); item; item = iter.next()) { + SkASSERT(this->isInList(item)); + if (nullptr == item->fPrev) { + SkASSERT(fHead == item); + } else { + SkASSERT(item->fPrev->fNext == item); + } + if (nullptr == item->fNext) { + SkASSERT(fTail == item); + } else { + SkASSERT(item->fNext->fPrev == item); + } + } + } + + /** + * Debugging-only method that uses the list back pointer to check if 'entry' is indeed in 'this' + * list. + */ + bool isInList(const T* entry) const { + return entry->fList == this; + } + + /** + * Debugging-only method that laboriously counts the list entries. + */ + int countEntries() const { + int count = 0; + for (T* entry = fHead; entry; entry = entry->fNext) { + ++count; + } + return count; + } +#endif // SK_DEBUG + +private: + T* fHead = nullptr; + T* fTail = nullptr; + + SkTInternalLList(const SkTInternalLList&) = delete; + SkTInternalLList& operator=(const SkTInternalLList&) = delete; +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkTLazy.h b/gfx/skia/skia/src/base/SkTLazy.h new file mode 100644 index 0000000000..38b3b373db --- /dev/null +++ b/gfx/skia/skia/src/base/SkTLazy.h @@ -0,0 +1,208 @@ +/* + * Copyright 2011 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkTLazy_DEFINED +#define SkTLazy_DEFINED + +#include "include/private/base/SkAssert.h" + +#include <optional> +#include <utility> + +/** + * Efficient way to defer allocating/initializing a class until it is needed + * (if ever). + */ +template <typename T> class SkTLazy { +public: + SkTLazy() = default; + explicit SkTLazy(const T* src) : fValue(src ? std::optional<T>(*src) : std::nullopt) {} + SkTLazy(const SkTLazy& that) : fValue(that.fValue) {} + SkTLazy(SkTLazy&& that) : fValue(std::move(that.fValue)) {} + + ~SkTLazy() = default; + + SkTLazy& operator=(const SkTLazy& that) { + fValue = that.fValue; + return *this; + } + + SkTLazy& operator=(SkTLazy&& that) { + fValue = std::move(that.fValue); + return *this; + } + + /** + * Return a pointer to an instance of the class initialized with 'args'. + * If a previous instance had been initialized (either from init() or + * set()) it will first be destroyed, so that a freshly initialized + * instance is always returned. + */ + template <typename... Args> T* init(Args&&... args) { + fValue.emplace(std::forward<Args>(args)...); + return this->get(); + } + + /** + * Copy src into this, and return a pointer to a copy of it. Note this + * will always return the same pointer, so if it is called on a lazy that + * has already been initialized, then this will copy over the previous + * contents. + */ + T* set(const T& src) { + fValue = src; + return this->get(); + } + + T* set(T&& src) { + fValue = std::move(src); + return this->get(); + } + + /** + * Destroy the lazy object (if it was created via init() or set()) + */ + void reset() { + fValue.reset(); + } + + /** + * Returns true if a valid object has been initialized in the SkTLazy, + * false otherwise. 
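+     *
+     *  For example (Foo is any hypothetical default-constructible type):
+     *
+     *      SkTLazy<Foo> lazy;           // no Foo has been constructed yet
+     *      SkASSERT(!lazy.isValid());
+     *      lazy.init();                 // default-constructs the Foo
+     *      SkASSERT(lazy.isValid());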
+ */ + bool isValid() const { return fValue.has_value(); } + + /** + * Returns the object. This version should only be called when the caller + * knows that the object has been initialized. + */ + T* get() { + SkASSERT(fValue.has_value()); + return &fValue.value(); + } + const T* get() const { + SkASSERT(fValue.has_value()); + return &fValue.value(); + } + + T* operator->() { return this->get(); } + const T* operator->() const { return this->get(); } + + T& operator*() { + SkASSERT(fValue.has_value()); + return *fValue; + } + const T& operator*() const { + SkASSERT(fValue.has_value()); + return *fValue; + } + + /** + * Like above but doesn't assert if object isn't initialized (in which case + * nullptr is returned). + */ + const T* getMaybeNull() const { return fValue.has_value() ? this->get() : nullptr; } + T* getMaybeNull() { return fValue.has_value() ? this->get() : nullptr; } + +private: + std::optional<T> fValue; +}; + +/** + * A helper built on top of std::optional to do copy-on-first-write. The object is initialized + * with a const pointer but provides a non-const pointer accessor. The first time the + * accessor is called (if ever) the object is cloned. + * + * In the following example at most one copy of constThing is made: + * + * SkTCopyOnFirstWrite<Thing> thing(&constThing); + * ... + * function_that_takes_a_const_thing_ptr(thing); // constThing is passed + * ... + * if (need_to_modify_thing()) { + * thing.writable()->modifyMe(); // makes a copy of constThing + * } + * ... + * x = thing->readSomething(); + * ... + * if (need_to_modify_thing_now()) { + * thing.writable()->changeMe(); // makes a copy of constThing if we didn't call modifyMe() + * } + * + * consume_a_thing(thing); // could be constThing or a modified copy. + */ +template <typename T> +class SkTCopyOnFirstWrite { +public: + explicit SkTCopyOnFirstWrite(const T& initial) : fObj(&initial) {} + + explicit SkTCopyOnFirstWrite(const T* initial) : fObj(initial) {} + + // Constructor for delayed initialization. + SkTCopyOnFirstWrite() : fObj(nullptr) {} + + SkTCopyOnFirstWrite(const SkTCopyOnFirstWrite& that) { *this = that; } + SkTCopyOnFirstWrite( SkTCopyOnFirstWrite&& that) { *this = std::move(that); } + + SkTCopyOnFirstWrite& operator=(const SkTCopyOnFirstWrite& that) { + fLazy = that.fLazy; + fObj = fLazy.has_value() ? &fLazy.value() : that.fObj; + return *this; + } + + SkTCopyOnFirstWrite& operator=(SkTCopyOnFirstWrite&& that) { + fLazy = std::move(that.fLazy); + fObj = fLazy.has_value() ? &fLazy.value() : that.fObj; + return *this; + } + + // Should only be called once, and only if the default constructor was used. + void init(const T& initial) { + SkASSERT(!fObj); + SkASSERT(!fLazy.has_value()); + fObj = &initial; + } + + // If not already initialized, in-place instantiates the writable object + template <typename... Args> + void initIfNeeded(Args&&... args) { + if (!fObj) { + SkASSERT(!fLazy.has_value()); + fObj = &fLazy.emplace(std::forward<Args>(args)...); + } + } + + /** + * Returns a writable T*. The first time this is called the initial object is cloned. + */ + T* writable() { + SkASSERT(fObj); + if (!fLazy.has_value()) { + fLazy = *fObj; + fObj = &fLazy.value(); + } + return &fLazy.value(); + } + + const T* get() const { return fObj; } + + /** + * Operators for treating this as though it were a const pointer. 
+ */ + + const T *operator->() const { return fObj; } + + operator const T*() const { return fObj; } + + const T& operator *() const { return *fObj; } + +private: + const T* fObj; + std::optional<T> fLazy; +}; + +#endif diff --git a/gfx/skia/skia/src/base/SkTSearch.cpp b/gfx/skia/skia/src/base/SkTSearch.cpp new file mode 100644 index 0000000000..d91772e03b --- /dev/null +++ b/gfx/skia/skia/src/base/SkTSearch.cpp @@ -0,0 +1,117 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + + +#include "src/base/SkTSearch.h" + +#include "include/private/base/SkMalloc.h" + +#include <cstring> +#include <ctype.h> + +static inline const char* index_into_base(const char*const* base, int index, + size_t elemSize) +{ + return *(const char*const*)((const char*)base + index * elemSize); +} + +int SkStrSearch(const char*const* base, int count, const char target[], + size_t target_len, size_t elemSize) +{ + if (count <= 0) + return ~0; + + SkASSERT(base != nullptr); + + int lo = 0; + int hi = count - 1; + + while (lo < hi) + { + int mid = (hi + lo) >> 1; + const char* elem = index_into_base(base, mid, elemSize); + + int cmp = strncmp(elem, target, target_len); + if (cmp < 0) + lo = mid + 1; + else if (cmp > 0 || strlen(elem) > target_len) + hi = mid; + else + return mid; + } + + const char* elem = index_into_base(base, hi, elemSize); + int cmp = strncmp(elem, target, target_len); + if (cmp || strlen(elem) > target_len) + { + if (cmp < 0) + hi += 1; + hi = ~hi; + } + return hi; +} + +int SkStrSearch(const char*const* base, int count, const char target[], + size_t elemSize) +{ + return SkStrSearch(base, count, target, strlen(target), elemSize); +} + +int SkStrLCSearch(const char*const* base, int count, const char target[], + size_t len, size_t elemSize) +{ + SkASSERT(target); + + SkAutoAsciiToLC tolc(target, len); + + return SkStrSearch(base, count, tolc.lc(), len, elemSize); +} + +int SkStrLCSearch(const char*const* base, int count, const char target[], + size_t elemSize) +{ + return SkStrLCSearch(base, count, target, strlen(target), elemSize); +} + +////////////////////////////////////////////////////////////////////////////// + +SkAutoAsciiToLC::SkAutoAsciiToLC(const char str[], size_t len) +{ + // see if we need to compute the length + if ((long)len < 0) { + len = strlen(str); + } + fLength = len; + + // assign lc to our preallocated storage if len is small enough, or allocate + // it on the heap + char* lc; + if (len <= STORAGE) { + lc = fStorage; + } else { + lc = (char*)sk_malloc_throw(len + 1); + } + fLC = lc; + + // convert any asii to lower-case. we let non-ascii (utf8) chars pass + // through unchanged + for (int i = (int)(len - 1); i >= 0; --i) { + int c = str[i]; + if ((c & 0x80) == 0) { // is just ascii + c = tolower(c); + } + lc[i] = c; + } + lc[len] = 0; +} + +SkAutoAsciiToLC::~SkAutoAsciiToLC() +{ + if (fLC != fStorage) { + sk_free(fLC); + } +} diff --git a/gfx/skia/skia/src/base/SkTSearch.h b/gfx/skia/skia/src/base/SkTSearch.h new file mode 100644 index 0000000000..6ebd304029 --- /dev/null +++ b/gfx/skia/skia/src/base/SkTSearch.h @@ -0,0 +1,132 @@ + +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + + +#ifndef SkTSearch_DEFINED +#define SkTSearch_DEFINED + +#include "include/private/base/SkAssert.h" + +#include <cstddef> + +/** + * All of the SkTSearch variants want to return the index (0...N-1) of the + * found element, or the bit-not of where to insert the element. + * + * At a simple level, if the return value is negative, it was not found. + * + * For clients that want to insert the new element if it was not found, use + * the following logic: + * + * int index = SkTSearch(...); + * if (index >= 0) { + * // found at index + * } else { + * index = ~index; // now we are positive + * // insert at index + * } + */ + + +// The most general form of SkTSearch takes an array of T and a key of type K. A functor, less, is +// used to perform comparisons. It has two function operators: +// bool operator() (const T& t, const K& k) +// bool operator() (const K& t, const T& k) +template <typename T, typename K, typename LESS> +int SkTSearch(const T base[], int count, const K& key, size_t elemSize, const LESS& less) +{ + SkASSERT(count >= 0); + if (count <= 0) { + return ~0; + } + + SkASSERT(base != nullptr); // base may be nullptr if count is zero + + int lo = 0; + int hi = count - 1; + + while (lo < hi) { + int mid = lo + ((hi - lo) >> 1); + const T* elem = (const T*)((const char*)base + mid * elemSize); + + if (less(*elem, key)) + lo = mid + 1; + else + hi = mid; + } + + const T* elem = (const T*)((const char*)base + hi * elemSize); + if (less(*elem, key)) { + hi += 1; + hi = ~hi; + } else if (less(key, *elem)) { + hi = ~hi; + } + return hi; +} + +// Specialization for case when T==K and the caller wants to use a function rather than functor. +template <typename T, bool (LESS)(const T&, const T&)> +int SkTSearch(const T base[], int count, const T& target, size_t elemSize) { + return SkTSearch(base, count, target, elemSize, + [](const T& a, const T& b) { return LESS(a, b); }); +} + +// Specialization for T==K, compare using op <. +template <typename T> +int SkTSearch(const T base[], int count, const T& target, size_t elemSize) { + return SkTSearch(base, count, target, elemSize, [](const T& a, const T& b) { return a < b; }); +} + +// Specialization for case where domain is an array of T* and the key value is a T*, and you want +// to compare the T objects, not the pointers. +template <typename T, bool (LESS)(const T&, const T&)> +int SkTSearch(T* base[], int count, T* target, size_t elemSize) { + return SkTSearch(base, count, target, elemSize, + [](const T* t, const T* k) { return LESS(*t, *k); }); +} + +int SkStrSearch(const char*const* base, int count, const char target[], + size_t target_len, size_t elemSize); +int SkStrSearch(const char*const* base, int count, const char target[], + size_t elemSize); + +/** Like SkStrSearch, but treats target as if it were all lower-case. Assumes that + base points to a table of lower-case strings. +*/ +int SkStrLCSearch(const char*const* base, int count, const char target[], + size_t target_len, size_t elemSize); +int SkStrLCSearch(const char*const* base, int count, const char target[], + size_t elemSize); + +/** Helper class to convert a string to lower-case, but only modifying the ascii + characters. This makes the routine very fast and never changes the string + length, but it is not suitable for linguistic purposes. Normally this is + used for buiding and searching string tables. 
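+
+    For example:
+
+        SkAutoAsciiToLC tolc("Sans-Serif");
+        // tolc.lc() is "sans-serif" and tolc.length() is 10.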
+*/ +class SkAutoAsciiToLC { +public: + SkAutoAsciiToLC(const char str[], size_t len = (size_t)-1); + ~SkAutoAsciiToLC(); + + const char* lc() const { return fLC; } + size_t length() const { return fLength; } + +private: + char* fLC; // points to either the heap or fStorage + size_t fLength; + enum { + STORAGE = 64 + }; + char fStorage[STORAGE+1]; +}; + +// Helper when calling qsort with a compare proc that has typed its arguments +#define SkCastForQSort(compare) reinterpret_cast<int (*)(const void*, const void*)>(compare) + +#endif diff --git a/gfx/skia/skia/src/base/SkTSort.h b/gfx/skia/skia/src/base/SkTSort.h new file mode 100644 index 0000000000..a1d35cc158 --- /dev/null +++ b/gfx/skia/skia/src/base/SkTSort.h @@ -0,0 +1,214 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkTSort_DEFINED +#define SkTSort_DEFINED + +#include "include/private/base/SkTo.h" +#include "src/base/SkMathPriv.h" + +#include <cstddef> +#include <utility> + +/////////////////////////////////////////////////////////////////////////////// + +/* Sifts a broken heap. The input array is a heap from root to bottom + * except that the root entry may be out of place. + * + * Sinks a hole from array[root] to leaf and then sifts the original array[root] element + * from the leaf level up. + * + * This version does extra work, in that it copies child to parent on the way down, + * then copies parent to child on the way back up. When copies are inexpensive, + * this is an optimization as this sift variant should only be used when + * the potentially out of place root entry value is expected to be small. + * + * @param root the one based index into array of the out-of-place root of the heap. + * @param bottom the one based index in the array of the last entry in the heap. + */ +template <typename T, typename C> +void SkTHeapSort_SiftUp(T array[], size_t root, size_t bottom, const C& lessThan) { + T x = array[root-1]; + size_t start = root; + size_t j = root << 1; + while (j <= bottom) { + if (j < bottom && lessThan(array[j-1], array[j])) { + ++j; + } + array[root-1] = array[j-1]; + root = j; + j = root << 1; + } + j = root >> 1; + while (j >= start) { + if (lessThan(array[j-1], x)) { + array[root-1] = array[j-1]; + root = j; + j = root >> 1; + } else { + break; + } + } + array[root-1] = x; +} + +/* Sifts a broken heap. The input array is a heap from root to bottom + * except that the root entry may be out of place. + * + * Sifts the array[root] element from the root down. + * + * @param root the one based index into array of the out-of-place root of the heap. + * @param bottom the one based index in the array of the last entry in the heap. + */ +template <typename T, typename C> +void SkTHeapSort_SiftDown(T array[], size_t root, size_t bottom, const C& lessThan) { + T x = array[root-1]; + size_t child = root << 1; + while (child <= bottom) { + if (child < bottom && lessThan(array[child-1], array[child])) { + ++child; + } + if (lessThan(x, array[child-1])) { + array[root-1] = array[child-1]; + root = child; + child = root << 1; + } else { + break; + } + } + array[root-1] = x; +} + +/** Sorts the array of size count using comparator lessThan using a Heap Sort algorithm. Be sure to + * specialize swap if T has an efficient swap operation. + * + * @param array the array to be sorted. + * @param count the number of elements in the array. 
+ * @param lessThan a functor with bool operator()(T a, T b) which returns true if a comes before b. + */ +template <typename T, typename C> void SkTHeapSort(T array[], size_t count, const C& lessThan) { + for (size_t i = count >> 1; i > 0; --i) { + SkTHeapSort_SiftDown(array, i, count, lessThan); + } + + for (size_t i = count - 1; i > 0; --i) { + using std::swap; + swap(array[0], array[i]); + SkTHeapSort_SiftUp(array, 1, i, lessThan); + } +} + +/** Sorts the array of size count using comparator '<' using a Heap Sort algorithm. */ +template <typename T> void SkTHeapSort(T array[], size_t count) { + SkTHeapSort(array, count, [](const T& a, const T& b) { return a < b; }); +} + +/////////////////////////////////////////////////////////////////////////////// + +/** Sorts the array of size count using comparator lessThan using an Insertion Sort algorithm. */ +template <typename T, typename C> +void SkTInsertionSort(T* left, int count, const C& lessThan) { + T* right = left + count - 1; + for (T* next = left + 1; next <= right; ++next) { + if (!lessThan(*next, *(next - 1))) { + continue; + } + T insert = std::move(*next); + T* hole = next; + do { + *hole = std::move(*(hole - 1)); + --hole; + } while (left < hole && lessThan(insert, *(hole - 1))); + *hole = std::move(insert); + } +} + +/////////////////////////////////////////////////////////////////////////////// + +template <typename T, typename C> +T* SkTQSort_Partition(T* left, int count, T* pivot, const C& lessThan) { + T* right = left + count - 1; + using std::swap; + T pivotValue = *pivot; + swap(*pivot, *right); + T* newPivot = left; + while (left < right) { + if (lessThan(*left, pivotValue)) { + swap(*left, *newPivot); + newPivot += 1; + } + left += 1; + } + swap(*newPivot, *right); + return newPivot; +} + +/* Introsort is a modified Quicksort. + * When the region to be sorted is a small constant size, it uses Insertion Sort. + * When depth becomes zero, it switches over to Heap Sort. + * This implementation recurses on the left region after pivoting and loops on the right, + * we already limit the stack depth by switching to heap sort, + * and cache locality on the data appears more important than saving a few stack frames. + * + * @param depth at this recursion depth, switch to Heap Sort. + * @param left points to the beginning of the region to be sorted + * @param count number of items to be sorted + * @param lessThan a functor/lambda which returns true if a comes before b. + */ +template <typename T, typename C> +void SkTIntroSort(int depth, T* left, int count, const C& lessThan) { + for (;;) { + if (count <= 32) { + SkTInsertionSort(left, count, lessThan); + return; + } + + if (depth == 0) { + SkTHeapSort<T>(left, count, lessThan); + return; + } + --depth; + + T* middle = left + ((count - 1) >> 1); + T* pivot = SkTQSort_Partition(left, count, middle, lessThan); + int pivotCount = pivot - left; + + SkTIntroSort(depth, left, pivotCount, lessThan); + left += pivotCount + 1; + count -= pivotCount + 1; + } +} + +/** Sorts the region from left to right using comparator lessThan using Introsort. + * Be sure to specialize `swap` if T has an efficient swap operation. + * + * @param begin points to the beginning of the region to be sorted + * @param end points past the end of the region to be sorted + * @param lessThan a functor/lambda which returns true if a comes before b. 
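+ *
+ *  For example:
+ *
+ *      float vals[] = {3.f, 1.f, 2.f};
+ *      SkTQSort(vals, vals + 3, [](float a, float b) { return a > b; });
+ *      // vals is now {3.f, 2.f, 1.f} (sorted in descending order)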
+ */ +template <typename T, typename C> +void SkTQSort(T* begin, T* end, const C& lessThan) { + int n = SkToInt(end - begin); + if (n <= 1) { + return; + } + // Limit Introsort recursion depth to no more than 2 * ceil(log2(n-1)). + int depth = 2 * SkNextLog2(n - 1); + SkTIntroSort(depth, begin, n, lessThan); +} + +/** Sorts the region from left to right using comparator 'a < b' using Introsort. */ +template <typename T> void SkTQSort(T* begin, T* end) { + SkTQSort(begin, end, [](const T& a, const T& b) { return a < b; }); +} + +/** Sorts the region from left to right using comparator '*a < *b' using Introsort. */ +template <typename T> void SkTQSort(T** begin, T** end) { + SkTQSort(begin, end, [](const T* a, const T* b) { return *a < *b; }); +} + +#endif diff --git a/gfx/skia/skia/src/base/SkThreadID.cpp b/gfx/skia/skia/src/base/SkThreadID.cpp new file mode 100644 index 0000000000..e5b7a06c7c --- /dev/null +++ b/gfx/skia/skia/src/base/SkThreadID.cpp @@ -0,0 +1,16 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "include/private/base/SkThreadID.h" + +#ifdef SK_BUILD_FOR_WIN + #include "src/base/SkLeanWindows.h" + SkThreadID SkGetThreadID() { return GetCurrentThreadId(); } +#else + #include <pthread.h> + SkThreadID SkGetThreadID() { return (int64_t)pthread_self(); } +#endif diff --git a/gfx/skia/skia/src/base/SkUTF.cpp b/gfx/skia/skia/src/base/SkUTF.cpp new file mode 100644 index 0000000000..20325fb2b6 --- /dev/null +++ b/gfx/skia/skia/src/base/SkUTF.cpp @@ -0,0 +1,316 @@ +// Copyright 2018 Google LLC. +// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. + +#include "src/base/SkUTF.h" + +#include "include/private/base/SkTFitsIn.h" + +static constexpr inline int32_t left_shift(int32_t value, int32_t shift) { + return (int32_t) ((uint32_t) value << shift); +} + +template <typename T> static constexpr bool is_align2(T x) { return 0 == (x & 1); } + +template <typename T> static constexpr bool is_align4(T x) { return 0 == (x & 3); } + +static constexpr inline bool utf16_is_high_surrogate(uint16_t c) { return (c & 0xFC00) == 0xD800; } + +static constexpr inline bool utf16_is_low_surrogate(uint16_t c) { return (c & 0xFC00) == 0xDC00; } + +/** @returns -1 iff invalid UTF8 byte, + 0 iff UTF8 continuation byte, + 1 iff ASCII byte, + 2 iff leading byte of 2-byte sequence, + 3 iff leading byte of 3-byte sequence, and + 4 iff leading byte of 4-byte sequence. + I.e.: if return value > 0, then gives length of sequence. 
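+
+    For example: 0x41 ('A') -> 1, 0xC3 -> 2, 0xE2 -> 3, 0xF0 -> 4, 0x80 -> 0 (continuation),
+    and 0xC0, 0xC1, and 0xF5..0xFF -> -1.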
+*/ +static int utf8_byte_type(uint8_t c) { + if (c < 0x80) { + return 1; + } else if (c < 0xC0) { + return 0; + } else if (c >= 0xF5 || (c & 0xFE) == 0xC0) { // "octet values c0, c1, f5 to ff never appear" + return -1; + } else { + int value = (((0xe5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1; + // assert(value >= 2 && value <=4); + return value; + } +} +static bool utf8_type_is_valid_leading_byte(int type) { return type > 0; } + +static bool utf8_byte_is_continuation(uint8_t c) { return utf8_byte_type(c) == 0; } + +//////////////////////////////////////////////////////////////////////////////// + +int SkUTF::CountUTF8(const char* utf8, size_t byteLength) { + if (!utf8 && byteLength) { + return -1; + } + int count = 0; + const char* stop = utf8 + byteLength; + while (utf8 < stop) { + int type = utf8_byte_type(*(const uint8_t*)utf8); + if (!utf8_type_is_valid_leading_byte(type) || utf8 + type > stop) { + return -1; // Sequence extends beyond end. + } + while(type-- > 1) { + ++utf8; + if (!utf8_byte_is_continuation(*(const uint8_t*)utf8)) { + return -1; + } + } + ++utf8; + ++count; + } + return count; +} + +int SkUTF::CountUTF16(const uint16_t* utf16, size_t byteLength) { + if (!utf16 || !is_align2(intptr_t(utf16)) || !is_align2(byteLength)) { + return -1; + } + const uint16_t* src = (const uint16_t*)utf16; + const uint16_t* stop = src + (byteLength >> 1); + int count = 0; + while (src < stop) { + unsigned c = *src++; + if (utf16_is_low_surrogate(c)) { + return -1; + } + if (utf16_is_high_surrogate(c)) { + if (src >= stop) { + return -1; + } + c = *src++; + if (!utf16_is_low_surrogate(c)) { + return -1; + } + } + count += 1; + } + return count; +} + +int SkUTF::CountUTF32(const int32_t* utf32, size_t byteLength) { + if (!is_align4(intptr_t(utf32)) || !is_align4(byteLength) || !SkTFitsIn<int>(byteLength >> 2)) { + return -1; + } + const uint32_t kInvalidUnicharMask = 0xFF000000; // unichar fits in 24 bits + const uint32_t* ptr = (const uint32_t*)utf32; + const uint32_t* stop = ptr + (byteLength >> 2); + while (ptr < stop) { + if (*ptr & kInvalidUnicharMask) { + return -1; + } + ptr += 1; + } + return (int)(byteLength >> 2); +} + +template <typename T> +static SkUnichar next_fail(const T** ptr, const T* end) { + *ptr = end; + return -1; +} + +SkUnichar SkUTF::NextUTF8(const char** ptr, const char* end) { + if (!ptr || !end ) { + return -1; + } + const uint8_t* p = (const uint8_t*)*ptr; + if (!p || p >= (const uint8_t*)end) { + return next_fail(ptr, end); + } + int c = *p; + int hic = c << 24; + + if (!utf8_type_is_valid_leading_byte(utf8_byte_type(c))) { + return next_fail(ptr, end); + } + if (hic < 0) { + uint32_t mask = (uint32_t)~0x3F; + hic = left_shift(hic, 1); + do { + ++p; + if (p >= (const uint8_t*)end) { + return next_fail(ptr, end); + } + // check before reading off end of array. + uint8_t nextByte = *p; + if (!utf8_byte_is_continuation(nextByte)) { + return next_fail(ptr, end); + } + c = (c << 6) | (nextByte & 0x3F); + mask <<= 5; + } while ((hic = left_shift(hic, 1)) < 0); + c &= ~mask; + } + *ptr = (char*)p + 1; + return c; +} + +SkUnichar SkUTF::NextUTF16(const uint16_t** ptr, const uint16_t* end) { + if (!ptr || !end ) { + return -1; + } + const uint16_t* src = *ptr; + if (!src || src + 1 > end || !is_align2(intptr_t(src))) { + return next_fail(ptr, end); + } + uint16_t c = *src++; + SkUnichar result = c; + if (utf16_is_low_surrogate(c)) { + return next_fail(ptr, end); // srcPtr should never point at low surrogate. 
+ } + if (utf16_is_high_surrogate(c)) { + if (src + 1 > end) { + return next_fail(ptr, end); // Truncated string. + } + uint16_t low = *src++; + if (!utf16_is_low_surrogate(low)) { + return next_fail(ptr, end); + } + /* + [paraphrased from wikipedia] + Take the high surrogate and subtract 0xD800, then multiply by 0x400. + Take the low surrogate and subtract 0xDC00. Add these two results + together, and finally add 0x10000 to get the final decoded codepoint. + + unicode = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000 + unicode = (high * 0x400) - (0xD800 * 0x400) + low - 0xDC00 + 0x10000 + unicode = (high << 10) - (0xD800 << 10) + low - 0xDC00 + 0x10000 + unicode = (high << 10) + low - ((0xD800 << 10) + 0xDC00 - 0x10000) + */ + result = (result << 10) + (SkUnichar)low - ((0xD800 << 10) + 0xDC00 - 0x10000); + } + *ptr = src; + return result; +} + +SkUnichar SkUTF::NextUTF32(const int32_t** ptr, const int32_t* end) { + if (!ptr || !end ) { + return -1; + } + const int32_t* s = *ptr; + if (!s || s + 1 > end || !is_align4(intptr_t(s))) { + return next_fail(ptr, end); + } + int32_t value = *s; + const uint32_t kInvalidUnicharMask = 0xFF000000; // unichar fits in 24 bits + if (value & kInvalidUnicharMask) { + return next_fail(ptr, end); + } + *ptr = s + 1; + return value; +} + +size_t SkUTF::ToUTF8(SkUnichar uni, char utf8[SkUTF::kMaxBytesInUTF8Sequence]) { + if ((uint32_t)uni > 0x10FFFF) { + return 0; + } + if (uni <= 127) { + if (utf8) { + *utf8 = (char)uni; + } + return 1; + } + char tmp[4]; + char* p = tmp; + size_t count = 1; + while (uni > 0x7F >> count) { + *p++ = (char)(0x80 | (uni & 0x3F)); + uni >>= 6; + count += 1; + } + if (utf8) { + p = tmp; + utf8 += count; + while (p < tmp + count - 1) { + *--utf8 = *p++; + } + *--utf8 = (char)(~(0xFF >> count) | uni); + } + return count; +} + +size_t SkUTF::ToUTF16(SkUnichar uni, uint16_t utf16[2]) { + if ((uint32_t)uni > 0x10FFFF) { + return 0; + } + int extra = (uni > 0xFFFF); + if (utf16) { + if (extra) { + utf16[0] = (uint16_t)((0xD800 - 64) + (uni >> 10)); + utf16[1] = (uint16_t)(0xDC00 | (uni & 0x3FF)); + } else { + utf16[0] = (uint16_t)uni; + } + } + return 1 + extra; +} + +int SkUTF::UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength) { + if (!dst) { + dstCapacity = 0; + } + + int dstLength = 0; + uint16_t* endDst = dst + dstCapacity; + const char* endSrc = src + srcByteLength; + while (src < endSrc) { + SkUnichar uni = NextUTF8(&src, endSrc); + if (uni < 0) { + return -1; + } + + uint16_t utf16[2]; + size_t count = ToUTF16(uni, utf16); + if (count == 0) { + return -1; + } + dstLength += count; + + if (dst) { + uint16_t* elems = utf16; + while (dst < endDst && count > 0) { + *dst++ = *elems++; + count -= 1; + } + } + } + return dstLength; +} + +int SkUTF::UTF16ToUTF8(char dst[], int dstCapacity, const uint16_t src[], size_t srcLength) { + if (!dst) { + dstCapacity = 0; + } + + int dstLength = 0; + const char* endDst = dst + dstCapacity; + const uint16_t* endSrc = src + srcLength; + while (src < endSrc) { + SkUnichar uni = NextUTF16(&src, endSrc); + if (uni < 0) { + return -1; + } + + char utf8[SkUTF::kMaxBytesInUTF8Sequence]; + size_t count = ToUTF8(uni, utf8); + if (count == 0) { + return -1; + } + dstLength += count; + + if (dst) { + const char* elems = utf8; + while (dst < endDst && count > 0) { + *dst++ = *elems++; + count -= 1; + } + } + } + return dstLength; +} diff --git a/gfx/skia/skia/src/base/SkUTF.h b/gfx/skia/skia/src/base/SkUTF.h new file mode 100644 index 0000000000..e50804da98 --- /dev/null 
+++ b/gfx/skia/skia/src/base/SkUTF.h @@ -0,0 +1,95 @@ +// Copyright 2018 Google LLC. +// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. +#ifndef SkUTF_DEFINED +#define SkUTF_DEFINED + +#include "include/private/base/SkAPI.h" + +#include <cstddef> +#include <cstdint> + +typedef int32_t SkUnichar; + +namespace SkUTF { + +/** Given a sequence of UTF-8 bytes, return the number of unicode codepoints. + If the sequence is invalid UTF-8, return -1. +*/ +SK_SPI int CountUTF8(const char* utf8, size_t byteLength); + +/** Given a sequence of aligned UTF-16 characters in machine-endian form, + return the number of unicode codepoints. If the sequence is invalid + UTF-16, return -1. +*/ +SK_SPI int CountUTF16(const uint16_t* utf16, size_t byteLength); + +/** Given a sequence of aligned UTF-32 characters in machine-endian form, + return the number of unicode codepoints. If the sequence is invalid + UTF-32, return -1. +*/ +SK_SPI int CountUTF32(const int32_t* utf32, size_t byteLength); + +/** Given a sequence of UTF-8 bytes, return the first unicode codepoint. + The pointer will be incremented to point at the next codepoint's start. If + invalid UTF-8 is encountered, set *ptr to end and return -1. +*/ +SK_SPI SkUnichar NextUTF8(const char** ptr, const char* end); + +/** Given a sequence of aligned UTF-16 characters in machine-endian form, + return the first unicode codepoint. The pointer will be incremented to + point at the next codepoint's start. If invalid UTF-16 is encountered, + set *ptr to end and return -1. +*/ +SK_SPI SkUnichar NextUTF16(const uint16_t** ptr, const uint16_t* end); + +/** Given a sequence of aligned UTF-32 characters in machine-endian form, + return the first unicode codepoint. The pointer will be incremented to + point at the next codepoint's start. If invalid UTF-32 is encountered, + set *ptr to end and return -1. +*/ +SK_SPI SkUnichar NextUTF32(const int32_t** ptr, const int32_t* end); + +constexpr unsigned kMaxBytesInUTF8Sequence = 4; + +/** Convert the unicode codepoint into UTF-8. If `utf8` is non-null, place the + result in that array. Return the number of bytes in the result. If `utf8` + is null, simply return the number of bytes that would be used. For invalid + unicode codepoints, return 0. +*/ +SK_SPI size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence] = nullptr); + +/** Convert the unicode codepoint into UTF-16. If `utf16` is non-null, place + the result in that array. Return the number of UTF-16 code units in the + result (1 or 2). If `utf16` is null, simply return the number of code + units that would be used. For invalid unicode codepoints, return 0. +*/ +SK_SPI size_t ToUTF16(SkUnichar uni, uint16_t utf16[2] = nullptr); + +/** Returns the number of resulting UTF16 values needed to convert the src utf8 sequence. + * If dst is not null, it is filled with the corresponding values up to its capacity. + * If there is an error, -1 is returned and the dst[] buffer is undefined. + */ +SK_SPI int UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength); + +/** Returns the number of resulting UTF8 values needed to convert the src utf16 sequence. + * If dst is not null, it is filled with the corresponding values up to its capacity. + * If there is an error, -1 is returned and the dst[] buffer is undefined. + */ +SK_SPI int UTF16ToUTF8(char dst[], int dstCapacity, const uint16_t src[], size_t srcLength); + +/** + * Given a UTF-16 code point, returns true iff it is a leading surrogate. 
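+ * For example, IsLeadingSurrogateUTF16(0xD83D) is true, while IsLeadingSurrogateUTF16(0xDE00)
+ * and IsLeadingSurrogateUTF16(0x0041) are false.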
+ * https://unicode.org/faq/utf_bom.html#utf16-2 + */ +static inline bool IsLeadingSurrogateUTF16(uint16_t c) { return ((c) & 0xFC00) == 0xD800; } + +/** + * Given a UTF-16 code point, returns true iff it is a trailing surrogate. + * https://unicode.org/faq/utf_bom.html#utf16-2 + */ +static inline bool IsTrailingSurrogateUTF16(uint16_t c) { return ((c) & 0xFC00) == 0xDC00; } + + +} // namespace SkUTF + +#endif // SkUTF_DEFINED diff --git a/gfx/skia/skia/src/base/SkUtils.cpp b/gfx/skia/skia/src/base/SkUtils.cpp new file mode 100644 index 0000000000..b9852e9389 --- /dev/null +++ b/gfx/skia/skia/src/base/SkUtils.cpp @@ -0,0 +1,13 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "src/base/SkUtils.h" + +const char SkHexadecimalDigits::gUpper[16] = + { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; +const char SkHexadecimalDigits::gLower[16] = + { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; diff --git a/gfx/skia/skia/src/base/SkUtils.h b/gfx/skia/skia/src/base/SkUtils.h new file mode 100644 index 0000000000..ae2331dfca --- /dev/null +++ b/gfx/skia/skia/src/base/SkUtils.h @@ -0,0 +1,55 @@ +/* + * Copyright 2006 The Android Open Source Project + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkUtils_DEFINED +#define SkUtils_DEFINED + +#include "include/private/base/SkAttributes.h" + +#include <cstring> +#include <type_traits> // is_trivially_copyable + +namespace SkHexadecimalDigits { + extern const char gUpper[16]; // 0-9A-F + extern const char gLower[16]; // 0-9a-f +} // namespace SkHexadecimalDigits + +/////////////////////////////////////////////////////////////////////////////// + +// If T is an 8-byte GCC or Clang vector extension type, it would naturally +// pass or return in the MMX mm0 register on 32-bit x86 builds. This has the +// fun side effect of clobbering any state in the x87 st0 register. (There is +// no ABI governing who should preserve mm?/st? registers, so no one does!) +// +// We force-inline sk_unaligned_load() and sk_unaligned_store() to avoid that, +// making them safe to use for all types on all platforms, thus solving the +// problem once and for all! + +template <typename T, typename P> +static SK_ALWAYS_INLINE T sk_unaligned_load(const P* ptr) { + static_assert(std::is_trivially_copyable<T>::value); + static_assert(std::is_trivially_copyable<P>::value); + T val; + memcpy(&val, ptr, sizeof(val)); + return val; +} + +template <typename T, typename P> +static SK_ALWAYS_INLINE void sk_unaligned_store(P* ptr, T val) { + static_assert(std::is_trivially_copyable<T>::value); + static_assert(std::is_trivially_copyable<P>::value); + memcpy(ptr, &val, sizeof(val)); +} + +// Copy the bytes from src into an instance of type Dst and return it. +template <typename Dst, typename Src> +static SK_ALWAYS_INLINE Dst sk_bit_cast(const Src& src) { + static_assert(sizeof(Dst) == sizeof(Src)); + return sk_unaligned_load<Dst>(&src); +} + +#endif diff --git a/gfx/skia/skia/src/base/SkVx.h b/gfx/skia/skia/src/base/SkVx.h new file mode 100644 index 0000000000..a1731ad0c4 --- /dev/null +++ b/gfx/skia/skia/src/base/SkVx.h @@ -0,0 +1,1183 @@ +/* + * Copyright 2019 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SKVX_DEFINED +#define SKVX_DEFINED + +// skvx::Vec<N,T> are SIMD vectors of N T's, a v1.5 successor to SkNx<N,T>. +// +// This time we're leaning a bit less on platform-specific intrinsics and a bit +// more on Clang/GCC vector extensions, but still keeping the option open to +// drop in platform-specific intrinsics, actually more easily than before. +// +// We've also fixed a few of the caveats that used to make SkNx awkward to work +// with across translation units. skvx::Vec<N,T> always has N*sizeof(T) size +// and alignment and is safe to use across translation units freely. +// (Ideally we'd only align to T, but that tanks ARMv7 NEON codegen.) + +// Please try to keep this file independent of Skia headers. +#include <algorithm> // std::min, std::max +#include <cassert> // assert() +#include <cmath> // ceilf, floorf, truncf, roundf, sqrtf, etc. +#include <cstdint> // intXX_t +#include <cstring> // memcpy() +#include <initializer_list> // std::initializer_list +#include <type_traits> +#include <utility> // std::index_sequence + +// Users may disable SIMD with SKNX_NO_SIMD, which may be set via compiler flags. +// The gn build has no option which sets SKNX_NO_SIMD. +// Use SKVX_USE_SIMD internally to avoid confusing double negation. +// Do not use 'defined' in a macro expansion. +#if !defined(SKNX_NO_SIMD) + #define SKVX_USE_SIMD 1 +#else + #define SKVX_USE_SIMD 0 +#endif + +#if SKVX_USE_SIMD + #if defined(__SSE__) || defined(__AVX__) || defined(__AVX2__) + #include <immintrin.h> + #elif defined(__ARM_NEON) + #include <arm_neon.h> + #elif defined(__wasm_simd128__) + #include <wasm_simd128.h> + #endif +#endif + +// To avoid ODR violations, all methods must be force-inlined... +#if defined(_MSC_VER) + #define SKVX_ALWAYS_INLINE __forceinline +#else + #define SKVX_ALWAYS_INLINE __attribute__((always_inline)) +#endif + +// ... and all standalone functions must be static. Please use these helpers: +#define SI static inline +#define SIT template < typename T> SI +#define SIN template <int N > SI +#define SINT template <int N, typename T> SI +#define SINTU template <int N, typename T, typename U, \ + typename=std::enable_if_t<std::is_convertible<U,T>::value>> SI + +namespace skvx { + +template <int N, typename T> +struct alignas(N*sizeof(T)) Vec; + +template <int... Ix, int N, typename T> +SI Vec<sizeof...(Ix),T> shuffle(const Vec<N,T>&); + +template <typename D, typename S> +SI D bit_pun(const S& s) { + static_assert(sizeof(D) == sizeof(S)); + D d; + memcpy(&d, &s, sizeof(D)); + return d; +} + +// All Vec have the same simple memory layout, the same as `T vec[N]`. 
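+// For example, a Vec<4,float> is stored as { Vec<2,float> lo, hi }, which recursively bottoms out
+// at four contiguous floats, so it can be memcpy'd or bit_pun'd exactly like a float[4].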
+template <int N, typename T> +struct alignas(N*sizeof(T)) VecStorage { + SKVX_ALWAYS_INLINE VecStorage() = default; + SKVX_ALWAYS_INLINE VecStorage(T s) : lo(s), hi(s) {} + + Vec<N/2,T> lo, hi; +}; + +template <typename T> +struct VecStorage<4,T> { + SKVX_ALWAYS_INLINE VecStorage() = default; + SKVX_ALWAYS_INLINE VecStorage(T s) : lo(s), hi(s) {} + SKVX_ALWAYS_INLINE VecStorage(T x, T y, T z, T w) : lo(x,y), hi(z, w) {} + SKVX_ALWAYS_INLINE VecStorage(Vec<2,T> xy, T z, T w) : lo(xy), hi(z,w) {} + SKVX_ALWAYS_INLINE VecStorage(T x, T y, Vec<2,T> zw) : lo(x,y), hi(zw) {} + SKVX_ALWAYS_INLINE VecStorage(Vec<2,T> xy, Vec<2,T> zw) : lo(xy), hi(zw) {} + + SKVX_ALWAYS_INLINE Vec<2,T>& xy() { return lo; } + SKVX_ALWAYS_INLINE Vec<2,T>& zw() { return hi; } + SKVX_ALWAYS_INLINE T& x() { return lo.lo.val; } + SKVX_ALWAYS_INLINE T& y() { return lo.hi.val; } + SKVX_ALWAYS_INLINE T& z() { return hi.lo.val; } + SKVX_ALWAYS_INLINE T& w() { return hi.hi.val; } + + SKVX_ALWAYS_INLINE Vec<2,T> xy() const { return lo; } + SKVX_ALWAYS_INLINE Vec<2,T> zw() const { return hi; } + SKVX_ALWAYS_INLINE T x() const { return lo.lo.val; } + SKVX_ALWAYS_INLINE T y() const { return lo.hi.val; } + SKVX_ALWAYS_INLINE T z() const { return hi.lo.val; } + SKVX_ALWAYS_INLINE T w() const { return hi.hi.val; } + + // Exchange-based swizzles. These should take 1 cycle on NEON and 3 (pipelined) cycles on SSE. + SKVX_ALWAYS_INLINE Vec<4,T> yxwz() const { return shuffle<1,0,3,2>(bit_pun<Vec<4,T>>(*this)); } + SKVX_ALWAYS_INLINE Vec<4,T> zwxy() const { return shuffle<2,3,0,1>(bit_pun<Vec<4,T>>(*this)); } + + Vec<2,T> lo, hi; +}; + +template <typename T> +struct VecStorage<2,T> { + SKVX_ALWAYS_INLINE VecStorage() = default; + SKVX_ALWAYS_INLINE VecStorage(T s) : lo(s), hi(s) {} + SKVX_ALWAYS_INLINE VecStorage(T x, T y) : lo(x), hi(y) {} + + SKVX_ALWAYS_INLINE T& x() { return lo.val; } + SKVX_ALWAYS_INLINE T& y() { return hi.val; } + + SKVX_ALWAYS_INLINE T x() const { return lo.val; } + SKVX_ALWAYS_INLINE T y() const { return hi.val; } + + // This exchange-based swizzle should take 1 cycle on NEON and 3 (pipelined) cycles on SSE. + SKVX_ALWAYS_INLINE Vec<2,T> yx() const { return shuffle<1,0>(bit_pun<Vec<2,T>>(*this)); } + + SKVX_ALWAYS_INLINE Vec<4,T> xyxy() const { + return Vec<4,T>(bit_pun<Vec<2,T>>(*this), bit_pun<Vec<2,T>>(*this)); + } + + Vec<1,T> lo, hi; +}; + +// Translate from a value type T to its corresponding Mask, the result of a comparison. 
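+// For example, comparing two Vec<4,float> values yields a Vec<4,int32_t> whose lanes are either
+// all-zero or all-one bits, the same width as the float lanes they describe.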
+template <typename T> struct Mask { using type = T; }; +template <> struct Mask<float > { using type = int32_t; }; +template <> struct Mask<double> { using type = int64_t; }; +template <typename T> using M = typename Mask<T>::type; + +template <int N, typename T> +struct NoConversion { T vals[N]; }; + +template <int N, typename T> +struct ConvertNative { + typedef NoConversion<N, T> type; +}; + +#if SKVX_USE_SIMD && defined(__SSE__) +template<> +struct ConvertNative<4, float> { + typedef __m128 type; +}; + +template<> +struct ConvertNative<4, int32_t> { + typedef __m128i type; +}; + +template <> +struct ConvertNative<4, uint32_t> { + typedef __m128i type; +}; + +template<> +struct ConvertNative<8, int16_t> { + typedef __m128i type; +}; + +template <> +struct ConvertNative<8, uint16_t> { + typedef __m128i type; +}; + +template <> +struct ConvertNative<16, uint8_t> { + typedef __m128i type; +}; +#endif + +#if SKVX_USE_SIMD && defined(__AVX__) +template<> +struct ConvertNative<8, float> { + typedef __m256 type; +}; + +template<> +struct ConvertNative<8, int32_t> { + typedef __m256i type; +}; + +template <> +struct ConvertNative<8, uint32_t> { + typedef __m256i type; +}; + +template<> +struct ConvertNative<16, int16_t> { + typedef __m256i type; +}; + +template <> +struct ConvertNative<16, uint16_t> { + typedef __m256i type; +}; +#endif + +#if SKVX_USE_SIMD && defined(__ARM_NEON) +template<> +struct ConvertNative<4, float> { + typedef float32x4_t type; +}; + +template<> +struct ConvertNative<4, int32_t> { + typedef int32x4_t type; +}; + +template <> +struct ConvertNative<4, uint32_t> { + typedef uint32x4_t type; +}; + +template<> +struct ConvertNative<4, int16_t> { + typedef int16x4_t type; +}; + +template <> +struct ConvertNative<4, uint16_t> { + typedef uint16x4_t type; +}; + +template<> +struct ConvertNative<8, int16_t> { + typedef int16x8_t type; +}; + +template <> +struct ConvertNative<8, uint16_t> { + typedef uint16x8_t type; +}; + +template <> +struct ConvertNative<8, uint8_t> { + typedef uint8x8_t type; +}; +#endif + +template <int N, typename T> +struct alignas(N*sizeof(T)) Vec : public VecStorage<N,T> { + typedef T elem_type; + + static_assert((N & (N-1)) == 0, "N must be a power of 2."); + static_assert(sizeof(T) >= alignof(T), "What kind of unusual T is this?"); + + // Methods belong here in the class declaration of Vec only if: + // - they must be here, like constructors or operator[]; + // - they'll definitely never want a specialized implementation. + // Other operations on Vec should be defined outside the type. + + SKVX_ALWAYS_INLINE Vec() = default; + SKVX_ALWAYS_INLINE Vec(typename ConvertNative<N, T>::type native) : Vec(bit_pun<Vec>(native)) {} + + using VecStorage<N,T>::VecStorage; + + // NOTE: Vec{x} produces x000..., whereas Vec(x) produces xxxx.... since this constructor fills + // unspecified lanes with 0s, whereas the single T constructor fills all lanes with the value. + SKVX_ALWAYS_INLINE Vec(std::initializer_list<T> xs) { + T vals[N] = {0}; + memcpy(vals, xs.begin(), std::min(xs.size(), (size_t)N)*sizeof(T)); + + this->lo = Vec<N/2,T>::Load(vals + 0); + this->hi = Vec<N/2,T>::Load(vals + N/2); + } + + operator typename ConvertNative<N, T>::type() const { return bit_pun<typename ConvertNative<N, T>::type>(*this); } + + SKVX_ALWAYS_INLINE T operator[](int i) const { return i<N/2 ? this->lo[i] : this->hi[i-N/2]; } + SKVX_ALWAYS_INLINE T& operator[](int i) { return i<N/2 ? 
this->lo[i] : this->hi[i-N/2]; } + + SKVX_ALWAYS_INLINE static Vec Load(const void* ptr) { + Vec v; + memcpy(&v, ptr, sizeof(Vec)); + return v; + } + SKVX_ALWAYS_INLINE void store(void* ptr) const { + memcpy(ptr, this, sizeof(Vec)); + } +}; + +template <typename T> +struct Vec<1,T> { + typedef T elem_type; + + T val; + + SKVX_ALWAYS_INLINE Vec() = default; + + Vec(T s) : val(s) {} + + SKVX_ALWAYS_INLINE Vec(std::initializer_list<T> xs) : val(xs.size() ? *xs.begin() : 0) {} + + SKVX_ALWAYS_INLINE T operator[](int) const { return val; } + SKVX_ALWAYS_INLINE T& operator[](int) { return val; } + + SKVX_ALWAYS_INLINE static Vec Load(const void* ptr) { + Vec v; + memcpy(&v, ptr, sizeof(Vec)); + return v; + } + SKVX_ALWAYS_INLINE void store(void* ptr) const { + memcpy(ptr, this, sizeof(Vec)); + } +}; + +// Join two Vec<N,T> into one Vec<2N,T>. +SINT Vec<2*N,T> join(const Vec<N,T>& lo, const Vec<N,T>& hi) { + Vec<2*N,T> v; + v.lo = lo; + v.hi = hi; + return v; +} + +// We have three strategies for implementing Vec operations: +// 1) lean on Clang/GCC vector extensions when available; +// 2) use map() to apply a scalar function lane-wise; +// 3) recurse on lo/hi to scalar portable implementations. +// We can slot in platform-specific implementations as overloads for particular Vec<N,T>, +// or often integrate them directly into the recursion of style 3), allowing fine control. + +#if SKVX_USE_SIMD && (defined(__clang__) || defined(__GNUC__)) + + // VExt<N,T> types have the same size as Vec<N,T> and support most operations directly. + #if defined(__clang__) + template <int N, typename T> + using VExt = T __attribute__((ext_vector_type(N))); + + #elif defined(__GNUC__) + template <int N, typename T> + struct VExtHelper { + typedef T __attribute__((vector_size(N*sizeof(T)))) type; + }; + + template <int N, typename T> + using VExt = typename VExtHelper<N,T>::type; + + // For some reason some (new!) versions of GCC cannot seem to deduce N in the generic + // to_vec<N,T>() below for N=4 and T=float. This workaround seems to help... 
+ SI Vec<4,float> to_vec(VExt<4,float> v) { return bit_pun<Vec<4,float>>(v); } + #endif + + SINT VExt<N,T> to_vext(const Vec<N,T>& v) { return bit_pun<VExt<N,T>>(v); } + SINT Vec <N,T> to_vec(const VExt<N,T>& v) { return bit_pun<Vec <N,T>>(v); } + + SINT Vec<N,T> operator+(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) + to_vext(y)); + } + SINT Vec<N,T> operator-(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) - to_vext(y)); + } + SINT Vec<N,T> operator*(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) * to_vext(y)); + } + SINT Vec<N,T> operator/(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) / to_vext(y)); + } + + SINT Vec<N,T> operator^(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) ^ to_vext(y)); + } + SINT Vec<N,T> operator&(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) & to_vext(y)); + } + SINT Vec<N,T> operator|(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) | to_vext(y)); + } + SINT Vec<N,T> operator&&(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) & to_vext(y)); + } + SINT Vec<N,T> operator||(const Vec<N,T>& x, const Vec<N,T>& y) { + return to_vec<N,T>(to_vext(x) | to_vext(y)); + } + + SINT Vec<N,T> operator!(const Vec<N,T>& x) { return to_vec<N,T>(!to_vext(x)); } + SINT Vec<N,T> operator-(const Vec<N,T>& x) { return to_vec<N,T>(-to_vext(x)); } + SINT Vec<N,T> operator~(const Vec<N,T>& x) { return to_vec<N,T>(~to_vext(x)); } + + SINT Vec<N,T> operator<<(const Vec<N,T>& x, int k) { return to_vec<N,T>(to_vext(x) << k); } + SINT Vec<N,T> operator>>(const Vec<N,T>& x, int k) { return to_vec<N,T>(to_vext(x) >> k); } + + SINT Vec<N,M<T>> operator==(const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) == to_vext(y)); + } + SINT Vec<N,M<T>> operator!=(const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) != to_vext(y)); + } + SINT Vec<N,M<T>> operator<=(const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) <= to_vext(y)); + } + SINT Vec<N,M<T>> operator>=(const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) >= to_vext(y)); + } + SINT Vec<N,M<T>> operator< (const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) < to_vext(y)); + } + SINT Vec<N,M<T>> operator> (const Vec<N,T>& x, const Vec<N,T>& y) { + return bit_pun<Vec<N,M<T>>>(to_vext(x) > to_vext(y)); + } + +#else + + // Either SKNX_NO_SIMD is defined, or Clang/GCC vector extensions are not available. + // We'll implement things portably with N==1 scalar implementations and recursion onto them. + + // N == 1 scalar implementations. 
+ SIT Vec<1,T> operator+(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val + y.val; } + SIT Vec<1,T> operator-(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val - y.val; } + SIT Vec<1,T> operator*(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val * y.val; } + SIT Vec<1,T> operator/(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val / y.val; } + + SIT Vec<1,T> operator^(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val ^ y.val; } + SIT Vec<1,T> operator&(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val & y.val; } + SIT Vec<1,T> operator|(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val | y.val; } + SIT Vec<1,T> operator&&(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val & y.val; } + SIT Vec<1,T> operator||(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val | y.val; } + + SIT Vec<1,T> operator!(const Vec<1,T>& x) { return !x.val; } + SIT Vec<1,T> operator-(const Vec<1,T>& x) { return -x.val; } + SIT Vec<1,T> operator~(const Vec<1,T>& x) { return ~x.val; } + + SIT Vec<1,T> operator<<(const Vec<1,T>& x, int k) { return x.val << k; } + SIT Vec<1,T> operator>>(const Vec<1,T>& x, int k) { return x.val >> k; } + + SIT Vec<1,M<T>> operator==(const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val == y.val ? ~0 : 0; + } + SIT Vec<1,M<T>> operator!=(const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val != y.val ? ~0 : 0; + } + SIT Vec<1,M<T>> operator<=(const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val <= y.val ? ~0 : 0; + } + SIT Vec<1,M<T>> operator>=(const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val >= y.val ? ~0 : 0; + } + SIT Vec<1,M<T>> operator< (const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val < y.val ? ~0 : 0; + } + SIT Vec<1,M<T>> operator> (const Vec<1,T>& x, const Vec<1,T>& y) { + return x.val > y.val ? ~0 : 0; + } + + // Recurse on lo/hi down to N==1 scalar implementations. 
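+ // For example, the '+' of two Vec<4,int> below splits into two Vec<2,int> additions, each of
+ // which splits again into the Vec<1,int> scalar '+' defined above.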
+ SINT Vec<N,T> operator+(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo + y.lo, x.hi + y.hi); + } + SINT Vec<N,T> operator-(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo - y.lo, x.hi - y.hi); + } + SINT Vec<N,T> operator*(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo * y.lo, x.hi * y.hi); + } + SINT Vec<N,T> operator/(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo / y.lo, x.hi / y.hi); + } + + SINT Vec<N,T> operator^(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo ^ y.lo, x.hi ^ y.hi); + } + SINT Vec<N,T> operator&(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo & y.lo, x.hi & y.hi); + } + SINT Vec<N,T> operator|(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo | y.lo, x.hi | y.hi); + } + SINT Vec<N,T> operator&&(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo & y.lo, x.hi & y.hi); + } + SINT Vec<N,T> operator||(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo | y.lo, x.hi | y.hi); + } + + SINT Vec<N,T> operator!(const Vec<N,T>& x) { return join(!x.lo, !x.hi); } + SINT Vec<N,T> operator-(const Vec<N,T>& x) { return join(-x.lo, -x.hi); } + SINT Vec<N,T> operator~(const Vec<N,T>& x) { return join(~x.lo, ~x.hi); } + + SINT Vec<N,T> operator<<(const Vec<N,T>& x, int k) { return join(x.lo << k, x.hi << k); } + SINT Vec<N,T> operator>>(const Vec<N,T>& x, int k) { return join(x.lo >> k, x.hi >> k); } + + SINT Vec<N,M<T>> operator==(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo == y.lo, x.hi == y.hi); + } + SINT Vec<N,M<T>> operator!=(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo != y.lo, x.hi != y.hi); + } + SINT Vec<N,M<T>> operator<=(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo <= y.lo, x.hi <= y.hi); + } + SINT Vec<N,M<T>> operator>=(const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo >= y.lo, x.hi >= y.hi); + } + SINT Vec<N,M<T>> operator< (const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo < y.lo, x.hi < y.hi); + } + SINT Vec<N,M<T>> operator> (const Vec<N,T>& x, const Vec<N,T>& y) { + return join(x.lo > y.lo, x.hi > y.hi); + } +#endif + +// Scalar/vector operations splat the scalar to a vector. 
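+// For example, 2.0f * Vec<4,float>{1,2,3,4} first widens 2.0f to Vec<4,float>(2.0f), i.e.
+// {2,2,2,2}, and then applies the lane-wise operator* defined above.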
+SINTU Vec<N,T> operator+ (U x, const Vec<N,T>& y) { return Vec<N,T>(x) + y; } +SINTU Vec<N,T> operator- (U x, const Vec<N,T>& y) { return Vec<N,T>(x) - y; } +SINTU Vec<N,T> operator* (U x, const Vec<N,T>& y) { return Vec<N,T>(x) * y; } +SINTU Vec<N,T> operator/ (U x, const Vec<N,T>& y) { return Vec<N,T>(x) / y; } +SINTU Vec<N,T> operator^ (U x, const Vec<N,T>& y) { return Vec<N,T>(x) ^ y; } +SINTU Vec<N,T> operator& (U x, const Vec<N,T>& y) { return Vec<N,T>(x) & y; } +SINTU Vec<N,T> operator| (U x, const Vec<N,T>& y) { return Vec<N,T>(x) | y; } +SINTU Vec<N,T> operator&&(U x, const Vec<N,T>& y) { return Vec<N,T>(x) && y; } +SINTU Vec<N,T> operator||(U x, const Vec<N,T>& y) { return Vec<N,T>(x) || y; } +SINTU Vec<N,M<T>> operator==(U x, const Vec<N,T>& y) { return Vec<N,T>(x) == y; } +SINTU Vec<N,M<T>> operator!=(U x, const Vec<N,T>& y) { return Vec<N,T>(x) != y; } +SINTU Vec<N,M<T>> operator<=(U x, const Vec<N,T>& y) { return Vec<N,T>(x) <= y; } +SINTU Vec<N,M<T>> operator>=(U x, const Vec<N,T>& y) { return Vec<N,T>(x) >= y; } +SINTU Vec<N,M<T>> operator< (U x, const Vec<N,T>& y) { return Vec<N,T>(x) < y; } +SINTU Vec<N,M<T>> operator> (U x, const Vec<N,T>& y) { return Vec<N,T>(x) > y; } + +SINTU Vec<N,T> operator+ (const Vec<N,T>& x, U y) { return x + Vec<N,T>(y); } +SINTU Vec<N,T> operator- (const Vec<N,T>& x, U y) { return x - Vec<N,T>(y); } +SINTU Vec<N,T> operator* (const Vec<N,T>& x, U y) { return x * Vec<N,T>(y); } +SINTU Vec<N,T> operator/ (const Vec<N,T>& x, U y) { return x / Vec<N,T>(y); } +SINTU Vec<N,T> operator^ (const Vec<N,T>& x, U y) { return x ^ Vec<N,T>(y); } +SINTU Vec<N,T> operator& (const Vec<N,T>& x, U y) { return x & Vec<N,T>(y); } +SINTU Vec<N,T> operator| (const Vec<N,T>& x, U y) { return x | Vec<N,T>(y); } +SINTU Vec<N,T> operator&&(const Vec<N,T>& x, U y) { return x && Vec<N,T>(y); } +SINTU Vec<N,T> operator||(const Vec<N,T>& x, U y) { return x || Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator==(const Vec<N,T>& x, U y) { return x == Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator!=(const Vec<N,T>& x, U y) { return x != Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator<=(const Vec<N,T>& x, U y) { return x <= Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator>=(const Vec<N,T>& x, U y) { return x >= Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator< (const Vec<N,T>& x, U y) { return x < Vec<N,T>(y); } +SINTU Vec<N,M<T>> operator> (const Vec<N,T>& x, U y) { return x > Vec<N,T>(y); } + +SINT Vec<N,T>& operator+=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x + y); } +SINT Vec<N,T>& operator-=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x - y); } +SINT Vec<N,T>& operator*=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x * y); } +SINT Vec<N,T>& operator/=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x / y); } +SINT Vec<N,T>& operator^=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x ^ y); } +SINT Vec<N,T>& operator&=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x & y); } +SINT Vec<N,T>& operator|=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x | y); } + +SINTU Vec<N,T>& operator+=(Vec<N,T>& x, U y) { return (x = x + Vec<N,T>(y)); } +SINTU Vec<N,T>& operator-=(Vec<N,T>& x, U y) { return (x = x - Vec<N,T>(y)); } +SINTU Vec<N,T>& operator*=(Vec<N,T>& x, U y) { return (x = x * Vec<N,T>(y)); } +SINTU Vec<N,T>& operator/=(Vec<N,T>& x, U y) { return (x = x / Vec<N,T>(y)); } +SINTU Vec<N,T>& operator^=(Vec<N,T>& x, U y) { return (x = x ^ Vec<N,T>(y)); } +SINTU Vec<N,T>& operator&=(Vec<N,T>& x, U y) { return (x = x & Vec<N,T>(y)); } +SINTU Vec<N,T>& operator|=(Vec<N,T>& x, U y) { return (x = x | 
Vec<N,T>(y)); } + +SINT Vec<N,T>& operator<<=(Vec<N,T>& x, int bits) { return (x = x << bits); } +SINT Vec<N,T>& operator>>=(Vec<N,T>& x, int bits) { return (x = x >> bits); } + +// Some operations we want are not expressible with Clang/GCC vector extensions. + +// Clang can reason about naive_if_then_else() and optimize through it better +// than if_then_else(), so it's sometimes useful to call it directly when we +// think an entire expression should optimize away, e.g. min()/max(). +SINT Vec<N,T> naive_if_then_else(const Vec<N,M<T>>& cond, const Vec<N,T>& t, const Vec<N,T>& e) { + return bit_pun<Vec<N,T>>(( cond & bit_pun<Vec<N, M<T>>>(t)) | + (~cond & bit_pun<Vec<N, M<T>>>(e)) ); +} + +SIT Vec<1,T> if_then_else(const Vec<1,M<T>>& cond, const Vec<1,T>& t, const Vec<1,T>& e) { + // In practice this scalar implementation is unlikely to be used. See next if_then_else(). + return bit_pun<Vec<1,T>>(( cond & bit_pun<Vec<1, M<T>>>(t)) | + (~cond & bit_pun<Vec<1, M<T>>>(e)) ); +} +SINT Vec<N,T> if_then_else(const Vec<N,M<T>>& cond, const Vec<N,T>& t, const Vec<N,T>& e) { + // Specializations inline here so they can generalize what types the apply to. +#if SKVX_USE_SIMD && defined(__AVX2__) + if constexpr (N*sizeof(T) == 32) { + return bit_pun<Vec<N,T>>(_mm256_blendv_epi8(bit_pun<__m256i>(e), + bit_pun<__m256i>(t), + bit_pun<__m256i>(cond))); + } +#endif +#if SKVX_USE_SIMD && defined(__SSE4_1__) + if constexpr (N*sizeof(T) == 16) { + return bit_pun<Vec<N,T>>(_mm_blendv_epi8(bit_pun<__m128i>(e), + bit_pun<__m128i>(t), + bit_pun<__m128i>(cond))); + } +#endif +#if SKVX_USE_SIMD && defined(__ARM_NEON) + if constexpr (N*sizeof(T) == 16) { + return bit_pun<Vec<N,T>>(vbslq_u8(bit_pun<uint8x16_t>(cond), + bit_pun<uint8x16_t>(t), + bit_pun<uint8x16_t>(e))); + } +#endif + // Recurse for large vectors to try to hit the specializations above. + if constexpr (N*sizeof(T) > 16) { + return join(if_then_else(cond.lo, t.lo, e.lo), + if_then_else(cond.hi, t.hi, e.hi)); + } + // This default can lead to better code than the recursing onto scalars. + return naive_if_then_else(cond, t, e); +} + +SIT bool any(const Vec<1,T>& x) { return x.val != 0; } +SINT bool any(const Vec<N,T>& x) { + // For any(), the _mm_testz intrinsics are correct and don't require comparing 'x' to 0, so it's + // lower latency compared to _mm_movemask + _mm_compneq on plain SSE. +#if SKVX_USE_SIMD && defined(__AVX2__) + if constexpr (N*sizeof(T) == 32) { + return !_mm256_testz_si256(bit_pun<__m256i>(x), _mm256_set1_epi32(-1)); + } +#endif +#if SKVX_USE_SIMD && defined(__SSE_4_1__) + if constexpr (N*sizeof(T) == 16) { + return !_mm_testz_si128(bit_pun<__m128i>(x), _mm_set1_epi32(-1)); + } +#endif +#if SKVX_USE_SIMD && defined(__SSE__) + if constexpr (N*sizeof(T) == 16) { + // On SSE, movemask checks only the MSB in each lane, which is fine if the lanes were set + // directly from a comparison op (which sets all bits to 1 when true), but skvx::Vec<> + // treats any non-zero value as true, so we have to compare 'x' to 0 before calling movemask + return _mm_movemask_ps(_mm_cmpneq_ps(bit_pun<__m128>(x), _mm_set1_ps(0))) != 0b0000; + } +#endif +#if SKVX_USE_SIMD && defined(__aarch64__) + // On 64-bit NEON, take the max across lanes, which will be non-zero if any lane was true. + // The specific lane-size doesn't really matter in this case since it's really any set bit + // that we're looking for. 
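+ // For example, any(Vec<4,uint32_t>{0,0,0,1}) reduces via vmaxvq_u8 to 1, hence true.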
+ if constexpr (N*sizeof(T) == 8 ) { return vmaxv_u8 (bit_pun<uint8x8_t> (x)) > 0; } + if constexpr (N*sizeof(T) == 16) { return vmaxvq_u8(bit_pun<uint8x16_t>(x)) > 0; } +#endif +#if SKVX_USE_SIMD && defined(__wasm_simd128__) + if constexpr (N == 4 && sizeof(T) == 4) { + return wasm_i32x4_any_true(bit_pun<VExt<4,int>>(x)); + } +#endif + return any(x.lo) + || any(x.hi); +} + +SIT bool all(const Vec<1,T>& x) { return x.val != 0; } +SINT bool all(const Vec<N,T>& x) { +// Unlike any(), we have to respect the lane layout, or we'll miss cases where a +// true lane has a mix of 0 and 1 bits. +#if SKVX_USE_SIMD && defined(__SSE__) + // Unfortunately, the _mm_testc intrinsics don't let us avoid the comparison to 0 for all()'s + // correctness, so always just use the plain SSE version. + if constexpr (N == 4 && sizeof(T) == 4) { + return _mm_movemask_ps(_mm_cmpneq_ps(bit_pun<__m128>(x), _mm_set1_ps(0))) == 0b1111; + } +#endif +#if SKVX_USE_SIMD && defined(__aarch64__) + // On 64-bit NEON, take the min across the lanes, which will be non-zero if all lanes are != 0. + if constexpr (sizeof(T)==1 && N==8) {return vminv_u8 (bit_pun<uint8x8_t> (x)) > 0;} + if constexpr (sizeof(T)==1 && N==16) {return vminvq_u8 (bit_pun<uint8x16_t>(x)) > 0;} + if constexpr (sizeof(T)==2 && N==4) {return vminv_u16 (bit_pun<uint16x4_t>(x)) > 0;} + if constexpr (sizeof(T)==2 && N==8) {return vminvq_u16(bit_pun<uint16x8_t>(x)) > 0;} + if constexpr (sizeof(T)==4 && N==2) {return vminv_u32 (bit_pun<uint32x2_t>(x)) > 0;} + if constexpr (sizeof(T)==4 && N==4) {return vminvq_u32(bit_pun<uint32x4_t>(x)) > 0;} +#endif +#if SKVX_USE_SIMD && defined(__wasm_simd128__) + if constexpr (N == 4 && sizeof(T) == 4) { + return wasm_i32x4_all_true(bit_pun<VExt<4,int>>(x)); + } +#endif + return all(x.lo) + && all(x.hi); +} + +// cast() Vec<N,S> to Vec<N,D>, as if applying a C-cast to each lane. +// TODO: implement with map()? +template <typename D, typename S> +SI Vec<1,D> cast(const Vec<1,S>& src) { return (D)src.val; } + +template <typename D, int N, typename S> +SI Vec<N,D> cast(const Vec<N,S>& src) { +#if SKVX_USE_SIMD && defined(__clang__) + return to_vec(__builtin_convertvector(to_vext(src), VExt<N,D>)); +#else + return join(cast<D>(src.lo), cast<D>(src.hi)); +#endif +} + +// min/max match logic of std::min/std::max, which is important when NaN is involved. +SIT T min(const Vec<1,T>& x) { return x.val; } +SIT T max(const Vec<1,T>& x) { return x.val; } +SINT T min(const Vec<N,T>& x) { return std::min(min(x.lo), min(x.hi)); } +SINT T max(const Vec<N,T>& x) { return std::max(max(x.lo), max(x.hi)); } + +SINT Vec<N,T> min(const Vec<N,T>& x, const Vec<N,T>& y) { return naive_if_then_else(y < x, y, x); } +SINT Vec<N,T> max(const Vec<N,T>& x, const Vec<N,T>& y) { return naive_if_then_else(x < y, y, x); } + +SINTU Vec<N,T> min(const Vec<N,T>& x, U y) { return min(x, Vec<N,T>(y)); } +SINTU Vec<N,T> max(const Vec<N,T>& x, U y) { return max(x, Vec<N,T>(y)); } +SINTU Vec<N,T> min(U x, const Vec<N,T>& y) { return min(Vec<N,T>(x), y); } +SINTU Vec<N,T> max(U x, const Vec<N,T>& y) { return max(Vec<N,T>(x), y); } + +// pin matches the logic of SkTPin, which is important when NaN is involved. It always returns +// values in the range lo..hi, and if x is NaN, it returns lo. 
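+// For example, pin(NaN, 0, 1) evaluates max(0, min(NaN, 1)): "1 < NaN" is false, so min() keeps
+// NaN, and then "0 < NaN" is false, so max() keeps 0, the lo bound.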
+SINT Vec<N,T> pin(const Vec<N,T>& x, const Vec<N,T>& lo, const Vec<N,T>& hi) { + return max(lo, min(x, hi)); +} + +// Shuffle values from a vector pretty arbitrarily: +// skvx::Vec<4,float> rgba = {R,G,B,A}; +// shuffle<2,1,0,3> (rgba) ~> {B,G,R,A} +// shuffle<2,1> (rgba) ~> {B,G} +// shuffle<2,1,2,1,2,1,2,1>(rgba) ~> {B,G,B,G,B,G,B,G} +// shuffle<3,3,3,3> (rgba) ~> {A,A,A,A} +// The only real restriction is that the output also be a legal N=power-of-two sknx::Vec. +template <int... Ix, int N, typename T> +SI Vec<sizeof...(Ix),T> shuffle(const Vec<N,T>& x) { +#if SKVX_USE_SIMD && defined(__clang__) + // TODO: can we just always use { x[Ix]... }? + return to_vec<sizeof...(Ix),T>(__builtin_shufflevector(to_vext(x), to_vext(x), Ix...)); +#else + return { x[Ix]... }; +#endif +} + +// Call map(fn, x) for a vector with fn() applied to each lane of x, { fn(x[0]), fn(x[1]), ... }, +// or map(fn, x,y) for a vector of fn(x[i], y[i]), etc. + +template <typename Fn, typename... Args, size_t... I> +SI auto map(std::index_sequence<I...>, + Fn&& fn, const Args&... args) -> skvx::Vec<sizeof...(I), decltype(fn(args[0]...))> { + auto lane = [&](size_t i) +#if defined(__clang__) + // CFI, specifically -fsanitize=cfi-icall, seems to give a false positive here, + // with errors like "control flow integrity check for type 'float (float) + // noexcept' failed during indirect function call... note: sqrtf.cfi_jt defined + // here". But we can be quite sure fn is the right type: it's all inferred! + // So, stifle CFI in this function. + __attribute__((no_sanitize("cfi"))) +#endif + { return fn(args[static_cast<int>(i)]...); }; + + return { lane(I)... }; +} + +template <typename Fn, int N, typename T, typename... Rest> +auto map(Fn&& fn, const Vec<N,T>& first, const Rest&... rest) { + // Derive an {0...N-1} index_sequence from the size of the first arg: N lanes in, N lanes out. + return map(std::make_index_sequence<N>{}, fn, first,rest...); +} + +SIN Vec<N,float> ceil(const Vec<N,float>& x) { return map( ceilf, x); } +SIN Vec<N,float> floor(const Vec<N,float>& x) { return map(floorf, x); } +SIN Vec<N,float> trunc(const Vec<N,float>& x) { return map(truncf, x); } +SIN Vec<N,float> round(const Vec<N,float>& x) { return map(roundf, x); } +SIN Vec<N,float> sqrt(const Vec<N,float>& x) { return map( sqrtf, x); } +SIN Vec<N,float> abs(const Vec<N,float>& x) { return map( fabsf, x); } +SIN Vec<N,float> fma(const Vec<N,float>& x, + const Vec<N,float>& y, + const Vec<N,float>& z) { + // I don't understand why Clang's codegen is terrible if we write map(fmaf, x,y,z) directly. + auto fn = [](float x, float y, float z) { return fmaf(x,y,z); }; + return map(fn, x,y,z); +} + +SI Vec<1,int> lrint(const Vec<1,float>& x) { + return (int)lrintf(x.val); +} +SIN Vec<N,int> lrint(const Vec<N,float>& x) { +#if SKVX_USE_SIMD && defined(__AVX__) + if constexpr (N == 8) { + return bit_pun<Vec<N,int>>(_mm256_cvtps_epi32(bit_pun<__m256>(x))); + } +#endif +#if SKVX_USE_SIMD && defined(__SSE__) + if constexpr (N == 4) { + return bit_pun<Vec<N,int>>(_mm_cvtps_epi32(bit_pun<__m128>(x))); + } +#endif + return join(lrint(x.lo), + lrint(x.hi)); +} + +SIN Vec<N,float> fract(const Vec<N,float>& x) { return x - floor(x); } + +// Assumes inputs are finite and treat/flush denorm half floats as/to zero. +// Key constants to watch for: +// - a float is 32-bit, 1-8-23 sign-exponent-mantissa, with 127 exponent bias; +// - a half is 16-bit, 1-5-10 sign-exponent-mantissa, with 15 exponent bias. 
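+// Worked example: 1.0f is 0x3f80'0000; shifting the mantissa down 13 bits gives 0x0001'fc00, and
+// rebasing the exponent by subtracting (127-15)<<10 == 0x1c000 leaves 0x3c00, the half for 1.0.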
+SIN Vec<N,uint16_t> to_half_finite_ftz(const Vec<N,float>& x) { + Vec<N,uint32_t> sem = bit_pun<Vec<N,uint32_t>>(x), + s = sem & 0x8000'0000, + em = sem ^ s, + is_norm = em > 0x387f'd000, // halfway between largest f16 denorm and smallest norm + norm = (em>>13) - ((127-15)<<10); + return cast<uint16_t>((s>>16) | (is_norm & norm)); +} +SIN Vec<N,float> from_half_finite_ftz(const Vec<N,uint16_t>& x) { + Vec<N,uint32_t> wide = cast<uint32_t>(x), + s = wide & 0x8000, + em = wide ^ s, + is_norm = em > 0x3ff, + norm = (em<<13) + ((127-15)<<23); + return bit_pun<Vec<N,float>>((s<<16) | (is_norm & norm)); +} + +// Like if_then_else(), these N=1 base cases won't actually be used unless explicitly called. +SI Vec<1,uint16_t> to_half(const Vec<1,float>& x) { return to_half_finite_ftz(x); } +SI Vec<1,float> from_half(const Vec<1,uint16_t>& x) { return from_half_finite_ftz(x); } + +SIN Vec<N,uint16_t> to_half(const Vec<N,float>& x) { +#if SKVX_USE_SIMD && defined(__F16C__) + if constexpr (N == 8) { + return bit_pun<Vec<N,uint16_t>>(_mm256_cvtps_ph(bit_pun<__m256>(x), + _MM_FROUND_TO_NEAREST_INT)); + } +#endif +#if SKVX_USE_SIMD && defined(__aarch64__) + if constexpr (N == 4) { + return bit_pun<Vec<N,uint16_t>>(vcvt_f16_f32(bit_pun<float32x4_t>(x))); + + } +#endif + if constexpr (N > 4) { + return join(to_half(x.lo), + to_half(x.hi)); + } + return to_half_finite_ftz(x); +} + +SIN Vec<N,float> from_half(const Vec<N,uint16_t>& x) { +#if SKVX_USE_SIMD && defined(__F16C__) + if constexpr (N == 8) { + return bit_pun<Vec<N,float>>(_mm256_cvtph_ps(bit_pun<__m128i>(x))); + } +#endif +#if SKVX_USE_SIMD && defined(__aarch64__) + if constexpr (N == 4) { + return bit_pun<Vec<N,float>>(vcvt_f32_f16(bit_pun<float16x4_t>(x))); + } +#endif + if constexpr (N > 4) { + return join(from_half(x.lo), + from_half(x.hi)); + } + return from_half_finite_ftz(x); +} + +// div255(x) = (x + 127) / 255 is a bit-exact rounding divide-by-255, packing down to 8-bit. +SIN Vec<N,uint8_t> div255(const Vec<N,uint16_t>& x) { + return cast<uint8_t>( (x+127)/255 ); +} + +// approx_scale(x,y) approximates div255(cast<uint16_t>(x)*cast<uint16_t>(y)) within a bit, +// and is always perfect when x or y is 0 or 255. +SIN Vec<N,uint8_t> approx_scale(const Vec<N,uint8_t>& x, const Vec<N,uint8_t>& y) { + // All of (x*y+x)/256, (x*y+y)/256, and (x*y+255)/256 meet the criteria above. + // We happen to have historically picked (x*y+x)/256. + auto X = cast<uint16_t>(x), + Y = cast<uint16_t>(y); + return cast<uint8_t>( (X*Y+X)/256 ); +} + +// saturated_add(x,y) sums values and clamps to the maximum value instead of overflowing. +SINT std::enable_if_t<std::is_unsigned_v<T>, Vec<N,T>> saturated_add(const Vec<N,T>& x, + const Vec<N,T>& y) { +#if SKVX_USE_SIMD && (defined(__SSE__) || defined(__ARM_NEON)) + // Both SSE and ARM have 16-lane saturated adds, so use intrinsics for those and recurse down + // or join up to take advantage. 
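+ // For example, an N==8 uint8_t add is widened with join(x,x) to N==16, handled by the
+ // intrinsic, and then only the low half of the result is kept.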
+ if constexpr (N == 16 && sizeof(T) == 1) { + #if defined(__SSE__) + return bit_pun<Vec<N,T>>(_mm_adds_epu8(bit_pun<__m128i>(x), bit_pun<__m128i>(y))); + #else // __ARM_NEON + return bit_pun<Vec<N,T>>(vqaddq_u8(bit_pun<uint8x16_t>(x), bit_pun<uint8x16_t>(y))); + #endif + } else if constexpr (N < 16 && sizeof(T) == 1) { + return saturated_add(join(x,x), join(y,y)).lo; + } else if constexpr (sizeof(T) == 1) { + return join(saturated_add(x.lo, y.lo), saturated_add(x.hi, y.hi)); + } +#endif + // Otherwise saturate manually + auto sum = x + y; + return if_then_else(sum < x, Vec<N,T>(std::numeric_limits<T>::max()), sum); +} + +// The ScaledDividerU32 takes a divisor > 1, and creates a function divide(numerator) that +// calculates a numerator / denominator. For this to be rounded properly, numerator should have +// half added in: +// divide(numerator + half) == floor(numerator/denominator + 1/2). +// +// This gives an answer within +/- 1 from the true value. +// +// Derivation of half: +// numerator/denominator + 1/2 = (numerator + half) / d +// numerator + denominator / 2 = numerator + half +// half = denominator / 2. +// +// Because half is divided by 2, that division must also be rounded. +// half == denominator / 2 = (denominator + 1) / 2. +// +// The divisorFactor is just a scaled value: +// divisorFactor = (1 / divisor) * 2 ^ 32. +// The maximum that can be divided and rounded is UINT_MAX - half. +class ScaledDividerU32 { +public: + explicit ScaledDividerU32(uint32_t divisor) + : fDivisorFactor{(uint32_t)(std::round((1.0 / divisor) * (1ull << 32)))} + , fHalf{(divisor + 1) >> 1} { + assert(divisor > 1); + } + + Vec<4, uint32_t> divide(const Vec<4, uint32_t>& numerator) const { +#if SKVX_USE_SIMD && defined(__ARM_NEON) + uint64x2_t hi = vmull_n_u32(vget_high_u32(to_vext(numerator)), fDivisorFactor); + uint64x2_t lo = vmull_n_u32(vget_low_u32(to_vext(numerator)), fDivisorFactor); + + return to_vec<4, uint32_t>(vcombine_u32(vshrn_n_u64(lo,32), vshrn_n_u64(hi,32))); +#else + return cast<uint32_t>((cast<uint64_t>(numerator) * fDivisorFactor) >> 32); +#endif + } + + uint32_t half() const { return fHalf; } + +private: + const uint32_t fDivisorFactor; + const uint32_t fHalf; +}; + + +SIN Vec<N,uint16_t> mull(const Vec<N,uint8_t>& x, + const Vec<N,uint8_t>& y) { +#if SKVX_USE_SIMD && defined(__ARM_NEON) + // With NEON we can do eight u8*u8 -> u16 in one instruction, vmull_u8 (read, mul-long). + if constexpr (N == 8) { + return to_vec<8,uint16_t>(vmull_u8(to_vext(x), to_vext(y))); + } else if constexpr (N < 8) { + return mull(join(x,x), join(y,y)).lo; + } else { // N > 8 + return join(mull(x.lo, y.lo), mull(x.hi, y.hi)); + } +#else + return cast<uint16_t>(x) * cast<uint16_t>(y); +#endif +} + +SIN Vec<N,uint32_t> mull(const Vec<N,uint16_t>& x, + const Vec<N,uint16_t>& y) { +#if SKVX_USE_SIMD && defined(__ARM_NEON) + // NEON can do four u16*u16 -> u32 in one instruction, vmull_u16 + if constexpr (N == 4) { + return to_vec<4,uint32_t>(vmull_u16(to_vext(x), to_vext(y))); + } else if constexpr (N < 4) { + return mull(join(x,x), join(y,y)).lo; + } else { // N > 4 + return join(mull(x.lo, y.lo), mull(x.hi, y.hi)); + } +#else + return cast<uint32_t>(x) * cast<uint32_t>(y); +#endif +} + +SIN Vec<N,uint16_t> mulhi(const Vec<N,uint16_t>& x, + const Vec<N,uint16_t>& y) { +#if SKVX_USE_SIMD && defined(__SSE__) + // Use _mm_mulhi_epu16 for 8xuint16_t and join or split to get there. 
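+ // Each lane computes the high 16 bits of the full 32-bit product, i.e. (x*y) >> 16.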
+ if constexpr (N == 8) { + return bit_pun<Vec<8,uint16_t>>(_mm_mulhi_epu16(bit_pun<__m128i>(x), bit_pun<__m128i>(y))); + } else if constexpr (N < 8) { + return mulhi(join(x,x), join(y,y)).lo; + } else { // N > 8 + return join(mulhi(x.lo, y.lo), mulhi(x.hi, y.hi)); + } +#else + return skvx::cast<uint16_t>(mull(x, y) >> 16); +#endif +} + +SINT T dot(const Vec<N, T>& a, const Vec<N, T>& b) { + // While dot is a "horizontal" operation like any or all, it needs to remain + // in floating point and there aren't really any good SIMD instructions that make it faster. + // The constexpr cases remove the for loop in the only cases we realistically call. + auto ab = a*b; + if constexpr (N == 2) { + return ab[0] + ab[1]; + } else if constexpr (N == 4) { + return ab[0] + ab[1] + ab[2] + ab[3]; + } else { + T sum = ab[0]; + for (int i = 1; i < N; ++i) { + sum += ab[i]; + } + return sum; + } +} + +SIT T cross(const Vec<2, T>& a, const Vec<2, T>& b) { + auto x = a * shuffle<1,0>(b); + return x[0] - x[1]; +} + +SIN float length(const Vec<N, float>& v) { + return std::sqrt(dot(v, v)); +} + +SIN double length(const Vec<N, double>& v) { + return std::sqrt(dot(v, v)); +} + +SIN Vec<N, float> normalize(const Vec<N, float>& v) { + return v / length(v); +} + +SIN Vec<N, double> normalize(const Vec<N, double>& v) { + return v / length(v); +} + +SINT bool isfinite(const Vec<N, T>& v) { + // Multiply all values together with 0. If they were all finite, the output is + // 0 (also finite). If any were not, we'll get nan. + return std::isfinite(dot(v, Vec<N, T>(0))); +} + +// De-interleaving load of 4 vectors. +// +// WARNING: These are really only supported well on NEON. Consider restructuring your data before +// resorting to these methods. +SIT void strided_load4(const T* v, + Vec<1,T>& a, + Vec<1,T>& b, + Vec<1,T>& c, + Vec<1,T>& d) { + a.val = v[0]; + b.val = v[1]; + c.val = v[2]; + d.val = v[3]; +} +SINT void strided_load4(const T* v, + Vec<N,T>& a, + Vec<N,T>& b, + Vec<N,T>& c, + Vec<N,T>& d) { + strided_load4(v, a.lo, b.lo, c.lo, d.lo); + strided_load4(v + 4*(N/2), a.hi, b.hi, c.hi, d.hi); +} +#if SKVX_USE_SIMD && defined(__ARM_NEON) +#define IMPL_LOAD4_TRANSPOSED(N, T, VLD) \ +SI void strided_load4(const T* v, \ + Vec<N,T>& a, \ + Vec<N,T>& b, \ + Vec<N,T>& c, \ + Vec<N,T>& d) { \ + auto mat = VLD(v); \ + a = bit_pun<Vec<N,T>>(mat.val[0]); \ + b = bit_pun<Vec<N,T>>(mat.val[1]); \ + c = bit_pun<Vec<N,T>>(mat.val[2]); \ + d = bit_pun<Vec<N,T>>(mat.val[3]); \ +} +IMPL_LOAD4_TRANSPOSED(2, uint32_t, vld4_u32) +IMPL_LOAD4_TRANSPOSED(4, uint16_t, vld4_u16) +IMPL_LOAD4_TRANSPOSED(8, uint8_t, vld4_u8) +IMPL_LOAD4_TRANSPOSED(2, int32_t, vld4_s32) +IMPL_LOAD4_TRANSPOSED(4, int16_t, vld4_s16) +IMPL_LOAD4_TRANSPOSED(8, int8_t, vld4_s8) +IMPL_LOAD4_TRANSPOSED(2, float, vld4_f32) +IMPL_LOAD4_TRANSPOSED(4, uint32_t, vld4q_u32) +IMPL_LOAD4_TRANSPOSED(8, uint16_t, vld4q_u16) +IMPL_LOAD4_TRANSPOSED(16, uint8_t, vld4q_u8) +IMPL_LOAD4_TRANSPOSED(4, int32_t, vld4q_s32) +IMPL_LOAD4_TRANSPOSED(8, int16_t, vld4q_s16) +IMPL_LOAD4_TRANSPOSED(16, int8_t, vld4q_s8) +IMPL_LOAD4_TRANSPOSED(4, float, vld4q_f32) +#undef IMPL_LOAD4_TRANSPOSED + +#elif SKVX_USE_SIMD && defined(__SSE__) + +SI void strided_load4(const float* v, + Vec<4,float>& a, + Vec<4,float>& b, + Vec<4,float>& c, + Vec<4,float>& d) { + __m128 a_ = _mm_loadu_ps(v); + __m128 b_ = _mm_loadu_ps(v+4); + __m128 c_ = _mm_loadu_ps(v+8); + __m128 d_ = _mm_loadu_ps(v+12); + _MM_TRANSPOSE4_PS(a_, b_, c_, d_); + a = bit_pun<Vec<4,float>>(a_); + b = bit_pun<Vec<4,float>>(b_); + c = 
bit_pun<Vec<4,float>>(c_); + d = bit_pun<Vec<4,float>>(d_); +} +#endif + +// De-interleaving load of 2 vectors. +// +// WARNING: These are really only supported well on NEON. Consider restructuring your data before +// resorting to these methods. +SIT void strided_load2(const T* v, Vec<1,T>& a, Vec<1,T>& b) { + a.val = v[0]; + b.val = v[1]; +} +SINT void strided_load2(const T* v, Vec<N,T>& a, Vec<N,T>& b) { + strided_load2(v, a.lo, b.lo); + strided_load2(v + 2*(N/2), a.hi, b.hi); +} +#if SKVX_USE_SIMD && defined(__ARM_NEON) +#define IMPL_LOAD2_TRANSPOSED(N, T, VLD) \ +SI void strided_load2(const T* v, Vec<N,T>& a, Vec<N,T>& b) { \ + auto mat = VLD(v); \ + a = bit_pun<Vec<N,T>>(mat.val[0]); \ + b = bit_pun<Vec<N,T>>(mat.val[1]); \ +} +IMPL_LOAD2_TRANSPOSED(2, uint32_t, vld2_u32) +IMPL_LOAD2_TRANSPOSED(4, uint16_t, vld2_u16) +IMPL_LOAD2_TRANSPOSED(8, uint8_t, vld2_u8) +IMPL_LOAD2_TRANSPOSED(2, int32_t, vld2_s32) +IMPL_LOAD2_TRANSPOSED(4, int16_t, vld2_s16) +IMPL_LOAD2_TRANSPOSED(8, int8_t, vld2_s8) +IMPL_LOAD2_TRANSPOSED(2, float, vld2_f32) +IMPL_LOAD2_TRANSPOSED(4, uint32_t, vld2q_u32) +IMPL_LOAD2_TRANSPOSED(8, uint16_t, vld2q_u16) +IMPL_LOAD2_TRANSPOSED(16, uint8_t, vld2q_u8) +IMPL_LOAD2_TRANSPOSED(4, int32_t, vld2q_s32) +IMPL_LOAD2_TRANSPOSED(8, int16_t, vld2q_s16) +IMPL_LOAD2_TRANSPOSED(16, int8_t, vld2q_s8) +IMPL_LOAD2_TRANSPOSED(4, float, vld2q_f32) +#undef IMPL_LOAD2_TRANSPOSED +#endif + +// Define commonly used aliases +using float2 = Vec< 2, float>; +using float4 = Vec< 4, float>; +using float8 = Vec< 8, float>; + +using double2 = Vec< 2, double>; +using double4 = Vec< 4, double>; +using double8 = Vec< 8, double>; + +using byte2 = Vec< 2, uint8_t>; +using byte4 = Vec< 4, uint8_t>; +using byte8 = Vec< 8, uint8_t>; +using byte16 = Vec<16, uint8_t>; + +using int2 = Vec< 2, int32_t>; +using int4 = Vec< 4, int32_t>; +using int8 = Vec< 8, int32_t>; + +using uint2 = Vec< 2, uint32_t>; +using uint4 = Vec< 4, uint32_t>; +using uint8 = Vec< 8, uint32_t>; + +using long2 = Vec< 2, int64_t>; +using long4 = Vec< 4, int64_t>; +using long8 = Vec< 8, int64_t>; + +// Use with from_half and to_half to convert between floatX, and use these for storage. +using half2 = Vec< 2, uint16_t>; +using half4 = Vec< 4, uint16_t>; +using half8 = Vec< 8, uint16_t>; + +} // namespace skvx + +#undef SINTU +#undef SINT +#undef SIN +#undef SIT +#undef SI +#undef SKVX_ALWAYS_INLINE +#undef SKVX_USE_SIMD + +#endif//SKVX_DEFINED diff --git a/gfx/skia/skia/src/base/SkZip.h b/gfx/skia/skia/src/base/SkZip.h new file mode 100644 index 0000000000..884aa11d8d --- /dev/null +++ b/gfx/skia/skia/src/base/SkZip.h @@ -0,0 +1,215 @@ +/* + * Copyright 2019 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkZip_DEFINED +#define SkZip_DEFINED + +#include "include/private/base/SkAssert.h" +#include "include/private/base/SkDebug.h" +#include "include/private/base/SkSpan_impl.h" + +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <tuple> +#include <utility> + +// Take a list of things that can be pointers, and use them all in parallel. The iterators and +// accessor operator[] for the class produce a tuple of the items. +template<typename... 
Ts> +class SkZip { + using ReturnTuple = std::tuple<Ts&...>; + + class Iterator { + public: + using value_type = ReturnTuple; + using difference_type = ptrdiff_t; + using pointer = value_type*; + using reference = value_type; + using iterator_category = std::input_iterator_tag; + constexpr Iterator(const SkZip* zip, size_t index) : fZip{zip}, fIndex{index} { } + constexpr Iterator(const Iterator& that) : Iterator{ that.fZip, that.fIndex } { } + constexpr Iterator& operator++() { ++fIndex; return *this; } + constexpr Iterator operator++(int) { Iterator tmp(*this); operator++(); return tmp; } + constexpr bool operator==(const Iterator& rhs) const { return fIndex == rhs.fIndex; } + constexpr bool operator!=(const Iterator& rhs) const { return fIndex != rhs.fIndex; } + constexpr reference operator*() { return (*fZip)[fIndex]; } + friend constexpr difference_type operator-(Iterator lhs, Iterator rhs) { + return lhs.fIndex - rhs.fIndex; + } + + private: + const SkZip* const fZip = nullptr; + size_t fIndex = 0; + }; + + template<typename T> + inline static constexpr T* nullify = nullptr; + +public: + constexpr SkZip() : fPointers{nullify<Ts>...}, fSize{0} {} + constexpr SkZip(size_t) = delete; + constexpr SkZip(size_t size, Ts*... ts) + : fPointers{ts...} + , fSize{size} {} + constexpr SkZip(const SkZip& that) = default; + constexpr SkZip& operator=(const SkZip &that) = default; + + // Check to see if U can be used for const T or is the same as T + template <typename U, typename T> + using CanConvertToConst = typename std::integral_constant<bool, + std::is_convertible<U*, T*>::value && sizeof(U) == sizeof(T)>::type; + + // Allow SkZip<const T> to be constructed from SkZip<T>. + template<typename... Us, + typename = std::enable_if<std::conjunction<CanConvertToConst<Us, Ts>...>::value>> + constexpr SkZip(const SkZip<Us...>& that) + : fPointers(that.data()) + , fSize{that.size()} { } + + constexpr ReturnTuple operator[](size_t i) const { return this->index(i);} + constexpr size_t size() const { return fSize; } + constexpr bool empty() const { return this->size() == 0; } + constexpr ReturnTuple front() const { return this->index(0); } + constexpr ReturnTuple back() const { return this->index(this->size() - 1); } + constexpr Iterator begin() const { return Iterator{this, 0}; } + constexpr Iterator end() const { return Iterator{this, this->size()}; } + template<size_t I> constexpr auto get() const { + return SkSpan(std::get<I>(fPointers), fSize); + } + constexpr std::tuple<Ts*...> data() const { return fPointers; } + constexpr SkZip first(size_t n) const { + SkASSERT(n <= this->size()); + if (n == 0) { return SkZip(); } + return SkZip{n, fPointers}; + } + constexpr SkZip last(size_t n) const { + SkASSERT(n <= this->size()); + if (n == 0) { return SkZip(); } + return SkZip{n, this->pointersAt(fSize - n)}; + } + constexpr SkZip subspan(size_t offset, size_t count) const { + SkASSERT(offset < this->size()); + SkASSERT(count <= this->size() - offset); + if (count == 0) { return SkZip(); } + return SkZip(count, pointersAt(offset)); + } + +private: + constexpr SkZip(size_t n, const std::tuple<Ts*...>& pointers) + : fPointers{pointers} + , fSize{n} {} + + constexpr ReturnTuple index(size_t i) const { + SkASSERT(this->size() > 0); + SkASSERT(i < this->size()); + return indexDetail(i, std::make_index_sequence<sizeof...(Ts)>{}); + } + + template<std::size_t... 
Is> + constexpr ReturnTuple indexDetail(size_t i, std::index_sequence<Is...>) const { + return ReturnTuple((std::get<Is>(fPointers))[i]...); + } + + std::tuple<Ts*...> pointersAt(size_t i) const { + SkASSERT(this->size() > 0); + SkASSERT(i < this->size()); + return pointersAtDetail(i, std::make_index_sequence<sizeof...(Ts)>{}); + } + + template<std::size_t... Is> + constexpr std::tuple<Ts*...> pointersAtDetail(size_t i, std::index_sequence<Is...>) const { + return std::tuple<Ts*...>{&(std::get<Is>(fPointers))[i]...}; + } + + std::tuple<Ts*...> fPointers; + size_t fSize; +}; + +class SkMakeZipDetail { + template<typename T> struct DecayPointer{ + using U = typename std::remove_cv<typename std::remove_reference<T>::type>::type; + using type = typename std::conditional<std::is_pointer<U>::value, U, T>::type; + }; + template<typename T> using DecayPointerT = typename DecayPointer<T>::type; + + template<typename C> struct ContiguousMemory { }; + template<typename T> struct ContiguousMemory<T*> { + using value_type = T; + static constexpr value_type* Data(T* t) { return t; } + static constexpr size_t Size(T* s) { return SIZE_MAX; } + }; + template<typename T, size_t N> struct ContiguousMemory<T(&)[N]> { + using value_type = T; + static constexpr value_type* Data(T(&t)[N]) { return t; } + static constexpr size_t Size(T(&)[N]) { return N; } + }; + // In general, we don't want r-value collections, but SkSpans are ok, because they are a view + // onto an actual container. + template<typename T> struct ContiguousMemory<SkSpan<T>> { + using value_type = T; + static constexpr value_type* Data(SkSpan<T> s) { return s.data(); } + static constexpr size_t Size(SkSpan<T> s) { return s.size(); } + }; + // Only accept l-value references to collections. + template<typename C> struct ContiguousMemory<C&> { + using value_type = typename std::remove_pointer<decltype(std::declval<C>().data())>::type; + static constexpr value_type* Data(C& c) { return c.data(); } + static constexpr size_t Size(C& c) { return c.size(); } + }; + template<typename C> using Span = ContiguousMemory<DecayPointerT<C>>; + template<typename C> using ValueType = typename Span<C>::value_type; + + template<typename C, typename... Ts> struct PickOneSize { }; + template <typename T, typename... Ts> struct PickOneSize<T*, Ts...> { + static constexpr size_t Size(T* t, Ts... ts) { + return PickOneSize<Ts...>::Size(std::forward<Ts>(ts)...); + } + }; + template <typename T, typename... Ts, size_t N> struct PickOneSize<T(&)[N], Ts...> { + static constexpr size_t Size(T(&)[N], Ts...) { return N; } + }; + template<typename T, typename... Ts> struct PickOneSize<SkSpan<T>, Ts...> { + static constexpr size_t Size(SkSpan<T> s, Ts...) { return s.size(); } + }; + template<typename C, typename... Ts> struct PickOneSize<C&, Ts...> { + static constexpr size_t Size(C& c, Ts...) { return c.size(); } + }; + +public: + template<typename... Ts> + static constexpr auto MakeZip(Ts&& ... ts) { + + // Pick the first collection that has a size, and use that for the size. + size_t size = PickOneSize<DecayPointerT<Ts>...>::Size(std::forward<Ts>(ts)...); + +#ifdef SK_DEBUG + // Check that all sizes are the same. + size_t minSize = SIZE_MAX; + size_t maxSize = 0; + for (size_t s : {Span<Ts>::Size(std::forward<Ts>(ts))...}) { + if (s != SIZE_MAX) { + minSize = std::min(minSize, s); + maxSize = std::max(maxSize, s); + } + } + SkASSERT(minSize == maxSize); +#endif + + return SkZip<ValueType<Ts>...>{size, Span<Ts>::Data(std::forward<Ts>(ts))...}; + } +}; + +template<typename... 
Ts> +SkZip(size_t size, Ts*... ts) -> SkZip<Ts...>; + +template<typename... Ts> +inline constexpr auto SkMakeZip(Ts&& ... ts) { + return SkMakeZipDetail::MakeZip(std::forward<Ts>(ts)...); +} +#endif //SkZip_DEFINED
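+
+// A minimal usage sketch (the arrays counts and weights are made-up names for illustration):
+//    int   counts[3]  = {1, 2, 3};
+//    float weights[3] = {0.5f, 0.25f, 0.25f};
+//    for (auto [count, weight] : SkMakeZip(counts, weights)) {
+//        // count is an int&, weight is a float&, both taken from the same index.
+//    }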