summaryrefslogtreecommitdiffstats
path: root/gfx/wr/swgl/src/gl.cc
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /gfx/wr/swgl/src/gl.cc
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'gfx/wr/swgl/src/gl.cc')
-rw-r--r--gfx/wr/swgl/src/gl.cc2851
1 files changed, 2851 insertions, 0 deletions
diff --git a/gfx/wr/swgl/src/gl.cc b/gfx/wr/swgl/src/gl.cc
new file mode 100644
index 0000000000..0b09e29008
--- /dev/null
+++ b/gfx/wr/swgl/src/gl.cc
@@ -0,0 +1,2851 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifdef __MACH__
+# include <mach/mach.h>
+# include <mach/mach_time.h>
+#else
+# include <time.h>
+#endif
+
+// Debug logging helper. When NDEBUG is defined the macro expands to nothing;
+// otherwise it forwards all of its arguments to printf.
+#ifdef NDEBUG
+# define debugf(...)
+#else
+# define debugf(...) printf(__VA_ARGS__)
+#endif
+
+// #define PRINT_TIMINGS
+
+// Platform-specific definitions of ALWAYS_INLINE and NO_INLINE. The _WIN32
+// branch uses the __forceinline/__declspec spellings and also hand-declares
+// the performance counter API; the other branch uses GCC/Clang attributes.
+#ifdef _WIN32
+# define ALWAYS_INLINE __forceinline
+# define NO_INLINE __declspec(noinline)
+
+// Including Windows.h brings a huge amount of namespace pollution so just
+// define a couple of things manually
+typedef int BOOL;
+# define WINAPI __stdcall
+# define DECLSPEC_IMPORT __declspec(dllimport)
+# define WINBASEAPI DECLSPEC_IMPORT
+typedef unsigned long DWORD;
+typedef long LONG;
+typedef __int64 LONGLONG;
+# define DUMMYSTRUCTNAME
+
+// Local declaration of LARGE_INTEGER: a 64-bit QuadPart overlaid with
+// low/high 32-bit parts.
+typedef union _LARGE_INTEGER {
+ struct {
+ DWORD LowPart;
+ LONG HighPart;
+ } DUMMYSTRUCTNAME;
+ struct {
+ DWORD LowPart;
+ LONG HighPart;
+ } u;
+ LONGLONG QuadPart;
+} LARGE_INTEGER;
+// Imported (dllimport) performance counter entry points.
+extern "C" {
+WINBASEAPI BOOL WINAPI
+QueryPerformanceCounter(LARGE_INTEGER* lpPerformanceCount);
+
+WINBASEAPI BOOL WINAPI QueryPerformanceFrequency(LARGE_INTEGER* lpFrequency);
+}
+
+#else
+// GCC is slower when dealing with always_inline, especially in debug builds.
+// When using Clang, use always_inline more aggressively.
+# if defined(__clang__) || defined(NDEBUG)
+# define ALWAYS_INLINE __attribute__((always_inline)) inline
+# else
+# define ALWAYS_INLINE inline
+# endif
+# define NO_INLINE __attribute__((noinline))
+#endif
+
+// Some functions may cause excessive binary bloat if inlined in debug or with
+// GCC builds, so use PREFER_INLINE on these instead of ALWAYS_INLINE.
+// PREFER_INLINE only becomes ALWAYS_INLINE for Clang builds with NDEBUG set;
+// everywhere else it is a plain `inline`.
+#if defined(__clang__) && defined(NDEBUG)
+# define PREFER_INLINE ALWAYS_INLINE
+#else
+# define PREFER_INLINE inline
+#endif
+
+// Marks a code path as unreachable via the compiler builtin.
+#define UNREACHABLE __builtin_unreachable()
+
+// Shorthand for the standard [[maybe_unused]] attribute.
+#define UNUSED [[maybe_unused]]
+
+// Shorthand for the standard [[fallthrough]] attribute.
+#define FALLTHROUGH [[fallthrough]]
+
+// IMPLICIT expands to a "moz_implicit" annotation only when both
+// MOZILLA_CLIENT and MOZ_CLANG_PLUGIN are defined; otherwise it is empty.
+#if defined(MOZILLA_CLIENT) && defined(MOZ_CLANG_PLUGIN)
+# define IMPLICIT __attribute__((annotate("moz_implicit")))
+#else
+# define IMPLICIT
+#endif
+
+#include "gl_defs.h"
+#include "glsl.h"
+#include "program.h"
+#include "texture.h"
+
+using namespace glsl;
+
+// Integer 2D point; an alias for ivec2_scalar.
+typedef ivec2_scalar IntPoint;
+
+// Integer rectangle described by its near corner (x0, y0) and far corner
+// (x1, y1). The far corner equals origin + size, so width and height are
+// simply the coordinate differences.
+struct IntRect {
+  int x0;
+  int y0;
+  int x1;
+  int y1;
+
+  // Default-constructs the empty rect at the origin.
+  IntRect() : x0(0), y0(0), x1(0), y1(0) {}
+  // Builds a rect directly from its four edge coordinates.
+  IntRect(int x0, int y0, int x1, int y1) : x0(x0), y0(y0), x1(x1), y1(y1) {}
+  // Builds a rect from an origin point and a size.
+  IntRect(IntPoint origin, IntPoint size)
+      : x0(origin.x),
+        y0(origin.y),
+        x1(origin.x + size.x),
+        y1(origin.y + size.y) {}
+
+  int width() const { return x1 - x0; }
+  int height() const { return y1 - y0; }
+  // A rect is empty when either dimension is zero or negative.
+  bool is_empty() const { return width() <= 0 || height() <= 0; }
+
+  IntPoint origin() const { return IntPoint(x0, y0); }
+
+  // True when both rects have identical width and height.
+  bool same_size(const IntRect& o) const {
+    return width() == o.width() && height() == o.height();
+  }
+
+  // True when `o` lies entirely within this rect (edges may coincide).
+  bool contains(const IntRect& o) const {
+    return o.x0 >= x0 && o.y0 >= y0 && o.x1 <= x1 && o.y1 <= y1;
+  }
+
+  // Clips this rect in place to its overlap with `o` and returns *this. The
+  // result may have non-positive width/height when the rects do not overlap.
+  IntRect& intersect(const IntRect& o) {
+    x0 = max(x0, o.x0);
+    y0 = max(y0, o.y0);
+    x1 = min(x1, o.x1);
+    y1 = min(y1, o.y1);
+    return *this;
+  }
+
+  // Non-mutating variant of intersect(): returns the overlap as a new rect.
+  // Const-qualified so that it can be used on const rects as well.
+  IntRect intersection(const IntRect& o) const {
+    IntRect result = *this;
+    result.intersect(o);
+    return result;
+  }
+
+  // Scale from source-space to dest-space, optionally rounding inward.
+  // With roundIn, the near edges are rounded up; the far edges always use
+  // truncating integer division. srcWidth and srcHeight are divisors and
+  // must be non-zero.
+  IntRect& scale(int srcWidth, int srcHeight, int dstWidth, int dstHeight,
+                 bool roundIn = false) {
+    x0 = (x0 * dstWidth + (roundIn ? srcWidth - 1 : 0)) / srcWidth;
+    y0 = (y0 * dstHeight + (roundIn ? srcHeight - 1 : 0)) / srcHeight;
+    x1 = (x1 * dstWidth) / srcWidth;
+    y1 = (y1 * dstHeight) / srcHeight;
+    return *this;
+  }
+
+  // Flip the rect's Y coords around inflection point at Y=offset
+  void invert_y(int offset) {
+    y0 = offset - y0;
+    y1 = offset - y1;
+    // Mirroring reverses the edge order, so swap them back.
+    swap(y0, y1);
+  }
+
+  // Translates this rect in place by `o` and returns *this.
+  IntRect& offset(const IntPoint& o) {
+    x0 += o.x;
+    y0 += o.y;
+    x1 += o.x;
+    y1 += o.y;
+    return *this;
+  }
+
+  // Translated copies of this rect.
+  IntRect operator+(const IntPoint& o) const {
+    return IntRect(*this).offset(o);
+  }
+  IntRect operator-(const IntPoint& o) const {
+    return IntRect(*this).offset(-o);
+  }
+};
+
+// Scalar point aliases. Note that Point3D is backed by the four-component
+// vec4_scalar type.
+typedef vec2_scalar Point2D;
+typedef vec4_scalar Point3D;
+
+// A span of integers delimited by `start` and `end`.
+struct IntRange {
+  int start;
+  int end;
+
+  // Distance from start to end.
+  int len() const { return end - start; }
+
+  // Overlap with `r`: the later of the two starts paired with the earlier of
+  // the two ends.
+  IntRange intersect(IntRange r) const {
+    const auto lo = max(start, r.start);
+    const auto hi = min(end, r.end);
+    return {lo, hi};
+  }
+};
+
+// A span of floats delimited by `start` and `end`.
+struct FloatRange {
+  float start;
+  float end;
+
+  // Clamps a single value into [start, end].
+  float clip(float x) const { return clamp(x, start, end); }
+
+  // Clamps both endpoints of `r` into this range.
+  FloatRange clip(FloatRange r) const {
+    const float lo = clip(r.start);
+    const float hi = clip(r.end);
+    return {lo, hi};
+  }
+
+  // Smallest range covering both this range and `r`.
+  FloatRange merge(FloatRange r) const {
+    const auto lo = min(start, r.start);
+    const auto hi = max(end, r.end);
+    return {lo, hi};
+  }
+
+  // Rounds each endpoint to the nearest integer (floor of value + 0.5).
+  IntRange round() const {
+    const int first = int(floor(start + 0.5f));
+    const int last = int(floor(end + 0.5f));
+    return {first, last};
+  }
+
+  // Rounds the start down and the end up so the result covers the range.
+  IntRange round_out() const {
+    const int first = int(floor(start));
+    const int last = int(ceil(end));
+    return {first, last};
+  }
+};
+
+// Horizontal extent covered by two points: the smaller and larger of their
+// x coordinates.
+template <typename P>
+static inline FloatRange x_range(P p0, P p1) {
+  const auto lo = min(p0.x, p1.x);
+  const auto hi = max(p0.x, p1.x);
+  return {lo, hi};
+}
+
+// Per-attribute vertex fetch state: element layout (size, type, normalized,
+// stride, offset), instancing divisor, and the backing buffer it reads from.
+struct VertexAttrib {
+ size_t size = 0; // in bytes
+ GLenum type = 0;
+ bool normalized = false;
+ GLsizei stride = 0;
+ GLuint offset = 0;
+ bool enabled = false;
+ // A non-zero divisor makes the attribute loaders index by instance rather
+ // than by vertex.
+ GLuint divisor = 0;
+ int vertex_array = 0;
+ int vertex_buffer = 0;
+ char* buf = nullptr; // XXX: this can easily dangle
+ size_t buf_size = 0; // this will let us bounds check
+
+ // Mark the buffer as invalid so we don't accidentally use stale data.
+ void disable() {
+ enabled = false;
+ buf = nullptr;
+ buf_size = 0;
+ }
+};
+
+// Returns the number of bytes used per pixel for the given GL internal
+// format. Unknown formats are logged, assert in debug builds, and yield 0.
+static int bytes_for_internal_format(GLenum internal_format) {
+  switch (internal_format) {
+    // Four channels of four bytes each.
+    case GL_RGBA32F:
+    case GL_RGBA32I:
+      return 4 * 4;
+
+    // Formats that take four bytes per pixel, including every depth format.
+    case GL_RGBA8:
+    case GL_BGRA8:
+    case GL_RGBA:
+    case GL_DEPTH_COMPONENT:
+    case GL_DEPTH_COMPONENT16:
+    case GL_DEPTH_COMPONENT24:
+    case GL_DEPTH_COMPONENT32:
+    case GL_RG16:
+      return 4;
+
+    // Formats that take two bytes per pixel.
+    case GL_RG8:
+    case GL_RG:
+    case GL_RGB_RAW_422_APPLE:
+    case GL_R16:
+      return 2;
+
+    // Single-byte formats.
+    case GL_R8:
+    case GL_RED:
+      return 1;
+
+    default:
+      debugf("internal format: %x\n", internal_format);
+      assert(0);
+      return 0;
+  }
+}
+
+// Rounds a row length in bytes up to the next multiple of four.
+static inline int aligned_stride(int row_bytes) {
+  const int align_mask = 3;
+  return (row_bytes + align_mask) & ~align_mask;
+}
+
+// Translates a GL internal format into the sampler's TextureFormat.
+// Unrecognized formats assert in debug builds and fall back to RGBA8.
+static TextureFormat gl_format_to_texture_format(int type) {
+  TextureFormat format = TextureFormat::RGBA8;
+  switch (type) {
+    case GL_RGBA32F:
+      format = TextureFormat::RGBA32F;
+      break;
+    case GL_RGBA32I:
+      format = TextureFormat::RGBA32I;
+      break;
+    case GL_RGBA8:
+      format = TextureFormat::RGBA8;
+      break;
+    case GL_R8:
+      format = TextureFormat::R8;
+      break;
+    case GL_RG8:
+      format = TextureFormat::RG8;
+      break;
+    case GL_R16:
+      format = TextureFormat::R16;
+      break;
+    case GL_RG16:
+      format = TextureFormat::RG16;
+      break;
+    case GL_RGB_RAW_422_APPLE:
+      format = TextureFormat::YUV422;
+      break;
+    default:
+      // Keep the RGBA8 fallback chosen above.
+      assert(0);
+      break;
+  }
+  return format;
+}
+
+// A query object holding a single 64-bit result value.
+struct Query {
+ uint64_t value = 0;
+};
+
+// A heap-backed byte buffer that tracks both its logical size and the
+// capacity of the underlying allocation. The storage is released on
+// destruction.
+struct Buffer {
+  char* buf = nullptr;
+  size_t size = 0;
+  size_t capacity = 0;
+
+  // Resizes the buffer to new_size bytes. Returns true on success and false
+  // if the underlying reallocation failed.
+  bool allocate(size_t new_size) {
+    // An unchanged size, or one that still fits in the current allocation,
+    // only needs the logical size updated.
+    const bool fits = new_size == size || new_size <= capacity;
+    if (fits) {
+      size = new_size;
+      return true;
+    }
+    // The request exceeds the current capacity, so the storage has to grow.
+    char* grown = (char*)realloc(buf, new_size);
+    assert(grown);
+    if (grown) {
+      buf = grown;
+      size = new_size;
+      capacity = new_size;
+      return true;
+    }
+    // Growing failed: drop the old allocation entirely instead of leaving
+    // stale state around.
+    cleanup();
+    return false;
+  }
+
+  // Frees the storage, if any, and resets the buffer to its empty state.
+  void cleanup() {
+    if (!buf) {
+      return;
+    }
+    free(buf);
+    buf = nullptr;
+    size = 0;
+    capacity = 0;
+  }
+
+  ~Buffer() { cleanup(); }
+};
+
+// A framebuffer object: the ids of its color and depth attachments, where 0
+// is the initial (unattached) value.
+struct Framebuffer {
+ GLuint color_attachment = 0;
+ GLuint depth_attachment = 0;
+};
+
+// A renderbuffer object, which refers to a texture by id.
+struct Renderbuffer {
+ GLuint texture = 0;
+
+ // Hook invoked by ObjectStore::erase just before the object is deleted.
+ // Defined outside this struct.
+ void on_erase();
+};
+
+// Translates a GL filter enum into the sampler's TextureFilter. The mipmap
+// variants collapse onto their base NEAREST/LINEAR mode. Unknown values
+// assert in debug builds and fall back to NEAREST.
+TextureFilter gl_filter_to_texture_filter(int type) {
+  switch (type) {
+    case GL_NEAREST:
+    case GL_NEAREST_MIPMAP_LINEAR:
+    case GL_NEAREST_MIPMAP_NEAREST:
+      return TextureFilter::NEAREST;
+
+    case GL_LINEAR:
+    case GL_LINEAR_MIPMAP_LINEAR:
+    case GL_LINEAR_MIPMAP_NEAREST:
+      return TextureFilter::LINEAR;
+
+    default:
+      assert(0);
+      return TextureFilter::NEAREST;
+  }
+}
+
+// A texture object: format and dimensions, the backing pixel buffer (either
+// allocated here or supplied externally), filter settings, lock count and
+// delayed-clear bookkeeping.
+struct Texture {
+ GLenum internal_format = 0;
+ int width = 0;
+ int height = 0;
+ char* buf = nullptr;
+ // Size of the internally allocated buffer; set_buffer() resets it to 0 for
+ // externally supplied buffers.
+ size_t buf_size = 0;
+ uint32_t buf_stride = 0;
+ uint8_t buf_bpp = 0;
+ GLenum min_filter = GL_NEAREST;
+ GLenum mag_filter = GL_LINEAR;
+ // The number of active locks on this texture. If this texture has any active
+ // locks, we need to disallow modifying or destroying the texture as it may
+ // be accessed by other threads where modifications could lead to races.
+ int32_t locked = 0;
+ // When used as an attachment of a framebuffer, rendering to the texture
+ // behaves as if it is located at the given offset such that the offset is
+ // subtracted from all transformed vertexes after the viewport is applied.
+ IntPoint offset;
+
+ enum FLAGS {
+ // If the buffer is internally-allocated by SWGL
+ SHOULD_FREE = 1 << 1,
+ // If the buffer has been cleared to initialize it. Currently this is only
+ // utilized by depth buffers which need to know when depth runs have reset
+ // to a valid row state. When unset, the depth runs may contain garbage.
+ CLEARED = 1 << 2,
+ };
+ int flags = SHOULD_FREE;
+ bool should_free() const { return bool(flags & SHOULD_FREE); }
+ bool cleared() const { return bool(flags & CLEARED); }
+
+ // Sets or clears the given flag bit(s) depending on val.
+ void set_flag(int flag, bool val) {
+ if (val) {
+ flags |= flag;
+ } else {
+ flags &= ~flag;
+ }
+ }
+ void set_should_free(bool val) {
+ // buf must be null before SHOULD_FREE can be safely toggled. Otherwise, we
+ // might accidentally mistakenly realloc an externally allocated buffer as
+ // if it were an internally allocated one.
+ assert(!buf);
+ set_flag(SHOULD_FREE, val);
+ }
+ void set_cleared(bool val) { set_flag(CLEARED, val); }
+
+ // Delayed-clearing state. When a clear of an FB is requested, we don't
+ // immediately clear each row, as the rows may be subsequently overwritten
+ // by draw calls, allowing us to skip the work of clearing the affected rows
+ // either fully or partially. Instead, we keep a bit vector of rows that need
+ // to be cleared later and save the value they need to be cleared with so
+ // that we can clear these rows individually when they are touched by draws.
+ // This currently only works for 2D textures, but not on texture arrays.
+ int delay_clear = 0;
+ uint32_t clear_val = 0;
+ uint32_t* cleared_rows = nullptr;
+
+ // Depth-run helpers; defined outside this struct.
+ void init_depth_runs(uint32_t z);
+ void fill_depth_runs(uint32_t z, const IntRect& scissor);
+
+ // Starts a delayed clear to `val`: delay_clear is set to the row count and
+ // the cleared_rows bit vector (one bit per row, 32 rows per word) is
+ // allocated on first use and zeroed.
+ void enable_delayed_clear(uint32_t val) {
+ delay_clear = height;
+ clear_val = val;
+ if (!cleared_rows) {
+ cleared_rows = new uint32_t[(height + 31) / 32];
+ }
+ memset(cleared_rows, 0, ((height + 31) / 32) * sizeof(uint32_t));
+ // When height is not a multiple of 32, pre-set the bits of the last word
+ // that lie beyond the final row.
+ if (height & 31) {
+ cleared_rows[height / 32] = ~0U << (height & 31);
+ }
+ }
+
+ // Releases the bit vector and resets delay_clear. Does nothing if no bit
+ // vector was allocated.
+ void disable_delayed_clear() {
+ if (cleared_rows) {
+ delete[] cleared_rows;
+ cleared_rows = nullptr;
+ delay_clear = 0;
+ }
+ }
+
+ // bpp()/stride() report the values cached for the current buffer, while the
+ // compute_* variants derive them from the format and a given width.
+ int bpp() const { return buf_bpp; }
+ int compute_bpp() const { return bytes_for_internal_format(internal_format); }
+
+ size_t stride() const { return buf_stride; }
+ size_t compute_stride(int bpp, int width) const {
+ return aligned_stride(bpp * width);
+ }
+
+ // Set an external backing buffer of this texture.
+ void set_buffer(void* new_buf, size_t new_stride) {
+ assert(!should_free());
+ // Ensure that the supplied stride is at least as big as the row data and
+ // is aligned to the smaller of either the BPP or word-size. We need to at
+ // least be able to sample data from within a row and sample whole pixels
+ // of smaller formats without risking unaligned access.
+ int new_bpp = compute_bpp();
+ assert(new_stride >= size_t(new_bpp * width) &&
+ new_stride % min(new_bpp, sizeof(uint32_t)) == 0);
+
+ buf = (char*)new_buf;
+ buf_size = 0;
+ buf_bpp = new_bpp;
+ buf_stride = new_stride;
+ }
+
+ // Returns true if re-allocation succeeded, false otherwise...
+ // min_width/min_height only enlarge the size of the allocation; the stored
+ // stride is always computed from the actual width.
+ bool allocate(bool force = false, int min_width = 0, int min_height = 0) {
+ assert(!locked); // Locked textures shouldn't be reallocated
+ // If we get here, some GL API call that invalidates the texture was used.
+ // Mark the buffer as not-cleared to signal this.
+ set_cleared(false);
+ // Check if there is either no buffer currently or if we forced validation
+ // of the buffer size because some dimension might have changed.
+ if ((!buf || force) && should_free()) {
+ // Compute the buffer's BPP and stride, since they may have changed.
+ int new_bpp = compute_bpp();
+ size_t new_stride = compute_stride(new_bpp, width);
+ // Compute new size based on the maximum potential stride, rather than
+ // the current stride, to hopefully avoid reallocations when size would
+ // otherwise change too much...
+ size_t max_stride = compute_stride(new_bpp, max(width, min_width));
+ size_t size = max_stride * max(height, min_height);
+ if ((!buf && size > 0) || size > buf_size) {
+ // Allocate with a SIMD register-sized tail of padding at the end so we
+ // can safely read or write past the end of the texture with SIMD ops.
+ // Currently only the flat Z-buffer texture needs this padding due to
+ // full-register loads and stores in check_depth and discard_depth. In
+ // case some code in the future accidentally uses a linear filter on a
+ // texture with less than 2 pixels per row, we also add this padding
+ // just to be safe. All other texture types and use-cases should be
+ // safe to omit padding.
+ size_t padding =
+ internal_format == GL_DEPTH_COMPONENT24 || max(width, min_width) < 2
+ ? sizeof(Float)
+ : 0;
+ char* new_buf = (char*)realloc(buf, size + padding);
+ assert(new_buf);
+ if (!new_buf) {
+ // Allocation failed, so ensure we don't leave stale buffer state.
+ cleanup();
+ return false;
+ }
+ // Successfully reallocated the buffer, so go ahead and set it.
+ buf = new_buf;
+ buf_size = size;
+ }
+ // Set the BPP and stride in case they changed.
+ buf_bpp = new_bpp;
+ buf_stride = new_stride;
+ }
+ // Allocation succeeded or nothing changed...
+ return true;
+ }
+
+ // Drops the backing buffer (freeing it only if it was allocated here) and
+ // any delayed-clear state.
+ void cleanup() {
+ assert(!locked); // Locked textures shouldn't be destroyed
+ if (buf) {
+ // If we need to toggle SHOULD_FREE state, ensure that buf is nulled out,
+ // regardless of whether we internally allocated it. This will prevent us
+ // from wrongly treating buf as having been internally allocated for when
+ // we go to realloc if it actually was externally allocated.
+ if (should_free()) {
+ free(buf);
+ }
+ buf = nullptr;
+ buf_size = 0;
+ buf_bpp = 0;
+ buf_stride = 0;
+ }
+ disable_delayed_clear();
+ }
+
+ ~Texture() { cleanup(); }
+
+ // bounds() is the texture rect at the origin; offset_bounds() is the same
+ // rect translated by `offset`.
+ IntRect bounds() const { return IntRect{0, 0, width, height}; }
+ IntRect offset_bounds() const { return bounds() + offset; }
+
+ // Find the valid sampling bounds relative to the requested region
+ IntRect sample_bounds(const IntRect& req, bool invertY = false) const {
+ IntRect bb = bounds().intersect(req) - req.origin();
+ if (invertY) bb.invert_y(req.height());
+ return bb;
+ }
+
+ // Get a pointer for sampling at the given offset
+ char* sample_ptr(int x, int y) const {
+ return buf + y * stride() + x * bpp();
+ }
+
+ // Get a pointer for sampling the requested region and limit to the provided
+ // sampling bounds
+ char* sample_ptr(const IntRect& req, const IntRect& bounds,
+ bool invertY = false) const {
+ // Offset the sample pointer by the clamped bounds
+ int x = req.x0 + bounds.x0;
+ // Invert the Y offset if necessary
+ int y = invertY ? req.y1 - 1 - bounds.y0 : req.y0 + bounds.y0;
+ return sample_ptr(x, y);
+ }
+};
+
+// The last vertex attribute is reserved as a null attribute in case a vertex
+// attribute is used without being set.
+#define MAX_ATTRIBS 17
+#define NULL_ATTRIB 16
+// A vertex array object: the attribute slots plus the element array buffer
+// binding.
+struct VertexArray {
+ VertexAttrib attribs[MAX_ATTRIBS];
+ // Starts at -1.
+ // NOTE(review): presumably the highest attribute index in use - confirm
+ // against the code that updates it.
+ int max_attrib = -1;
+ // The GL spec defines element array buffer binding to be part of VAO state.
+ GLuint element_array_buffer_binding = 0;
+
+ // Defined outside this struct.
+ void validate();
+};
+
+// A shader object: its GL type and the loader associated with it.
+struct Shader {
+ GLenum type = 0;
+ ProgramLoader loader = nullptr;
+};
+
+// A program object. Only `impl` is deleted by the destructor; vert_impl and
+// frag_impl are not deleted here.
+struct Program {
+ ProgramImpl* impl = nullptr;
+ VertexShaderImpl* vert_impl = nullptr;
+ FragmentShaderImpl* frag_impl = nullptr;
+ // UseProgram erases a program flagged as deleted once another program
+ // replaces it as the current one.
+ bool deleted = false;
+
+ ~Program() { delete impl; }
+};
+
+// Blend key machinery. FOR_EACH_BLEND_KEY lists every supported combination
+// of blend parameters once; the list is expanded four times below to produce
+// the BLEND_, MASK_BLEND_, AA_BLEND_ and AA_MASK_BLEND_ families of BlendKey
+// enumerators, each named by pasting the prefix onto up to four arguments.
+// clang-format off
+// Fully-expand GL defines while ignoring more than 4 suffixes
+#define CONCAT_KEY(prefix, x, y, z, w, ...) prefix##x##y##z##w
+// Generate a blend key enum symbol
+#define BLEND_KEY(...) CONCAT_KEY(BLEND_, __VA_ARGS__, 0, 0, 0)
+#define MASK_BLEND_KEY(...) CONCAT_KEY(MASK_BLEND_, __VA_ARGS__, 0, 0, 0)
+#define AA_BLEND_KEY(...) CONCAT_KEY(AA_BLEND_, __VA_ARGS__, 0, 0, 0)
+#define AA_MASK_BLEND_KEY(...) CONCAT_KEY(AA_MASK_BLEND_, __VA_ARGS__, 0, 0, 0)
+
+// Utility macro to easily generate similar code for all implemented blend modes
+#define FOR_EACH_BLEND_KEY(macro) \
+ macro(GL_ONE, GL_ZERO, 0, 0) \
+ macro(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ONE_MINUS_SRC_ALPHA) \
+ macro(GL_ONE, GL_ONE_MINUS_SRC_ALPHA, 0, 0) \
+ macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, 0, 0) \
+ macro(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, GL_ZERO, GL_ONE) \
+ macro(GL_ZERO, GL_ONE_MINUS_SRC_ALPHA, 0, 0) \
+ macro(GL_ZERO, GL_SRC_COLOR, 0, 0) \
+ macro(GL_ONE, GL_ONE, 0, 0) \
+ macro(GL_ONE, GL_ONE, GL_ONE, GL_ONE_MINUS_SRC_ALPHA) \
+ macro(GL_ONE_MINUS_DST_ALPHA, GL_ONE, GL_ZERO, GL_ONE) \
+ macro(GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_COLOR, 0, 0) \
+ macro(GL_ONE, GL_ONE_MINUS_SRC1_COLOR, 0, 0) \
+ macro(GL_MIN, 0, 0, 0) \
+ macro(GL_MAX, 0, 0, 0) \
+ macro(GL_MULTIPLY_KHR, 0, 0, 0) \
+ macro(GL_SCREEN_KHR, 0, 0, 0) \
+ macro(GL_OVERLAY_KHR, 0, 0, 0) \
+ macro(GL_DARKEN_KHR, 0, 0, 0) \
+ macro(GL_LIGHTEN_KHR, 0, 0, 0) \
+ macro(GL_COLORDODGE_KHR, 0, 0, 0) \
+ macro(GL_COLORBURN_KHR, 0, 0, 0) \
+ macro(GL_HARDLIGHT_KHR, 0, 0, 0) \
+ macro(GL_SOFTLIGHT_KHR, 0, 0, 0) \
+ macro(GL_DIFFERENCE_KHR, 0, 0, 0) \
+ macro(GL_EXCLUSION_KHR, 0, 0, 0) \
+ macro(GL_HSL_HUE_KHR, 0, 0, 0) \
+ macro(GL_HSL_SATURATION_KHR, 0, 0, 0) \
+ macro(GL_HSL_COLOR_KHR, 0, 0, 0) \
+ macro(GL_HSL_LUMINOSITY_KHR, 0, 0, 0) \
+ macro(SWGL_BLEND_DROP_SHADOW, 0, 0, 0) \
+ macro(SWGL_BLEND_SUBPIXEL_TEXT, 0, 0, 0)
+
+// Each DEFINE_* helper emits one enumerator followed by a comma.
+#define DEFINE_BLEND_KEY(...) BLEND_KEY(__VA_ARGS__),
+#define DEFINE_MASK_BLEND_KEY(...) MASK_BLEND_KEY(__VA_ARGS__),
+#define DEFINE_AA_BLEND_KEY(...) AA_BLEND_KEY(__VA_ARGS__),
+#define DEFINE_AA_MASK_BLEND_KEY(...) AA_MASK_BLEND_KEY(__VA_ARGS__),
+// The *_NONE enumerators alias the (GL_ONE, GL_ZERO) entry of each family.
+enum BlendKey : uint8_t {
+ FOR_EACH_BLEND_KEY(DEFINE_BLEND_KEY)
+ FOR_EACH_BLEND_KEY(DEFINE_MASK_BLEND_KEY)
+ FOR_EACH_BLEND_KEY(DEFINE_AA_BLEND_KEY)
+ FOR_EACH_BLEND_KEY(DEFINE_AA_MASK_BLEND_KEY)
+ BLEND_KEY_NONE = BLEND_KEY(GL_ONE, GL_ZERO),
+ MASK_BLEND_KEY_NONE = MASK_BLEND_KEY(GL_ONE, GL_ZERO),
+ AA_BLEND_KEY_NONE = AA_BLEND_KEY(GL_ONE, GL_ZERO),
+ AA_MASK_BLEND_KEY_NONE = AA_MASK_BLEND_KEY(GL_ONE, GL_ZERO),
+};
+// clang-format on
+
+// Number of texture units tracked by the context; also the value reported
+// for GL_MAX_TEXTURE_UNITS and GL_MAX_TEXTURE_IMAGE_UNITS.
+const size_t MAX_TEXTURE_UNITS = 16;
+
+// Resets `binding` to 0 if it currently refers to `n`. Returns whether the
+// binding matched and was therefore reset.
+template <typename T>
+static inline bool unlink(T& binding, T n) {
+  const bool matched = (binding == n);
+  if (matched) {
+    binding = 0;
+  }
+  return matched;
+}
+
+// A growable table of heap-allocated objects addressed by integer id. Slots
+// hold owning pointers (null when unused); slot 0 is never handed out by the
+// automatic allocator. `invalid` is returned by operator[] when the table
+// could not be grown to reach the requested id.
+template <typename O>
+struct ObjectStore {
+  O** objects = nullptr;
+  size_t size = 0;
+  // reserve object 0 as null
+  size_t first_free = 1;
+  O invalid;
+
+  // Deletes every stored object, then the slot table itself.
+  ~ObjectStore() {
+    if (!objects) {
+      return;
+    }
+    for (size_t slot = 0; slot < size; slot++) {
+      delete objects[slot];
+    }
+    free(objects);
+  }
+
+  // Enlarges the table until index i is valid, growing by half each step
+  // from a minimum of 8 slots. New slots start out null. Returns false if
+  // the reallocation failed, in which case the table is left untouched.
+  bool grow(size_t i) {
+    size_t wanted = size;
+    if (!wanted) {
+      wanted = 8;
+    }
+    for (; wanted <= i; wanted += wanted / 2) {
+    }
+    O** resized = (O**)realloc(objects, wanted * sizeof(O*));
+    assert(resized);
+    if (!resized) {
+      return false;
+    }
+    for (; size < wanted; size++) {
+      resized[size] = nullptr;
+    }
+    objects = resized;
+    return true;
+  }
+
+  // Stores a copy of o at id i unless that slot is already occupied. Does
+  // nothing if the table cannot be grown to include i.
+  void insert(size_t i, const O& o) {
+    if (i >= size) {
+      if (!grow(i)) {
+        return;
+      }
+    }
+    if (objects[i]) {
+      return;
+    }
+    objects[i] = new O(o);
+  }
+
+  // Finds the first unused id at or after first_free (which may be `size`
+  // when the table is full) and remembers it as the new search start.
+  size_t next_free() {
+    size_t slot = first_free;
+    for (; slot < size; slot++) {
+      if (!objects[slot]) {
+        break;
+      }
+    }
+    first_free = slot;
+    return slot;
+  }
+
+  // Stores a copy of o under the next unused id and returns that id.
+  size_t insert(const O& o = O()) {
+    const size_t slot = next_free();
+    insert(slot, o);
+    return slot;
+  }
+
+  // Returns the object with id i, default-constructing it on first access.
+  // Falls back to `invalid` if the table could not be grown.
+  O& operator[](size_t i) {
+    insert(i, O());
+    if (i < size) {
+      return *objects[i];
+    }
+    return invalid;
+  }
+
+  // Looks up id i without creating anything; null if absent or out of range.
+  O* find(size_t i) const {
+    if (i >= size) {
+      return nullptr;
+    }
+    return objects[i];
+  }
+
+  // Erase hook dispatch: the second overload is only viable for types with
+  // an on_erase member and is preferred over the variadic fallback, which
+  // does nothing.
+  template <typename T>
+  void on_erase(T*, ...) {}
+  template <typename T>
+  void on_erase(T* o, decltype(&T::on_erase)) {
+    o->on_erase();
+  }
+
+  // Deletes the object with id i, running its erase hook first. Returns
+  // false if no such object exists.
+  bool erase(size_t i) {
+    if (i >= size || !objects[i]) {
+      return false;
+    }
+    on_erase(objects[i], nullptr);
+    delete objects[i];
+    objects[i] = nullptr;
+    // Let the freed id be reused before any later one.
+    if (i < first_free) {
+      first_free = i;
+    }
+    return true;
+  }
+
+  // Iteration over the raw slot table; entries may be null.
+  O** begin() const { return objects; }
+  O** end() const { return &objects[size]; }
+};
+
+// All state belonging to one GL context: the object stores, fixed-function
+// state (viewport, blend, depth, scissor, clear values) and the current
+// bindings for each target.
+struct Context {
+ // Reference count; a new context starts with a single reference.
+ int32_t references = 1;
+
+ // Per-type object tables, indexed by GL object id.
+ ObjectStore<Query> queries;
+ ObjectStore<Buffer> buffers;
+ ObjectStore<Texture> textures;
+ ObjectStore<VertexArray> vertex_arrays;
+ ObjectStore<Framebuffer> framebuffers;
+ ObjectStore<Renderbuffer> renderbuffers;
+ ObjectStore<Shader> shaders;
+ ObjectStore<Program> programs;
+
+ // Error reported (and then reset) by GetError.
+ GLenum last_error = GL_NO_ERROR;
+
+ IntRect viewport = {0, 0, 0, 0};
+
+ // Blend state.
+ bool blend = false;
+ GLenum blendfunc_srgb = GL_ONE;
+ GLenum blendfunc_drgb = GL_ZERO;
+ GLenum blendfunc_sa = GL_ONE;
+ GLenum blendfunc_da = GL_ZERO;
+ GLenum blend_equation = GL_FUNC_ADD;
+ V8<uint16_t> blendcolor = 0;
+ BlendKey blend_key = BLEND_KEY_NONE;
+
+ // Depth state.
+ bool depthtest = false;
+ bool depthmask = true;
+ GLenum depthfunc = GL_LESS;
+
+ // Scissor state.
+ bool scissortest = false;
+ IntRect scissor = {0, 0, 0, 0};
+
+ // Clear values.
+ GLfloat clearcolor[4] = {0, 0, 0, 0};
+ GLdouble cleardepth = 1;
+
+ int unpack_row_length = 0;
+
+ // NOTE(review): these look like shading statistics counters - confirm
+ // against the code that updates them.
+ int shaded_rows = 0;
+ int shaded_pixels = 0;
+
+ // Texture bindings of one texture unit, one per supported target.
+ struct TextureUnit {
+ GLuint texture_2d_binding = 0;
+ GLuint texture_rectangle_binding = 0;
+
+ // Resets any binding of this unit that refers to texture n.
+ void unlink(GLuint n) {
+ ::unlink(texture_2d_binding, n);
+ ::unlink(texture_rectangle_binding, n);
+ }
+ };
+ TextureUnit texture_units[MAX_TEXTURE_UNITS];
+ int active_texture_unit = 0;
+
+ GLuint current_program = 0;
+
+ GLuint current_vertex_array = 0;
+ bool validate_vertex_array = true;
+
+ // Current binding for each supported target.
+ GLuint pixel_pack_buffer_binding = 0;
+ GLuint pixel_unpack_buffer_binding = 0;
+ GLuint array_buffer_binding = 0;
+ GLuint time_elapsed_query = 0;
+ GLuint samples_passed_query = 0;
+ GLuint renderbuffer_binding = 0;
+ GLuint draw_framebuffer_binding = 0;
+ GLuint read_framebuffer_binding = 0;
+ // Returned by get_binding for targets it does not recognize.
+ GLuint unknown_binding = 0;
+
+ // Returns a mutable reference to the binding slot for the given target.
+ // Element array bindings live in the current vertex array, and texture
+ // bindings in the active texture unit. Unknown targets are logged, assert
+ // in debug builds, and resolve to unknown_binding.
+ GLuint& get_binding(GLenum name) {
+ switch (name) {
+ case GL_PIXEL_PACK_BUFFER:
+ return pixel_pack_buffer_binding;
+ case GL_PIXEL_UNPACK_BUFFER:
+ return pixel_unpack_buffer_binding;
+ case GL_ARRAY_BUFFER:
+ return array_buffer_binding;
+ case GL_ELEMENT_ARRAY_BUFFER:
+ return vertex_arrays[current_vertex_array].element_array_buffer_binding;
+ case GL_TEXTURE_2D:
+ return texture_units[active_texture_unit].texture_2d_binding;
+ case GL_TEXTURE_RECTANGLE:
+ return texture_units[active_texture_unit].texture_rectangle_binding;
+ case GL_TIME_ELAPSED:
+ return time_elapsed_query;
+ case GL_SAMPLES_PASSED:
+ return samples_passed_query;
+ case GL_RENDERBUFFER:
+ return renderbuffer_binding;
+ case GL_DRAW_FRAMEBUFFER:
+ return draw_framebuffer_binding;
+ case GL_READ_FRAMEBUFFER:
+ return read_framebuffer_binding;
+ default:
+ debugf("unknown binding %x\n", name);
+ assert(false);
+ return unknown_binding;
+ }
+ }
+
+ // Texture lookup by sampler type and unit. The unnamed first parameter only
+ // selects the overload: 2D samplers use the unit's 2D binding and rectangle
+ // samplers use its rectangle binding.
+ Texture& get_texture(sampler2D, int unit) {
+ return textures[texture_units[unit].texture_2d_binding];
+ }
+
+ Texture& get_texture(isampler2D, int unit) {
+ return textures[texture_units[unit].texture_2d_binding];
+ }
+
+ Texture& get_texture(sampler2DRect, int unit) {
+ return textures[texture_units[unit].texture_rectangle_binding];
+ }
+
+ // Clips bb to the scissor rect (shifted by -origin) when the scissor test
+ // is enabled; otherwise returns bb unchanged.
+ IntRect apply_scissor(IntRect bb,
+ const IntPoint& origin = IntPoint(0, 0)) const {
+ return scissortest ? bb.intersect(scissor - origin) : bb;
+ }
+
+ // Scissored bounds of a texture, using the texture's own offset as origin.
+ IntRect apply_scissor(const Texture& t) const {
+ return apply_scissor(t.bounds(), t.offset);
+ }
+};
+// File-level current state: the active context, the vertex/fragment shader
+// implementations selected by setup_program, and the current blend key.
+static Context* ctx = nullptr;
+static VertexShaderImpl* vertex_shader = nullptr;
+static FragmentShaderImpl* fragment_shader = nullptr;
+static BlendKey blend_key = BLEND_KEY_NONE;
+
+// Forward declaration; init_sampler calls this before reading a texture's
+// buffer state.
+static void prepare_texture(Texture& t, const IntRect* skip = nullptr);
+
+// Chooses the sampler's filter from the texture's magnification filter.
+// Rows narrower than 2 pixels cannot be safely sampled at the end of the row
+// with a linear filter, so such textures always get nearest filtering.
+template <typename S>
+static inline void init_filter(S* s, Texture& t) {
+  if (t.width >= 2) {
+    s->filter = gl_filter_to_texture_filter(t.mag_filter);
+  } else {
+    s->filter = TextureFilter::NEAREST;
+  }
+}
+
+// Points a sampler at the texture's buffer after preparing the texture. The
+// sampler's stride is the byte stride divided by 4 for formats of 4 or more
+// bytes per pixel, by 2 for 2-byte formats, and left in bytes for 1-byte
+// formats.
+template <typename S>
+static inline void init_sampler(S* s, Texture& t) {
+  prepare_texture(t);
+
+  // The buffer is exposed as uint32_t* for easier sampling; formats with
+  // bpp < 4 need to cast it to uint8_t* or uint16_t*.
+  s->buf = (uint32_t*)t.buf;
+  s->format = gl_format_to_texture_format(t.internal_format);
+  s->width = t.width;
+  s->height = t.height;
+
+  s->stride = t.stride();
+  const int bpp = t.bpp();
+  int stride_unit = 1;
+  if (bpp >= 4) {
+    stride_unit = 4;
+  } else if (bpp == 2) {
+    stride_unit = 2;
+  } else {
+    assert(bpp == 1);
+  }
+  if (stride_unit > 1) {
+    s->stride /= stride_unit;
+  }
+}
+
+// Sets up a sampler for missing texture data: a 1x1 RGBA8 image of
+// transparent black. The backing storage spans a full SIMD vector of zeroes
+// so that padded, unaligned vector loads stay within the buffer.
+template <typename S>
+static inline void null_sampler(S* s) {
+  static const uint32_t kZeroTexels[sizeof(Float) / sizeof(uint32_t)] = {0};
+  s->format = TextureFormat::RGBA8;
+  s->buf = (uint32_t*)kZeroTexels;
+  s->width = 1;
+  s->height = 1;
+  // One row of a single texel.
+  s->stride = s->width;
+}
+
+// Filter used together with null_sampler: always nearest filtering.
+template <typename S>
+static inline void null_filter(S* s) {
+ s->filter = TextureFilter::NEAREST;
+}
+
+// Fills in sampler `s` (data and filter) from the texture bound to the given
+// unit, substituting the null sampler and filter when the texture has no
+// buffer. Returns `s`.
+template <typename S>
+S* lookup_sampler(S* s, int texture) {
+  Texture& bound = ctx->get_texture(s, texture);
+  if (bound.buf) {
+    init_sampler(s, bound);
+    init_filter(s, bound);
+  } else {
+    null_sampler(s);
+    null_filter(s);
+  }
+  return s;
+}
+
+// Like lookup_sampler, but only the sampler data is initialized and no
+// filter is set. Returns `s`.
+template <typename S>
+S* lookup_isampler(S* s, int texture) {
+  Texture& bound = ctx->get_texture(s, texture);
+  if (bound.buf) {
+    init_sampler(s, bound);
+  } else {
+    null_sampler(s);
+  }
+  return s;
+}
+
+// Size in bytes of one component of the given GL data type. Unsupported
+// types assert in debug builds and yield 0.
+int bytes_per_type(GLenum type) {
+  switch (type) {
+    case GL_INT:
+    case GL_FLOAT:
+      return 4;
+    case GL_UNSIGNED_SHORT:
+      return 2;
+    case GL_UNSIGNED_BYTE:
+      return 1;
+    default:
+      assert(0);
+      return 0;
+  }
+}
+
+// Widens `size` bytes of packed components of type C into the scalar vector
+// type S. Components beyond those supplied stay zero. When `normalized` is
+// set, each component is scaled by 1 / (maximum value representable in C).
+template <typename S, typename C>
+static inline S expand_attrib(const char* buf, size_t size, bool normalized) {
+  typedef typename ElementType<S>::ty elem_type;
+  S scalar = {0};
+  const C* src = reinterpret_cast<const C*>(buf);
+  const size_t count = size / sizeof(C);
+  if (!normalized) {
+    for (size_t i = 0; i < count; i++) {
+      put_nth_component(scalar, i, elem_type(src[i]));
+    }
+    return scalar;
+  }
+  const float scale = 1.0f / ((1 << (8 * sizeof(C))) - 1);
+  for (size_t i = 0; i < count; i++) {
+    put_nth_component(scalar, i, elem_type(src[i]) * scale);
+  }
+  return scalar;
+}
+
+// Loads one attribute value of scalar vector type S from `src`.
+//  - If the attribute supplies at least sizeof(S) bytes, S is read directly.
+//  - Unsigned short/byte data is widened (and optionally normalized).
+//  - Otherwise the available bytes are copied and the rest stays zero.
+template <typename S>
+static inline S load_attrib_scalar(VertexAttrib& va, const char* src) {
+  if (sizeof(S) <= va.size) {
+    return *reinterpret_cast<const S*>(src);
+  }
+  switch (va.type) {
+    case GL_UNSIGNED_SHORT:
+      return expand_attrib<S, uint16_t>(src, va.size, va.normalized);
+    case GL_UNSIGNED_BYTE:
+      return expand_attrib<S, uint8_t>(src, va.size, va.normalized);
+    default:
+      break;
+  }
+  // Remaining types must already match the element width of S.
+  assert(sizeof(typename ElementType<S>::ty) == bytes_per_type(va.type));
+  S partial = {0};
+  memcpy(&partial, src, va.size);
+  return partial;
+}
+
+// Loads a per-vertex (or per-instance) attribute into the SIMD value
+// `attrib`.
+//  - With no backing buffer (buf_size == 0) the attribute is zeroed.
+//  - With a non-zero divisor, the single value for `instance` is loaded and
+//    used to construct the whole attribute.
+//  - Otherwise `count` (2, 3 or 4) consecutive vertices starting at index
+//    `start` are loaded into the lanes in the order documented per case. A
+//    count of 0 leaves `attrib` untouched.
+template <typename T>
+void load_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
+ int count) {
+ typedef decltype(force_scalar(attrib)) scalar_type;
+ // If no buffer is available, just use a zero default.
+ if (!va.buf_size) {
+ attrib = T(scalar_type{0});
+ } else if (va.divisor != 0) {
+ char* src = (char*)va.buf + va.stride * instance + va.offset;
+ // Debug-only bounds check of the instanced read.
+ assert(src + va.size <= va.buf + va.buf_size);
+ attrib = T(load_attrib_scalar<scalar_type>(va, src));
+ } else {
+ // Specialized for WR's primitive vertex order/winding.
+ if (!count) return;
+ assert(count >= 2 && count <= 4);
+ char* src = (char*)va.buf + va.stride * start + va.offset;
+ switch (count) {
+ case 2: {
+ // Lines must be indexed at offsets 0, 1.
+ // Line vertexes fill vertex shader SIMD lanes as 0, 1, 1, 0.
+ scalar_type lanes[2] = {
+ load_attrib_scalar<scalar_type>(va, src),
+ load_attrib_scalar<scalar_type>(va, src + va.stride)};
+ attrib = (T){lanes[0], lanes[1], lanes[1], lanes[0]};
+ break;
+ }
+ case 3: {
+ // Triangles must be indexed at offsets 0, 1, 2.
+ // Triangle vertexes fill vertex shader SIMD lanes as 0, 1, 2, 2.
+ scalar_type lanes[3] = {
+ load_attrib_scalar<scalar_type>(va, src),
+ load_attrib_scalar<scalar_type>(va, src + va.stride),
+ load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
+ attrib = (T){lanes[0], lanes[1], lanes[2], lanes[2]};
+ break;
+ }
+ default:
+ // Quads must be successive triangles indexed at offsets 0, 1, 2, 2,
+ // 1, 3. Quad vertexes fill vertex shader SIMD lanes as 0, 1, 3, 2, so
+ // that the points form a convex path that can be traversed by the
+ // rasterizer.
+ attrib = (T){load_attrib_scalar<scalar_type>(va, src),
+ load_attrib_scalar<scalar_type>(va, src + va.stride),
+ load_attrib_scalar<scalar_type>(va, src + va.stride * 3),
+ load_attrib_scalar<scalar_type>(va, src + va.stride * 2)};
+ break;
+ }
+ }
+}
+
+// Loads a flat attribute: a single value used to construct the whole of
+// `attrib`. The value is taken from `instance` when the attribute has a
+// non-zero divisor, and from the vertex at index `start` otherwise. With no
+// backing buffer the attribute is zeroed; with a zero divisor and a count of
+// 0 it is left untouched.
+template <typename T>
+void load_flat_attrib(T& attrib, VertexAttrib& va, uint32_t start, int instance,
+                      int count) {
+  typedef decltype(force_scalar(attrib)) scalar_type;
+  // If no buffer is available, just use a zero default.
+  if (!va.buf_size) {
+    attrib = T{0};
+    return;
+  }
+  char* element = nullptr;
+  if (va.divisor == 0) {
+    if (!count) return;
+    element = (char*)va.buf + va.stride * start + va.offset;
+  } else {
+    element = (char*)va.buf + va.stride * instance + va.offset;
+  }
+  // Debug-only bounds check of the read.
+  assert(element + va.size <= va.buf + va.buf_size);
+  attrib = T(load_attrib_scalar<scalar_type>(va, element));
+}
+
+// Selects the vertex and fragment shader implementations of the given
+// program as the current ones. Program 0 clears both.
+void setup_program(GLuint program) {
+  if (program) {
+    Program& p = ctx->programs[program];
+    assert(p.impl);
+    assert(p.vert_impl);
+    assert(p.frag_impl);
+    vertex_shader = p.vert_impl;
+    fragment_shader = p.frag_impl;
+  } else {
+    vertex_shader = nullptr;
+    fragment_shader = nullptr;
+  }
+}
+
+extern ProgramLoader load_shader(const char* name);
+
+extern "C" {
+
+// Makes `program` the current program. If a different program was current
+// and it had been flagged as deleted, it is erased now that it is no longer
+// in use.
+void UseProgram(GLuint program) {
+  const GLuint previous = ctx->current_program;
+  if (previous && previous != program) {
+    auto* outgoing = ctx->programs.find(previous);
+    if (outgoing && outgoing->deleted) {
+      ctx->programs.erase(previous);
+    }
+  }
+  ctx->current_program = program;
+  setup_program(program);
+}
+
+// Stores the viewport as a rect running from (x, y) to
+// (x + width, y + height).
+void SetViewport(GLint x, GLint y, GLsizei width, GLsizei height) {
+  const GLint right = x + width;
+  const GLint bottom = y + height;
+  ctx->viewport = IntRect{x, y, right, bottom};
+}
+
+// Turns on blending, depth testing or scissor testing. Any other capability
+// is silently ignored.
+void Enable(GLenum cap) {
+  if (cap == GL_BLEND) {
+    ctx->blend = true;
+  } else if (cap == GL_DEPTH_TEST) {
+    ctx->depthtest = true;
+  } else if (cap == GL_SCISSOR_TEST) {
+    ctx->scissortest = true;
+  }
+}
+
+// Turns off blending, depth testing or scissor testing. Any other capability
+// is silently ignored.
+void Disable(GLenum cap) {
+  if (cap == GL_BLEND) {
+    ctx->blend = false;
+  } else if (cap == GL_DEPTH_TEST) {
+    ctx->depthtest = false;
+  } else if (cap == GL_SCISSOR_TEST) {
+    ctx->scissortest = false;
+  }
+}
+
+// Report the last error generated and clear the error status.
+GLenum GetError() {
+ GLenum error = ctx->last_error;
+ ctx->last_error = GL_NO_ERROR;
+ return error;
+}
+
+// Sets the error status to out-of-memory to indicate that a buffer
+// or texture re-allocation failed.
+static void out_of_memory() { ctx->last_error = GL_OUT_OF_MEMORY; }
+
+static const char* const extensions[] = {  // Extension names; GetIntegerv(GL_NUM_EXTENSIONS) reports the count and GetStringi(GL_EXTENSIONS, i) returns entry i, so order is observable.
+ "GL_ARB_blend_func_extended",
+ "GL_ARB_clear_texture",
+ "GL_ARB_copy_image",
+ "GL_ARB_draw_instanced",
+ "GL_ARB_explicit_attrib_location",
+ "GL_ARB_instanced_arrays",
+ "GL_ARB_invalidate_subdata",
+ "GL_ARB_texture_storage",
+ "GL_EXT_timer_query",
+ "GL_KHR_blend_equation_advanced",
+ "GL_KHR_blend_equation_advanced_coherent",
+ "GL_APPLE_rgb_422",
+};
+
+// Writes the integer state selected by `pname` into params[0]. Unknown
+// parameters are logged and assert; params is then left unmodified.
+void GetIntegerv(GLenum pname, GLint* params) {
+  assert(params);
+  GLint value = 0;
+  switch (pname) {
+    case GL_MAX_TEXTURE_UNITS:
+    case GL_MAX_TEXTURE_IMAGE_UNITS:
+      value = MAX_TEXTURE_UNITS;
+      break;
+    case GL_MAX_TEXTURE_SIZE:
+      value = 1 << 15;
+      break;
+    case GL_MAX_ARRAY_TEXTURE_LAYERS:
+      value = 0;
+      break;
+    case GL_READ_FRAMEBUFFER_BINDING:
+      value = ctx->read_framebuffer_binding;
+      break;
+    case GL_DRAW_FRAMEBUFFER_BINDING:
+      value = ctx->draw_framebuffer_binding;
+      break;
+    case GL_PIXEL_PACK_BUFFER_BINDING:
+      value = ctx->pixel_pack_buffer_binding;
+      break;
+    case GL_PIXEL_UNPACK_BUFFER_BINDING:
+      value = ctx->pixel_unpack_buffer_binding;
+      break;
+    case GL_NUM_EXTENSIONS:
+      value = sizeof(extensions) / sizeof(extensions[0]);
+      break;
+    case GL_MAJOR_VERSION:
+      value = 3;
+      break;
+    case GL_MINOR_VERSION:
+      value = 2;
+      break;
+    case GL_MIN_PROGRAM_TEXEL_OFFSET:
+      value = 0;
+      break;
+    case GL_MAX_PROGRAM_TEXEL_OFFSET:
+      value = MAX_TEXEL_OFFSET;
+      break;
+    default:
+      debugf("unhandled glGetIntegerv parameter %x\n", pname);
+      assert(false);
+      return;
+  }
+  params[0] = value;
+}
+
+// Writes the boolean state selected by `pname` into params[0]. Only
+// GL_DEPTH_WRITEMASK is handled; anything else is logged and asserts.
+void GetBooleanv(GLenum pname, GLboolean* params) {
+  assert(params);
+  if (pname == GL_DEPTH_WRITEMASK) {
+    params[0] = ctx->depthmask;
+    return;
+  }
+  debugf("unhandled glGetBooleanv parameter %x\n", pname);
+  assert(false);
+}
+
+// Returns the static identification string for `name`, or nullptr (after
+// logging and asserting) when `name` is not handled.
+const char* GetString(GLenum name) {
+  const char* text = nullptr;
+  switch (name) {
+    case GL_VENDOR:
+      text = "Mozilla Gfx";
+      break;
+    case GL_RENDERER:
+      text = "Software WebRender";
+      break;
+    case GL_VERSION:
+      text = "3.2";
+      break;
+    case GL_SHADING_LANGUAGE_VERSION:
+      text = "1.50";
+      break;
+    default:
+      debugf("unhandled glGetString parameter %x\n", name);
+      assert(false);
+      break;
+  }
+  return text;
+}
+
+// Returns the `index`-th extension name for GL_EXTENSIONS, or nullptr when
+// the index is out of range. Other names are logged, assert, and yield
+// nullptr.
+const char* GetStringi(GLenum name, GLuint index) {
+  if (name != GL_EXTENSIONS) {
+    debugf("unhandled glGetStringi parameter %x\n", name);
+    assert(false);
+    return nullptr;
+  }
+  return index < sizeof(extensions) / sizeof(extensions[0]) ? extensions[index]
+                                                            : nullptr;
+}
+
+// Normalizes an alpha blend factor against its RGB factor: when `a` and `rgb`
+// are the alpha/color counterparts of the same factor (for example
+// GL_SRC_ALPHA and GL_SRC_COLOR), the RGB factor is returned so both channels
+// end up using the same enum. Otherwise `a` is returned unchanged.
+GLenum remap_blendfunc(GLenum rgb, GLenum a) {
+  GLenum counterpart;
+  switch (a) {
+    case GL_SRC_ALPHA:
+      counterpart = GL_SRC_COLOR;
+      break;
+    case GL_ONE_MINUS_SRC_ALPHA:
+      counterpart = GL_ONE_MINUS_SRC_COLOR;
+      break;
+    case GL_DST_ALPHA:
+      counterpart = GL_DST_COLOR;
+      break;
+    case GL_ONE_MINUS_DST_ALPHA:
+      counterpart = GL_ONE_MINUS_DST_COLOR;
+      break;
+    case GL_CONSTANT_ALPHA:
+      counterpart = GL_CONSTANT_COLOR;
+      break;
+    case GL_ONE_MINUS_CONSTANT_ALPHA:
+      counterpart = GL_ONE_MINUS_CONSTANT_COLOR;
+      break;
+    case GL_SRC_COLOR:
+      counterpart = GL_SRC_ALPHA;
+      break;
+    case GL_ONE_MINUS_SRC_COLOR:
+      counterpart = GL_ONE_MINUS_SRC_ALPHA;
+      break;
+    case GL_DST_COLOR:
+      counterpart = GL_DST_ALPHA;
+      break;
+    case GL_ONE_MINUS_DST_COLOR:
+      counterpart = GL_ONE_MINUS_DST_ALPHA;
+      break;
+    case GL_CONSTANT_COLOR:
+      counterpart = GL_CONSTANT_ALPHA;
+      break;
+    case GL_ONE_MINUS_CONSTANT_COLOR:
+      counterpart = GL_ONE_MINUS_CONSTANT_ALPHA;
+      break;
+    case GL_SRC1_ALPHA:
+      counterpart = GL_SRC1_COLOR;
+      break;
+    case GL_ONE_MINUS_SRC1_ALPHA:
+      counterpart = GL_ONE_MINUS_SRC1_COLOR;
+      break;
+    case GL_SRC1_COLOR:
+      counterpart = GL_SRC1_ALPHA;
+      break;
+    case GL_ONE_MINUS_SRC1_COLOR:
+      counterpart = GL_ONE_MINUS_SRC1_ALPHA;
+      break;
+    default:
+      // Factors without a color/alpha counterpart are never remapped.
+      return a;
+  }
+  return rgb == counterpart ? counterpart : a;
+}
+
+// Collapses the current blend funcs and blend equation into a single integer
+// hash and maps it onto ctx->blend_key, so that all blend state can later be
+// handled by one switch statement. Unrecognized combinations are logged and
+// assert.
+static void hash_blend_key() {
+  const GLenum src_rgb = ctx->blendfunc_srgb;
+  const GLenum dst_rgb = ctx->blendfunc_drgb;
+  const GLenum src_alpha = ctx->blendfunc_sa;
+  const GLenum dst_alpha = ctx->blendfunc_da;
+  const GLenum equation = ctx->blend_equation;
+#define HASH_BLEND_KEY(x, y, z, w) ((x << 4) | (y) | (z << 24) | (w << 20))
+  int hash;
+  if (equation != GL_FUNC_ADD) {
+    // Non-default equations ignore the blend funcs entirely and hash only the
+    // equation itself.
+    hash = HASH_BLEND_KEY(equation, 0, 0, 0);
+  } else {
+    // Non-separate blend funcs use the two-argument form of the hash...
+    hash = HASH_BLEND_KEY(src_rgb, dst_rgb, 0, 0);
+    // ...while separate alpha funcs add the remaining two arguments.
+    if (src_rgb != src_alpha || dst_rgb != dst_alpha) {
+      hash |= HASH_BLEND_KEY(0, 0, src_alpha, dst_alpha);
+    }
+  }
+  switch (hash) {
+#define MAP_BLEND_KEY(...)                   \
+  case HASH_BLEND_KEY(__VA_ARGS__):          \
+    ctx->blend_key = BLEND_KEY(__VA_ARGS__); \
+    break;
+    FOR_EACH_BLEND_KEY(MAP_BLEND_KEY)
+    default:
+      debugf("blendfunc: %x, %x, separate: %x, %x, equation: %x\n", src_rgb,
+             dst_rgb, src_alpha, dst_alpha, equation);
+      assert(false);
+      break;
+  }
+}
+
+// Stores the RGB blend factors as given and the alpha factors after
+// normalizing them with remap_blendfunc(), then recomputes the blend key.
+void BlendFunc(GLenum srgb, GLenum drgb, GLenum sa, GLenum da) {
+  ctx->blendfunc_srgb = srgb;
+  ctx->blendfunc_drgb = drgb;
+  ctx->blendfunc_sa = remap_blendfunc(srgb, sa);
+  ctx->blendfunc_da = remap_blendfunc(drgb, da);
+  hash_blend_key();
+}
+
+// Stores the constant blend color. The components are passed to round_pixel()
+// in b, g, r, a order and the result is kept as 16-bit lanes, repeated twice.
+void BlendColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
+  I32 rounded = round_pixel((Float){b, g, r, a});
+  ctx->blendcolor = CONVERT(rounded, U16).xyzwxyzw;
+}
+
+// Sets the blend equation and refreshes the blend key, but only when the
+// equation actually changes.
+void BlendEquation(GLenum mode) {
+  assert(mode == GL_FUNC_ADD || mode == GL_MIN || mode == GL_MAX ||
+         (mode >= GL_MULTIPLY_KHR && mode <= GL_HSL_LUMINOSITY_KHR));
+  if (mode == ctx->blend_equation) {
+    return;
+  }
+  ctx->blend_equation = mode;
+  hash_blend_key();
+}
+
+// Records whether depth writes are enabled.
+void DepthMask(GLboolean flag) {
+  ctx->depthmask = flag;
+}
+
+// Stores the depth comparison function. Only GL_LESS and GL_LEQUAL are
+// expected; anything else asserts but is still stored.
+void DepthFunc(GLenum func) {
+  if (func != GL_LESS && func != GL_LEQUAL) {
+    assert(false);
+  }
+  ctx->depthfunc = func;
+}
+
+// Stores the scissor as a rectangle with corners (x, y) and
+// (x + width, y + height).
+void SetScissor(GLint x, GLint y, GLsizei width, GLsizei height) {
+  const GLint right = x + width;
+  const GLint bottom = y + height;
+  ctx->scissor = IntRect{x, y, right, bottom};
+}
+
+// Stores the clear color components in r, g, b, a order.
+void ClearColor(GLfloat r, GLfloat g, GLfloat b, GLfloat a) {
+  const GLfloat rgba[4] = {r, g, b, a};
+  for (int channel = 0; channel < 4; channel++) {
+    ctx->clearcolor[channel] = rgba[channel];
+  }
+}
+
+// Stores the value used when clearing the depth buffer.
+void ClearDepth(GLdouble depth) {
+  ctx->cleardepth = depth;
+}
+
+// Selects the active texture unit. Out-of-range units assert, and are clamped
+// into [0, MAX_TEXTURE_UNITS - 1] when asserts are compiled out.
+void ActiveTexture(GLenum texture) {
+  assert(texture >= GL_TEXTURE0);
+  assert(texture < GL_TEXTURE0 + MAX_TEXTURE_UNITS);
+  const int unit = int(texture - GL_TEXTURE0);
+  ctx->active_texture_unit = clamp(unit, 0, int(MAX_TEXTURE_UNITS - 1));
+}
+
+// Creates `n` new query objects and writes their ids to `result`.
+void GenQueries(GLsizei n, GLuint* result) {
+  for (GLuint* out = result; n > 0; --n, ++out) {
+    Query fresh;
+    *out = ctx->queries.insert(fresh);
+  }
+}
+
+// Erases query `n` and, if it was actually erased, unlinks it from the
+// time-elapsed and samples-passed bindings.
+void DeleteQuery(GLuint n) {
+  if (!n || !ctx->queries.erase(n)) {
+    return;
+  }
+  unlink(ctx->time_elapsed_query, n);
+  unlink(ctx->samples_passed_query, n);
+}
+
+// Creates `n` new buffer objects and writes their ids to `result`.
+void GenBuffers(int n, GLuint* result) {
+  for (GLuint* out = result; n > 0; --n, ++out) {
+    Buffer fresh;
+    *out = ctx->buffers.insert(fresh);
+  }
+}
+
+// Erases buffer `n` and, if it was actually erased, unlinks it from the pixel
+// pack, pixel unpack and array buffer bindings.
+void DeleteBuffer(GLuint n) {
+  if (!n || !ctx->buffers.erase(n)) {
+    return;
+  }
+  unlink(ctx->pixel_pack_buffer_binding, n);
+  unlink(ctx->pixel_unpack_buffer_binding, n);
+  unlink(ctx->array_buffer_binding, n);
+}
+
+// Creates `n` new vertex array objects and writes their ids to `result`.
+void GenVertexArrays(int n, GLuint* result) {
+  for (GLuint* out = result; n > 0; --n, ++out) {
+    VertexArray fresh;
+    *out = ctx->vertex_arrays.insert(fresh);
+  }
+}
+
+// Erases vertex array `n` and, if it was actually erased, unlinks it from the
+// current vertex array binding.
+void DeleteVertexArray(GLuint n) {
+  if (!n || !ctx->vertex_arrays.erase(n)) {
+    return;
+  }
+  unlink(ctx->current_vertex_array, n);
+}
+
+// Registers a new shader object of the given type and returns its id.
+GLuint CreateShader(GLenum type) {
+  Shader fresh;
+  fresh.type = type;
+  return ctx->shaders.insert(fresh);
+}
+
+// Looks up the program loader registered under `name` and stores it on the
+// shader. An unknown name is logged, and the (empty) loader is still stored.
+void ShaderSourceByName(GLuint shader, char* name) {
+  Shader& target = ctx->shaders[shader];
+  target.loader = load_shader(name);
+  if (target.loader) {
+    return;
+  }
+  debugf("unknown shader %s\n", name);
+}
+
+// Attaches a vertex or fragment shader to a program. The first attached
+// shader that has a loader instantiates the program implementation; any other
+// shader type asserts.
+void AttachShader(GLuint program, GLuint shader) {
+  Program& prog = ctx->programs[program];
+  Shader& attached = ctx->shaders[shader];
+  if (attached.type != GL_VERTEX_SHADER &&
+      attached.type != GL_FRAGMENT_SHADER) {
+    assert(0);
+    return;
+  }
+  // Vertex and fragment shaders are handled identically here.
+  if (!prog.impl && attached.loader) {
+    prog.impl = attached.loader();
+  }
+}
+
+// Erases shader `n`; id 0 is ignored.
+void DeleteShader(GLuint n) {
+  if (!n) {
+    return;
+  }
+  ctx->shaders.erase(n);
+}
+
+// Registers a new, empty program object and returns its id.
+GLuint CreateProgram() {
+  Program fresh;
+  return ctx->programs.insert(fresh);
+}
+
+// Deletes program `n`. A program that is currently in use is only flagged as
+// deleted (UseProgram erases it once it is no longer current); any other
+// program is erased immediately.
+void DeleteProgram(GLuint n) {
+  if (!n) return;
+  if (ctx->current_program != n) {
+    ctx->programs.erase(n);
+    return;
+  }
+  if (auto* in_use = ctx->programs.find(n)) {
+    in_use->deleted = true;
+  }
+}
+
+// Resolves the vertex and fragment shader implementations of a program from
+// its program implementation, if they have not been resolved yet. Does
+// nothing (beyond asserting) when the program has no implementation.
+void LinkProgram(GLuint program) {
+  Program& prog = ctx->programs[program];
+  assert(prog.impl);
+  if (prog.impl) {
+    assert(prog.impl->interpolants_size() <= sizeof(Interpolants));
+    if (!prog.vert_impl) prog.vert_impl = prog.impl->get_vertex_shader();
+    if (!prog.frag_impl) prog.frag_impl = prog.impl->get_fragment_shader();
+  }
+}
+
+// Returns 1 when `program` exists and has an implementation, otherwise 0.
+GLint GetLinkStatus(GLuint program) {
+  auto* prog = ctx->programs.find(program);
+  return (prog && prog->impl) ? 1 : 0;
+}
+
+// Forwards an attribute name/index binding to the program implementation.
+// Does nothing (beyond asserting) when the program has no implementation.
+void BindAttribLocation(GLuint program, GLuint index, char* name) {
+  Program& prog = ctx->programs[program];
+  assert(prog.impl);
+  if (prog.impl) {
+    prog.impl->bind_attrib(name, index);
+  }
+}
+
+// Returns the attribute location reported by the program implementation, or
+// -1 when the program has no implementation.
+GLint GetAttribLocation(GLuint program, char* name) {
+  Program& prog = ctx->programs[program];
+  assert(prog.impl);
+  return prog.impl ? prog.impl->get_attrib(name) : -1;
+}
+
+// Returns the uniform location reported by the program implementation, or -1
+// when the program has no implementation.
+GLint GetUniformLocation(GLuint program, char* name) {
+  Program& prog = ctx->programs[program];
+  assert(prog.impl);
+  if (prog.impl) {
+    return prog.impl->get_uniform(name);
+  }
+  return -1;
+}
+
+// Returns a monotonically increasing timestamp used for GL_TIME_ELAPSED
+// queries. The Windows and POSIX paths return nanoseconds.
+static uint64_t get_time_value() {
+#ifdef __MACH__
+  // NOTE(review): mach_absolute_time() is returned unscaled; confirm whether
+  // callers expect nanoseconds on targets whose timebase is not 1:1.
+  return mach_absolute_time();
+#elif defined(_WIN32)
+  LARGE_INTEGER time;
+  static bool have_frequency = false;
+  static LARGE_INTEGER frequency;
+  if (!have_frequency) {
+    QueryPerformanceFrequency(&frequency);
+    have_frequency = true;
+  }
+  QueryPerformanceCounter(&time);
+  // Convert ticks to nanoseconds without forming ticks * 1e9 directly: that
+  // product wraps 64 bits after roughly half an hour of uptime with a 10 MHz
+  // counter. Splitting into whole seconds plus a sub-second remainder yields
+  // the same value while keeping every intermediate in range.
+  const uint64_t ticks = (uint64_t)time.QuadPart;
+  const uint64_t ticks_per_sec = (uint64_t)frequency.QuadPart;
+  return (ticks / ticks_per_sec) * 1000000000ULL +
+         (ticks % ticks_per_sec) * 1000000000ULL / ticks_per_sec;
+#else
+  struct timespec tp;
+  clock_gettime(CLOCK_MONOTONIC, &tp);
+  return tp.tv_sec * 1000000000ULL + tp.tv_nsec;
+#endif
+}
+
+// Binds query `id` to `target` and initializes its value: zero for
+// GL_SAMPLES_PASSED, the current timestamp for GL_TIME_ELAPSED. Other targets
+// are logged and assert.
+void BeginQuery(GLenum target, GLuint id) {
+  ctx->get_binding(target) = id;
+  Query& query = ctx->queries[id];
+  if (target == GL_SAMPLES_PASSED) {
+    query.value = 0;
+  } else if (target == GL_TIME_ELAPSED) {
+    query.value = get_time_value();
+  } else {
+    debugf("unknown query target %x for query %d\n", target, id);
+    assert(false);
+  }
+}
+
+// Finishes the query bound to `target` and clears the binding. For
+// GL_TIME_ELAPSED the stored start timestamp is replaced by the elapsed time;
+// GL_SAMPLES_PASSED keeps its value. Other targets are logged and assert.
+void EndQuery(GLenum target) {
+  Query& query = ctx->queries[ctx->get_binding(target)];
+  if (target == GL_TIME_ELAPSED) {
+    query.value = get_time_value() - query.value;
+  } else if (target != GL_SAMPLES_PASSED) {
+    debugf("unknown query target %x\n", target);
+    assert(false);
+  }
+  ctx->get_binding(target) = 0;
+}
+
+// Writes the stored value of query `id` to params[0] for GL_QUERY_RESULT.
+// Any other `pname` asserts.
+void GetQueryObjectui64v(GLuint id, GLenum pname, GLuint64* params) {
+  Query& query = ctx->queries[id];
+  if (pname != GL_QUERY_RESULT) {
+    assert(false);
+    return;
+  }
+  assert(params);
+  params[0] = query.value;
+}
+
+// Makes `vertex_array` current, flagging the vertex array for revalidation
+// when the binding actually changes.
+void BindVertexArray(GLuint vertex_array) {
+  const bool changed = vertex_array != ctx->current_vertex_array;
+  if (changed) {
+    ctx->validate_vertex_array = true;
+  }
+  ctx->current_vertex_array = vertex_array;
+}
+
+// Stores `texture` in the binding slot selected by `target`.
+void BindTexture(GLenum target, GLuint texture) {
+  auto& slot = ctx->get_binding(target);
+  slot = texture;
+}
+
+// Stores `buffer` in the binding slot selected by `target`.
+void BindBuffer(GLenum target, GLuint buffer) {
+  auto& slot = ctx->get_binding(target);
+  slot = buffer;
+}
+
+// Binds framebuffer `fb`. GL_FRAMEBUFFER updates both the read and the draw
+// binding; GL_READ_FRAMEBUFFER / GL_DRAW_FRAMEBUFFER update only their own.
+void BindFramebuffer(GLenum target, GLuint fb) {
+  if (target != GL_FRAMEBUFFER) {
+    assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
+    ctx->get_binding(target) = fb;
+    return;
+  }
+  ctx->read_framebuffer_binding = fb;
+  ctx->draw_framebuffer_binding = fb;
+}
+
+// Stores `rb` in the binding slot selected by `target`.
+void BindRenderbuffer(GLenum target, GLuint rb) {
+  auto& slot = ctx->get_binding(target);
+  slot = rb;
+}
+
+// Handles the supported pixel store parameters: GL_UNPACK_ALIGNMENT must be
+// 1 (asserted), GL_UNPACK_ROW_LENGTH is recorded. Others are ignored.
+void PixelStorei(GLenum name, GLint param) {
+  switch (name) {
+    case GL_UNPACK_ALIGNMENT:
+      assert(param == 1);
+      break;
+    case GL_UNPACK_ROW_LENGTH:
+      ctx->unpack_row_length = param;
+      break;
+    default:
+      break;
+  }
+}
+
+// Maps unsized/generic formats onto the sized internal format used for
+// storage; every other format is returned unchanged.
+static GLenum remap_internal_format(GLenum format) {
+  if (format == GL_DEPTH_COMPONENT) return GL_DEPTH_COMPONENT24;
+  if (format == GL_RGBA) return GL_RGBA8;
+  if (format == GL_RED) return GL_R8;
+  if (format == GL_RG) return GL_RG8;
+  if (format == GL_RGB_422_APPLE) return GL_RGB_RAW_422_APPLE;
+  return format;
+}
+
+} // extern "C"
+
+// Reports whether data supplied in `external_format` must be converted
+// before it can be stored as `internal_format`. This is only the case for
+// GL_RGBA data stored as GL_RGBA8.
+static bool format_requires_conversion(GLenum external_format,
+                                       GLenum internal_format) {
+  return external_format == GL_RGBA && internal_format == GL_RGBA8;
+}
+
+// Copies `width` 32-bit pixels from `src` to `dest`, exchanging the two
+// bytes selected by 0x00FF00FF and leaving the other two bytes in place.
+// Pixels are processed four at a time first, then singly.
+static inline void copy_bgra8_to_rgba8(uint32_t* dest, const uint32_t* src,
+                                       int width) {
+  while (width >= 4) {
+    U32 pixels = unaligned_load<U32>(src);
+    U32 swapped = pixels & 0x00FF00FF;
+    unaligned_store(dest,
+                    (pixels & 0xFF00FF00) | (swapped << 16) | (swapped >> 16));
+    width -= 4;
+    dest += 4;
+    src += 4;
+  }
+  while (width > 0) {
+    uint32_t pixel = *src;
+    uint32_t swapped = pixel & 0x00FF00FF;
+    *dest = (pixel & 0xFF00FF00) | (swapped << 16) | (swapped >> 16);
+    width--;
+    dest++;
+    src++;
+  }
+}
+
+// Copies a width x height block of pixels row by row from `src_buf` to
+// `dst_buf`, advancing by the respective strides (in bytes). GL_RGBA data
+// destined for GL_RGBA8 goes through copy_bgra8_to_rgba8(); every other
+// combination is a plain memcpy of each row.
+static void convert_copy(GLenum external_format, GLenum internal_format,
+                         uint8_t* dst_buf, size_t dst_stride,
+                         const uint8_t* src_buf, size_t src_stride,
+                         size_t width, size_t height) {
+  if (external_format == GL_RGBA && internal_format == GL_RGBA8) {
+    while (height) {
+      copy_bgra8_to_rgba8((uint32_t*)dst_buf, (const uint32_t*)src_buf, width);
+      dst_buf += dst_stride;
+      src_buf += src_stride;
+      height--;
+    }
+    return;
+  }
+  const size_t row_bytes = width * bytes_for_internal_format(internal_format);
+  while (height) {
+    memcpy(dst_buf, src_buf, row_bytes);
+    dst_buf += dst_stride;
+    src_buf += src_stride;
+    height--;
+  }
+}
+
+// (Re)configures the storage of texture `t`.
+//
+// external_format: caller-visible format; remapped to a sized internal format.
+// width, height:   new texture dimensions.
+// buf, stride:     optional caller-supplied pixel data. It is used directly as
+//                  the texture's buffer unless the format needs conversion, in
+//                  which case it is copied into internally managed storage.
+// min_width, min_height: forwarded to Texture::allocate().
+//
+// Any pending delayed clear is dropped. If allocation fails, the
+// out-of-memory error is recorded.
+static void set_tex_storage(Texture& t, GLenum external_format, GLsizei width,
+                            GLsizei height, void* buf = nullptr,
+                            GLsizei stride = 0, GLsizei min_width = 0,
+                            GLsizei min_height = 0) {
+  GLenum internal_format = remap_internal_format(external_format);
+  bool changed = false;
+  if (t.width != width || t.height != height ||
+      t.internal_format != internal_format) {
+    changed = true;
+    t.internal_format = internal_format;
+    t.width = width;
+    t.height = height;
+  }
+  // If we are changed from an internally managed buffer to an externally
+  // supplied one or vice versa, ensure that we clean up old buffer state.
+  // However, if we have to convert the data from a non-native format, then
+  // always treat it as internally managed since we will need to copy to an
+  // internally managed native format buffer.
+  bool should_free = buf == nullptr || format_requires_conversion(
+                                           external_format, internal_format);
+  if (t.should_free() != should_free) {
+    changed = true;
+    t.cleanup();
+    t.set_should_free(should_free);
+  }
+  // If now an external buffer, explicitly set it...
+  if (!should_free) {
+    t.set_buffer(buf, stride);
+  }
+  t.disable_delayed_clear();
+  if (!t.allocate(changed, min_width, min_height)) {
+    out_of_memory();
+  }
+  // If we have a buffer that needs format conversion, then do that now. Skip
+  // the copy when the texture has no backing store (presumably the state
+  // after a failed allocation), mirroring the t.buf check in TexSubImage2D,
+  // so that we never write through a null destination.
+  if (buf && should_free && t.buf) {
+    convert_copy(external_format, internal_format, (uint8_t*)t.buf, t.stride(),
+                 (const uint8_t*)buf, stride, width, height);
+  }
+}
+
+extern "C" {
+
+// Allocates storage for the texture bound to `target`. Only a single mip
+// level is supported (asserted).
+void TexStorage2D(GLenum target, GLint levels, GLenum internal_format,
+                  GLsizei width, GLsizei height) {
+  assert(levels == 1);
+  Texture& bound = ctx->textures[ctx->get_binding(target)];
+  set_tex_storage(bound, internal_format, width, height);
+}
+
+// Returns the sized internal format that corresponds to a pixel data
+// format/type pair. Unsupported pairs are logged, assert, and yield 0.
+GLenum internal_format_for_data(GLenum format, GLenum ty) {
+  switch (format) {
+    case GL_RED:
+      if (ty == GL_UNSIGNED_BYTE) return GL_R8;
+      if (ty == GL_UNSIGNED_SHORT) return GL_R16;
+      break;
+    case GL_RG:
+      if (ty == GL_UNSIGNED_BYTE) return GL_RG8;
+      if (ty == GL_UNSIGNED_SHORT) return GL_RG16;
+      break;
+    case GL_BGRA:
+      if (ty == GL_UNSIGNED_BYTE || ty == GL_UNSIGNED_INT_8_8_8_8_REV) {
+        return GL_RGBA8;
+      }
+      break;
+    case GL_RGBA:
+      if (ty == GL_UNSIGNED_BYTE || ty == GL_UNSIGNED_INT_8_8_8_8_REV) {
+        return GL_RGBA8;
+      }
+      if (ty == GL_FLOAT) return GL_RGBA32F;
+      break;
+    case GL_RGBA_INTEGER:
+      if (ty == GL_INT) return GL_RGBA32I;
+      break;
+    case GL_RGB_422_APPLE:
+      if (ty == GL_UNSIGNED_SHORT_8_8_REV_APPLE) return GL_RGB_RAW_422_APPLE;
+      break;
+    default:
+      break;
+  }
+  debugf("unknown internal format for format %x, type %x\n", format, ty);
+  assert(false);
+  return 0;
+}
+
+// Returns the buffer bound as the pixel pack buffer, or nullptr when the
+// binding is 0.
+static Buffer* get_pixel_pack_buffer() {
+  if (!ctx->pixel_pack_buffer_binding) {
+    return nullptr;
+  }
+  return &ctx->buffers[ctx->pixel_pack_buffer_binding];
+}
+
+// Resolves a pixel pack destination. With a pack buffer bound, `data` is
+// treated as a byte offset into that buffer (nullptr if the buffer has no
+// storage); otherwise `data` is returned as-is.
+static void* get_pixel_pack_buffer_data(void* data) {
+  Buffer* bound = get_pixel_pack_buffer();
+  if (!bound) {
+    return data;
+  }
+  if (!bound->buf) {
+    return nullptr;
+  }
+  return bound->buf + (size_t)data;
+}
+
+// Returns the buffer bound as the pixel unpack buffer, or nullptr when the
+// binding is 0.
+static Buffer* get_pixel_unpack_buffer() {
+  if (!ctx->pixel_unpack_buffer_binding) {
+    return nullptr;
+  }
+  return &ctx->buffers[ctx->pixel_unpack_buffer_binding];
+}
+
+// Resolves a pixel unpack source. With an unpack buffer bound, `data` is
+// treated as a byte offset into that buffer (nullptr if the buffer has no
+// storage); otherwise `data` is returned as-is.
+static void* get_pixel_unpack_buffer_data(void* data) {
+  Buffer* bound = get_pixel_unpack_buffer();
+  if (!bound) {
+    return data;
+  }
+  if (!bound->buf) {
+    return nullptr;
+  }
+  return bound->buf + (size_t)data;
+}
+
+// Uploads a width x height block of pixel data into the texture bound to
+// `target` at (xoffset, yoffset). Only level 0 is supported. `data` is
+// resolved through the pixel unpack buffer, if one is bound, and the source
+// row pitch honors the unpack row length when it is non-zero.
+void TexSubImage2D(GLenum target, GLint level, GLint xoffset, GLint yoffset,
+                   GLsizei width, GLsizei height, GLenum format, GLenum ty,
+                   void* data) {
+  if (level != 0) {
+    assert(false);
+    return;
+  }
+  data = get_pixel_unpack_buffer_data(data);
+  if (!data) return;
+  Texture& tex = ctx->textures[ctx->get_binding(target)];
+  // The uploaded rectangle is about to be overwritten, so any pending delayed
+  // clear may skip it.
+  IntRect upload_rect = {xoffset, yoffset, xoffset + width, yoffset + height};
+  prepare_texture(tex, &upload_rect);
+  assert(xoffset + width <= tex.width);
+  assert(yoffset + height <= tex.height);
+  assert(ctx->unpack_row_length == 0 || ctx->unpack_row_length >= width);
+  GLsizei src_row_pixels =
+      ctx->unpack_row_length != 0 ? ctx->unpack_row_length : width;
+  assert(tex.internal_format == internal_format_for_data(format, ty));
+  int src_bpp = format_requires_conversion(format, tex.internal_format)
+                    ? bytes_for_internal_format(format)
+                    : tex.bpp();
+  if (!src_bpp || !tex.buf) return;
+  convert_copy(format, tex.internal_format,
+               (uint8_t*)tex.sample_ptr(xoffset, yoffset), tex.stride(),
+               (const uint8_t*)data, src_row_pixels * src_bpp, width, height);
+}
+
+// Allocates storage for level 0 of the texture bound to `target` and then
+// uploads `data` into all of it. Other levels assert and are ignored; a
+// non-zero border asserts.
+void TexImage2D(GLenum target, GLint level, GLint internal_format,
+                GLsizei width, GLsizei height, GLint border, GLenum format,
+                GLenum ty, void* data) {
+  if (level == 0) {
+    assert(border == 0);
+    TexStorage2D(target, 1, internal_format, width, height);
+    TexSubImage2D(target, 0, 0, 0, width, height, format, ty, data);
+  } else {
+    assert(false);
+  }
+}
+
+// Intentionally a no-op: mipmaps are not supported yet.
+void GenerateMipmap(UNUSED GLenum target) {
+  // TODO: support mipmaps
+}
+
+// Applies a texture parameter to texture `texid`. The min/mag filters are
+// stored; wrap modes must be GL_CLAMP_TO_EDGE (asserted); everything else is
+// ignored.
+void SetTextureParameter(GLuint texid, GLenum pname, GLint param) {
+  Texture& tex = ctx->textures[texid];
+  switch (pname) {
+    case GL_TEXTURE_WRAP_S:
+    case GL_TEXTURE_WRAP_T:
+      assert(param == GL_CLAMP_TO_EDGE);
+      break;
+    case GL_TEXTURE_MIN_FILTER:
+      tex.min_filter = param;
+      break;
+    case GL_TEXTURE_MAG_FILTER:
+      tex.mag_filter = param;
+      break;
+    default:
+      break;
+  }
+}
+
+// Applies a texture parameter to the texture currently bound to `target`.
+void TexParameteri(GLenum target, GLenum pname, GLint param) {
+  const GLuint bound = ctx->get_binding(target);
+  SetTextureParameter(bound, pname, param);
+}
+
+// Creates `n` new texture objects and writes their ids to `result`.
+void GenTextures(int n, GLuint* result) {
+  for (GLuint* out = result; n > 0; --n, ++out) {
+    Texture fresh;
+    *out = ctx->textures.insert(fresh);
+  }
+}
+
+// Erases texture `n` and, if it was actually erased, unlinks it from every
+// texture unit.
+void DeleteTexture(GLuint n) {
+  if (!n || !ctx->textures.erase(n)) {
+    return;
+  }
+  size_t unit = 0;
+  while (unit < MAX_TEXTURE_UNITS) {
+    ctx->texture_units[unit].unlink(n);
+    unit++;
+  }
+}
+
+// Creates `n` new renderbuffer objects and writes their ids to `result`.
+void GenRenderbuffers(int n, GLuint* result) {
+  for (GLuint* out = result; n > 0; --n, ++out) {
+    Renderbuffer fresh;
+    *out = ctx->renderbuffers.insert(fresh);
+  }
+}
+
+// Detaches this renderbuffer's backing texture from the color and depth
+// attachments of every framebuffer, then deletes that texture.
+void Renderbuffer::on_erase() {
+  for (auto* fb : ctx->framebuffers) {
+    if (!fb) {
+      continue;
+    }
+    unlink(fb->color_attachment, texture);
+    unlink(fb->depth_attachment, texture);
+  }
+  DeleteTexture(texture);
+}
+
+// Erases renderbuffer `n` and, if it was actually erased, unlinks it from
+// the renderbuffer binding.
+void DeleteRenderbuffer(GLuint n) {
+  if (!n || !ctx->renderbuffers.erase(n)) {
+    return;
+  }
+  unlink(ctx->renderbuffer_binding, n);
+}
+
+// Creates `n` new framebuffer objects and writes their ids to `result`.
+void GenFramebuffers(int n, GLuint* result) {
+  for (GLuint* out = result; n > 0; --n, ++out) {
+    Framebuffer fresh;
+    *out = ctx->framebuffers.insert(fresh);
+  }
+}
+
+// Erases framebuffer `n` and, if it was actually erased, unlinks it from the
+// read and draw framebuffer bindings.
+void DeleteFramebuffer(GLuint n) {
+  if (!n || !ctx->framebuffers.erase(n)) {
+    return;
+  }
+  unlink(ctx->read_framebuffer_binding, n);
+  unlink(ctx->draw_framebuffer_binding, n);
+}
+
+// Allocates storage for the renderbuffer bound to `target`. A renderbuffer
+// is backed by a texture, created on first use. Every depth component format
+// is stored as GL_DEPTH_COMPONENT24.
+void RenderbufferStorage(GLenum target, GLenum internal_format, GLsizei width,
+                         GLsizei height) {
+  Renderbuffer& rb = ctx->renderbuffers[ctx->get_binding(target)];
+  if (!rb.texture) {
+    GenTextures(1, &rb.texture);
+  }
+  const bool is_depth = internal_format == GL_DEPTH_COMPONENT ||
+                        internal_format == GL_DEPTH_COMPONENT16 ||
+                        internal_format == GL_DEPTH_COMPONENT24 ||
+                        internal_format == GL_DEPTH_COMPONENT32;
+  if (is_depth) {
+    // Force depth format to 24 bits...
+    internal_format = GL_DEPTH_COMPONENT24;
+  }
+  set_tex_storage(ctx->textures[rb.texture], internal_format, width, height);
+}
+
+// Describes the layout of attribute `index` in the current vertex array,
+// sourcing it from the currently bound array buffer, and flags the vertex
+// array for revalidation. An out-of-range index asserts and is ignored.
+void VertexAttribPointer(GLuint index, GLint size, GLenum type, bool normalized,
+                         GLsizei stride, GLuint offset) {
+  VertexArray& array = ctx->vertex_arrays[ctx->current_vertex_array];
+  if (index < NULL_ATTRIB) {
+    VertexAttrib& attrib = array.attribs[index];
+    // The stored size is in bytes: component count times bytes per component.
+    attrib.size = size * bytes_per_type(type);
+    attrib.type = type;
+    attrib.normalized = normalized;
+    attrib.stride = stride;
+    attrib.offset = offset;
+    attrib.vertex_buffer = ctx->array_buffer_binding;
+    attrib.vertex_array = ctx->current_vertex_array;
+    ctx->validate_vertex_array = true;
+  } else {
+    assert(0);
+  }
+}
+
+// Integer variant of VertexAttribPointer: describes the layout of attribute
+// `index` in the current vertex array with normalization always off. An
+// out-of-range index asserts and is ignored.
+void VertexAttribIPointer(GLuint index, GLint size, GLenum type, GLsizei stride,
+                          GLuint offset) {
+  VertexArray& array = ctx->vertex_arrays[ctx->current_vertex_array];
+  if (index < NULL_ATTRIB) {
+    VertexAttrib& attrib = array.attribs[index];
+    // The stored size is in bytes: component count times bytes per component.
+    attrib.size = size * bytes_per_type(type);
+    attrib.type = type;
+    attrib.normalized = false;
+    attrib.stride = stride;
+    attrib.offset = offset;
+    attrib.vertex_buffer = ctx->array_buffer_binding;
+    attrib.vertex_array = ctx->current_vertex_array;
+    ctx->validate_vertex_array = true;
+  } else {
+    assert(0);
+  }
+}
+
+// Enables attribute `index` of the current vertex array, flagging it for
+// revalidation if the attribute was previously disabled, and raises the
+// array's max_attrib to at least `index`. An out-of-range index asserts and
+// is ignored.
+void EnableVertexAttribArray(GLuint index) {
+  VertexArray& array = ctx->vertex_arrays[ctx->current_vertex_array];
+  if (index < NULL_ATTRIB) {
+    VertexAttrib& attrib = array.attribs[index];
+    if (!attrib.enabled) {
+      ctx->validate_vertex_array = true;
+    }
+    attrib.enabled = true;
+    array.max_attrib = max(array.max_attrib, (int)index);
+  } else {
+    assert(0);
+  }
+}
+
+// Disables attribute `index` of the current vertex array, flagging it for
+// revalidation if the attribute was previously enabled. An out-of-range
+// index asserts and is ignored.
+void DisableVertexAttribArray(GLuint index) {
+  VertexArray& array = ctx->vertex_arrays[ctx->current_vertex_array];
+  if (index < NULL_ATTRIB) {
+    VertexAttrib& attrib = array.attribs[index];
+    if (attrib.enabled) {
+      ctx->validate_vertex_array = true;
+    }
+    attrib.disable();
+  } else {
+    assert(0);
+  }
+}
+
+// Sets the instancing divisor of attribute `index` in the current vertex
+// array. Only 0 (per-vertex) and 1 (per-instance) are supported; any other
+// divisor or an out-of-range index asserts and is ignored.
+void VertexAttribDivisor(GLuint index, GLuint divisor) {
+  VertexArray& array = ctx->vertex_arrays[ctx->current_vertex_array];
+  const bool supported = index < NULL_ATTRIB && divisor <= 1;
+  if (!supported) {
+    assert(0);
+    return;
+  }
+  array.attribs[index].divisor = divisor;
+}
+
+// Resizes the buffer bound to `target` to `size` bytes when the size differs
+// (recording out-of-memory on failure and flagging the vertex array for
+// revalidation), then copies `data` into it if data was supplied and fits.
+void BufferData(GLenum target, GLsizeiptr size, void* data,
+                UNUSED GLenum usage) {
+  Buffer& bound = ctx->buffers[ctx->get_binding(target)];
+  const bool resize = size != bound.size;
+  if (resize) {
+    if (!bound.allocate(size)) {
+      out_of_memory();
+    }
+    ctx->validate_vertex_array = true;
+  }
+  if (!data || !bound.buf || size > bound.size) {
+    return;
+  }
+  memcpy(bound.buf, data, size);
+}
+
+// Copies `size` bytes from `data` into the buffer bound to `target`, starting
+// at byte `offset`. The copy is skipped when `data` is null, the buffer has
+// no storage, or the range does not lie entirely inside the buffer.
+void BufferSubData(GLenum target, GLintptr offset, GLsizeiptr size,
+                   void* data) {
+  Buffer& b = ctx->buffers[ctx->get_binding(target)];
+  // Both values are signed. A negative offset could still satisfy the upper
+  // bound check below and make the copy start before the buffer, so reject
+  // negative values explicitly (as MapBufferRange does for its offset).
+  assert(offset >= 0 && size >= 0);
+  assert(offset + size <= b.size);
+  if (data && b.buf && offset >= 0 && size >= 0 && offset + size <= b.size) {
+    memcpy(&b.buf[offset], data, size);
+  }
+}
+
+// Returns a pointer to the storage of the buffer bound to `target`.
+void* MapBuffer(GLenum target, UNUSED GLbitfield access) {
+  Buffer& bound = ctx->buffers[ctx->get_binding(target)];
+  void* storage = bound.buf;
+  return storage;
+}
+
+// Returns a pointer `offset` bytes into the buffer bound to `target`, or
+// nullptr when the buffer has no storage, the offset is negative, the length
+// is not positive, or the range extends past the end of the buffer.
+void* MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length,
+                     UNUSED GLbitfield access) {
+  Buffer& bound = ctx->buffers[ctx->get_binding(target)];
+  if (!bound.buf || offset < 0 || length <= 0 ||
+      offset + length > bound.size) {
+    return nullptr;
+  }
+  return bound.buf + offset;
+}
+
+// Reports whether the buffer bound to `target` has storage; no other work is
+// done.
+GLboolean UnmapBuffer(GLenum target) {
+  Buffer& bound = ctx->buffers[ctx->get_binding(target)];
+  const bool has_storage = bound.buf != nullptr;
+  return has_storage;
+}
+
+// Forwards an integer uniform to the current vertex shader, if there is one.
+void Uniform1i(GLint location, GLint V0) {
+  if (!vertex_shader) {
+    return;
+  }
+  vertex_shader->set_uniform_1i(location, V0);
+}
+// Forwards a single vec4 uniform to the current vertex shader, if there is
+// one. Only count == 1 is supported (asserted).
+void Uniform4fv(GLint location, GLsizei count, const GLfloat* v) {
+  assert(count == 1);
+  if (!vertex_shader) {
+    return;
+  }
+  vertex_shader->set_uniform_4fv(location, v);
+}
+// Forwards a single 4x4 matrix uniform to the current vertex shader, if there
+// is one. Only count == 1 without transposition is supported (asserted).
+void UniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose,
+                      const GLfloat* value) {
+  assert(count == 1);
+  assert(!transpose);
+  if (!vertex_shader) {
+    return;
+  }
+  vertex_shader->set_uniform_matrix4fv(location, value);
+}
+
+// Attaches `texture` as the color or depth attachment of the framebuffer
+// bound to `target`. Any other attachment point asserts.
+void FramebufferTexture2D(GLenum target, GLenum attachment, GLenum textarget,
+                          GLuint texture, GLint level) {
+  assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
+  assert(textarget == GL_TEXTURE_2D || textarget == GL_TEXTURE_RECTANGLE);
+  assert(level == 0);
+  Framebuffer& bound = ctx->framebuffers[ctx->get_binding(target)];
+  switch (attachment) {
+    case GL_COLOR_ATTACHMENT0:
+      bound.color_attachment = texture;
+      break;
+    case GL_DEPTH_ATTACHMENT:
+      bound.depth_attachment = texture;
+      break;
+    default:
+      assert(0);
+      break;
+  }
+}
+
+// Attaches the backing texture of `renderbuffer` as the color or depth
+// attachment of the framebuffer bound to `target`. Any other attachment
+// point asserts.
+void FramebufferRenderbuffer(GLenum target, GLenum attachment,
+                             GLenum renderbuffertarget, GLuint renderbuffer) {
+  assert(target == GL_READ_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER);
+  assert(renderbuffertarget == GL_RENDERBUFFER);
+  Framebuffer& bound = ctx->framebuffers[ctx->get_binding(target)];
+  Renderbuffer& source = ctx->renderbuffers[renderbuffer];
+  switch (attachment) {
+    case GL_COLOR_ATTACHMENT0:
+      bound.color_attachment = source.texture;
+      break;
+    case GL_DEPTH_ATTACHMENT:
+      bound.depth_attachment = source.texture;
+      break;
+    default:
+      assert(0);
+      break;
+  }
+}
+
+} // extern "C"
+
+// Looks up the framebuffer bound to `target` (GL_FRAMEBUFFER is treated as
+// GL_DRAW_FRAMEBUFFER). When nothing is found, returns framebuffer 0 if
+// `fallback` is set and nullptr otherwise.
+static inline Framebuffer* get_framebuffer(GLenum target,
+                                           bool fallback = false) {
+  if (target == GL_FRAMEBUFFER) {
+    target = GL_DRAW_FRAMEBUFFER;
+  }
+  Framebuffer* found = ctx->framebuffers.find(ctx->get_binding(target));
+  if (found || !fallback) {
+    return found;
+  }
+  // Nothing is bound and a fallback was requested: use the default
+  // framebuffer.
+  return &ctx->framebuffers[0];
+}
+
+// Writes `val` into the first `n` elements starting at `dst`.
+template <typename T>
+static inline void fill_n(T* dst, size_t n, T val) {
+  for (size_t i = 0; i < n; i++) {
+    dst[i] = val;
+  }
+}
+
+#if USE_SSE2  // Specialization of fill_n for 32-bit values, only built when USE_SSE2 is set.
+template <>
+inline void fill_n<uint32_t>(uint32_t* dst, size_t n, uint32_t val) {
+ __asm__ __volatile__("rep stosl\n"  // x86 string store: writes the 32-bit value repeatedly, driven by the count register
+ : "+D"(dst), "+c"(n)  // destination pointer and count are read-write operands (x86 "D" and "c" register constraints)
+ : "a"(val)  // value to store, passed in the "a" register
+ : "memory", "cc");  // tells the compiler that memory and condition flags are clobbered
+}
+#endif
+
+// Replicates an 8-bit clear value into all four bytes of a 32-bit chunk.
+static inline uint32_t clear_chunk(uint8_t value) {
+  const uint32_t byte = value;
+  return byte | (byte << 8) | (byte << 16) | (byte << 24);
+}
+
+// Replicates a 16-bit clear value into both halves of a 32-bit chunk.
+static inline uint32_t clear_chunk(uint16_t value) {
+  const uint32_t half = value;
+  return (half << 16) | half;
+}
+
+// A 32-bit clear value already is a full chunk.
+static inline uint32_t clear_chunk(uint32_t value) {
+  return value;
+}
+
+// Fills `len` elements at `buf` with `value`. Elements narrower than 32 bits
+// are written individually until `buf` is 32-bit aligned, the bulk is then
+// written as whole 32-bit `chunk`s, and any leftover elements are written
+// individually again.
+template <typename T>
+static inline void clear_row(T* buf, size_t len, T value, uint32_t chunk) {
+  const size_t per_chunk = sizeof(uint32_t) / sizeof(T);
+  if (per_chunk > 1) {
+    // Number of leading elements before the next 32-bit boundary.
+    const size_t lead =
+        (-(intptr_t)buf & (sizeof(uint32_t) - 1)) / sizeof(T);
+    if (lead <= len) {
+      fill_n(buf, lead, value);
+      len -= lead;
+      buf += lead;
+    }
+  }
+  // Bulk of the row, one 32-bit chunk at a time.
+  fill_n((uint32_t*)buf, len / per_chunk, chunk);
+  if (per_chunk > 1) {
+    // Trailing elements that did not fill a whole chunk.
+    const size_t tail_start = len & ~(per_chunk - 1);
+    const size_t tail_len = len & (per_chunk - 1);
+    fill_n(buf + tail_start, tail_len, value);
+  }
+}
+
+// Fills the rectangle `bb` of texture `t` with `value`, leaving the column
+// range [skip_start, skip_end) of every row untouched. Does nothing when the
+// texture has no storage.
+template <typename T>
+static void clear_buffer(Texture& t, T value, IntRect bb, int skip_start = 0,
+                         int skip_end = 0) {
+  if (!t.buf) return;
+  skip_start = max(skip_start, bb.x0);
+  skip_end = max(skip_end, skip_start);
+  assert(sizeof(T) == t.bpp());
+  size_t stride = t.stride();
+  const size_t row_pitch = stride / sizeof(T);
+  // Several full-width rows with nothing to skip can be treated as one long
+  // row, which avoids repeating the per-row setup. This is only done when the
+  // buffer is internally managed or its rows are tightly packed.
+  if (bb.width() == t.width && bb.height() > 1 && skip_start >= skip_end &&
+      (t.should_free() || stride == t.width * sizeof(T))) {
+    bb.x1 += row_pitch * (bb.height() - 1);
+    bb.y1 = bb.y0 + 1;
+  }
+  T* row = (T*)t.sample_ptr(bb.x0, bb.y0);
+  uint32_t chunk = clear_chunk(value);
+  int remaining = bb.height();
+  while (remaining > 0) {
+    // Span to the left of the skipped columns.
+    if (bb.x0 < skip_start) {
+      clear_row(row, skip_start - bb.x0, value, chunk);
+    }
+    // Span to the right of the skipped columns.
+    if (skip_end < bb.x1) {
+      clear_row(row + (skip_end - bb.x0), bb.x1 - skip_end, value, chunk);
+    }
+    row += row_pitch;
+    remaining--;
+  }
+}
+
+// Fills row `y` of texture `t` with the texture's clear value, leaving the
+// column range [skip_start, skip_end) untouched.
+template <typename T>
+static inline void force_clear_row(Texture& t, int y, int skip_start = 0,
+                                   int skip_end = 0) {
+  assert(t.buf != nullptr);
+  assert(sizeof(T) == t.bpp());
+  assert(skip_start <= skip_end);
+  T* row = (T*)t.sample_ptr(0, y);
+  uint32_t chunk = clear_chunk((T)t.clear_val);
+  const bool has_left_span = skip_start > 0;
+  if (has_left_span) {
+    clear_row<T>(row, skip_start, t.clear_val, chunk);
+  }
+  const bool has_right_span = skip_end < t.width;
+  if (has_right_span) {
+    clear_row<T>(row + skip_end, t.width - skip_end, t.clear_val, chunk);
+  }
+}
+
+// Carries out a pending delayed clear on texture `t`. Rows are tracked in
+// t.cleared_rows as 32-bit masks, one bit per row, where a set bit marks a
+// row that needs no clearing. Each run of unset bits in the visited masks is
+// cleared with t.clear_val and the mask is then marked fully cleared.
+//
+// With a `skip` rectangle, only the masks overlapping its (clamped) row range
+// are visited and its (clamped) column range is left untouched in the rows
+// that get cleared. A `skip` covering the whole texture just cancels the
+// delayed clear.
+template <typename T>
+static void force_clear(Texture& t, const IntRect* skip = nullptr) {
+  if (!t.delay_clear || !t.cleared_rows) {
+    return;
+  }
+  int first_row = 0;
+  int end_row = t.height;
+  int skip_start = 0;
+  int skip_end = 0;
+  if (skip) {
+    first_row = clamp(skip->y0, 0, t.height);
+    end_row = clamp(skip->y1, first_row, t.height);
+    skip_start = clamp(skip->x0, 0, t.width);
+    skip_end = clamp(skip->x1, skip_start, t.width);
+    if (skip_start <= 0 && skip_end >= t.width && first_row <= 0 &&
+        end_row >= t.height) {
+      t.disable_delayed_clear();
+      return;
+    }
+  }
+  int mask_count = (end_row + 31) / 32;
+  uint32_t* row_masks = t.cleared_rows;
+  for (int word = first_row / 32; word < mask_count; word++) {
+    uint32_t bits = row_masks[word];
+    if (bits == ~0U) {
+      // Every row covered by this mask is already taken care of.
+      continue;
+    }
+    row_masks[word] = ~0U;
+    int row = word * 32;
+    while (bits) {
+      // Run of unset low bits: rows that still have to be cleared.
+      int run = __builtin_ctz(bits);
+      if (run > 0) {
+        clear_buffer<T>(t, t.clear_val, IntRect{0, row, t.width, row + run},
+                        skip_start, skip_end);
+        t.delay_clear -= run;
+        row += run;
+        bits >>= run;
+      }
+      // Run of set low bits: rows to step over.
+      run = __builtin_ctz(bits + 1);
+      row += run;
+      bits >>= run;
+    }
+    // Whatever is left of this 32-row group had unset bits.
+    int rest = (word + 1) * 32 - row;
+    if (rest > 0) {
+      clear_buffer<T>(t, t.clear_val, IntRect{0, row, t.width, row + rest},
+                      skip_start, skip_end);
+      t.delay_clear -= rest;
+    }
+  }
+  if (t.delay_clear <= 0) t.disable_delayed_clear();
+}
+
+// Flushes any pending delayed clear on `t` before it is accessed, dispatching
+// on the texel size of its internal format. `skip` is forwarded to
+// force_clear(). Formats other than GL_RGBA8, GL_R8 and GL_RG8 assert.
+static void prepare_texture(Texture& t, const IntRect* skip) {
+  if (!t.delay_clear) {
+    return;
+  }
+  switch (t.internal_format) {
+    case GL_RGBA8:
+      force_clear<uint32_t>(t, skip);
+      break;
+    case GL_R8:
+      force_clear<uint8_t>(t, skip);
+      break;
+    case GL_RG8:
+      force_clear<uint16_t>(t, skip);
+      break;
+    default:
+      assert(false);
+      break;
+  }
+}
+
+// Requests that texture `t` be cleared to `value` within `scissor`. When the
+// scissor covers the whole texture the clear is merely scheduled as a delayed
+// clear. Otherwise any pending delayed clear outside the scissor is forced
+// first and the scissored region is then cleared immediately.
+template <typename T>
+static void request_clear(Texture& t, T value, const IntRect& scissor) {
+  if (scissor.contains(t.offset_bounds())) {
+    // No scissoring needed: punt to a delayed clear.
+    t.enable_delayed_clear(value);
+    return;
+  }
+  IntRect local_scissor = scissor - t.offset;
+  force_clear<T>(t, &local_scissor);
+  clear_buffer<T>(t, value, local_scissor.intersection(t.bounds()));
+}
+
+// Overload that takes the clear region from the context: the current scissor
+// rect when scissor testing is enabled, otherwise the texture's full offset
+// bounds.
+template <typename T>
+static inline void request_clear(Texture& t, T value) {
+  if (ctx->scissortest) {
+    request_clear(t, value, ctx->scissor);
+  } else {
+    request_clear(t, value, t.offset_bounds());
+  }
+}
+
+extern "C" {
+
+// Configures framebuffer 0: its color attachment is given GL_RGBA8 storage
+// backed by the supplied buffer and stride, and its depth attachment is given
+// GL_DEPTH_COMPONENT24 storage of the same dimensions. Both attachments are
+// generated on first use and placed at offset (x, y).
+void InitDefaultFramebuffer(int x, int y, int width, int height, int stride,
+                            void* buf) {
+  Framebuffer& default_fb = ctx->framebuffers[0];
+
+  // Color attachment: (re)establish its storage over the supplied buffer.
+  if (default_fb.color_attachment == 0) {
+    GenTextures(1, &default_fb.color_attachment);
+  }
+  Texture& color = ctx->textures[default_fb.color_attachment];
+  set_tex_storage(color, GL_RGBA8, width, height, buf, stride);
+  color.offset = IntPoint(x, y);
+
+  // Depth attachment: keep its dimensions and offset in step with the color
+  // buffer.
+  if (default_fb.depth_attachment == 0) {
+    GenTextures(1, &default_fb.depth_attachment);
+  }
+  Texture& depth = ctx->textures[default_fb.depth_attachment];
+  set_tex_storage(depth, GL_DEPTH_COMPONENT24, width, height);
+  depth.offset = IntPoint(x, y);
+}
+
+// Returns a pointer to texel (0, 0) of the color attachment of the given
+// framebuffer, or nullptr if the framebuffer is not found, has no color
+// attachment, or the attachment has no buffer. When flush is set the texture
+// is passed through prepare_texture first. Each non-null out-parameter
+// receives the texture's width, height or stride respectively.
+void* GetColorBuffer(GLuint fbo, GLboolean flush, int32_t* width,
+                     int32_t* height, int32_t* stride) {
+  Framebuffer* target = ctx->framebuffers.find(fbo);
+  if (target == nullptr || target->color_attachment == 0) {
+    return nullptr;
+  }
+  Texture& tex = ctx->textures[target->color_attachment];
+  if (flush) {
+    prepare_texture(tex);
+  }
+  // The returned pointer addresses (0, 0), so the texture must not be offset.
+  assert(tex.offset == IntPoint(0, 0));
+  if (width) *width = tex.width;
+  if (height) *height = tex.height;
+  if (stride) *stride = tex.stride();
+  if (!tex.buf) {
+    return nullptr;
+  }
+  return tex.sample_ptr(0, 0);
+}
+
+// Runs prepare_texture on the color attachment of the given framebuffer.
+// Does nothing if the framebuffer is not found or has no color attachment.
+void ResolveFramebuffer(GLuint fbo) {
+  Framebuffer* target = ctx->framebuffers.find(fbo);
+  if (target && target->color_attachment) {
+    prepare_texture(ctx->textures[target->color_attachment]);
+  }
+}
+
+// Forwards to set_tex_storage for the texture named texid, passing along the
+// requested format, dimensions, buffer, stride and minimum dimensions.
+void SetTextureBuffer(GLuint texid, GLenum internal_format, GLsizei width,
+                      GLsizei height, GLsizei stride, void* buf,
+                      GLsizei min_width, GLsizei min_height) {
+  set_tex_storage(ctx->textures[texid], internal_format, width, height, buf,
+                  stride, min_width, min_height);
+}
+
+// Reports GL_FRAMEBUFFER_COMPLETE when a framebuffer is found for the target
+// and it has a color attachment; otherwise GL_FRAMEBUFFER_UNSUPPORTED.
+GLenum CheckFramebufferStatus(GLenum target) {
+  Framebuffer* bound = get_framebuffer(target);
+  const bool has_color = bound != nullptr && bound->color_attachment != 0;
+  return has_color ? GL_FRAMEBUFFER_COMPLETE : GL_FRAMEBUFFER_UNSUPPORTED;
+}
+
+// Clears a rectangular region of a texture to the value described by
+// (format, type, data). Only level 0 with zoffset 0 and depth 1 is supported.
+// GL_DEPTH_COMPONENT24 textures have their depth runs filled or initialized;
+// GL_RGBA8, GL_R8 and GL_RG8 textures are cleared through request_clear.
+// Unsupported levels, formats and types trip an assert.
+void ClearTexSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset,
+ GLint zoffset, GLsizei width, GLsizei height,
+ GLsizei depth, GLenum format, GLenum type,
+ const void* data) {
+ if (level != 0) {
+ assert(false);
+ return;
+ }
+ Texture& t = ctx->textures[texture];
+ assert(!t.locked);
+ // An empty region leaves nothing to clear.
+ if (width <= 0 || height <= 0 || depth <= 0) {
+ return;
+ }
+ assert(zoffset == 0 && depth == 1);
+ // Region to clear, built directly from the supplied offsets and size.
+ IntRect scissor = {xoffset, yoffset, xoffset + width, yoffset + height};
+ if (t.internal_format == GL_DEPTH_COMPONENT24) {
+ // Depth value to write. It stays at 0xFFFFFF if the format/type pair is
+ // not one of the supported combinations below (which also asserts).
+ uint32_t value = 0xFFFFFF;
+ switch (format) {
+ case GL_DEPTH_COMPONENT:
+ // Scale the depth read from data by 0xFFFFFF.
+ switch (type) {
+ case GL_DOUBLE:
+ value = uint32_t(*(const GLdouble*)data * 0xFFFFFF);
+ break;
+ case GL_FLOAT:
+ value = uint32_t(*(const GLfloat*)data * 0xFFFFFF);
+ break;
+ default:
+ assert(false);
+ break;
+ }
+ break;
+ default:
+ assert(false);
+ break;
+ }
+ if (t.cleared() && !scissor.contains(t.offset_bounds())) {
+ // If we need to scissor the clear and the depth buffer was already
+ // initialized, then just fill runs for that scissor area.
+ t.fill_depth_runs(value, scissor);
+ } else {
+ // Otherwise, the buffer is either uninitialized or the clear would
+ // encompass the entire buffer. If uninitialized, we can safely fill
+ // the entire buffer with any value and thus ignore any scissoring.
+ t.init_depth_runs(value);
+ }
+ return;
+ }
+
+ // Packed clear color. In the GL_UNSIGNED_BYTE path below, red occupies the
+ // low byte and alpha the high byte; the default here has only the alpha
+ // byte set. Channels that the format does not supply keep their defaults.
+ uint32_t color = 0xFF000000;
+ switch (type) {
+ case GL_FLOAT: {
+ const GLfloat* f = (const GLfloat*)data;
+ Float v = {0.0f, 0.0f, 0.0f, 1.0f};
+ // Each case falls through on purpose so that a format also picks up
+ // every channel of the narrower formats listed after it.
+ switch (format) {
+ case GL_RGBA:
+ v.w = f[3]; // alpha
+ FALLTHROUGH;
+ case GL_RGB:
+ v.z = f[2]; // blue
+ FALLTHROUGH;
+ case GL_RG:
+ v.y = f[1]; // green
+ FALLTHROUGH;
+ case GL_RED:
+ v.x = f[0]; // red
+ break;
+ default:
+ assert(false);
+ break;
+ }
+ // Round the float channels, convert them to U8 and reinterpret the
+ // result as a single 32-bit value.
+ color = bit_cast<uint32_t>(CONVERT(round_pixel(v), U8));
+ break;
+ }
+ case GL_UNSIGNED_BYTE: {
+ const GLubyte* b = (const GLubyte*)data;
+ // Same deliberate fallthrough as the float path: each case replaces one
+ // byte of the packed color and continues with the remaining channels.
+ switch (format) {
+ case GL_RGBA:
+ color = (color & ~0xFF000000) | (uint32_t(b[3]) << 24); // alpha
+ FALLTHROUGH;
+ case GL_RGB:
+ color = (color & ~0x00FF0000) | (uint32_t(b[2]) << 16); // blue
+ FALLTHROUGH;
+ case GL_RG:
+ color = (color & ~0x0000FF00) | (uint32_t(b[1]) << 8); // green
+ FALLTHROUGH;
+ case GL_RED:
+ color = (color & ~0x000000FF) | uint32_t(b[0]); // red
+ break;
+ default:
+ assert(false);
+ break;
+ }
+ break;
+ }
+ default:
+ assert(false);
+ break;
+ }
+
+ // Narrow the packed color to the texel size of the texture and request the
+ // clear for the region computed above.
+ switch (t.internal_format) {
+ case GL_RGBA8:
+ // Clear color needs to swizzle to BGRA.
+ request_clear<uint32_t>(t,
+ (color & 0xFF00FF00) |
+ ((color << 16) & 0xFF0000) |
+ ((color >> 16) & 0xFF),
+ scissor);
+ break;
+ case GL_R8:
+ request_clear<uint8_t>(t, uint8_t(color & 0xFF), scissor);
+ break;
+ case GL_RG8:
+ request_clear<uint16_t>(t, uint16_t(color & 0xFFFF), scissor);
+ break;
+ default:
+ assert(false);
+ break;
+ }
+}
+
+// Clears an entire texture by delegating to ClearTexSubImage with the
+// texture's full offset bounds as the region.
+void ClearTexImage(GLuint texture, GLint level, GLenum format, GLenum type,
+                   const void* data) {
+  IntRect whole = ctx->textures[texture].offset_bounds();
+  ClearTexSubImage(texture, level, whole.x0, whole.y0, 0, whole.width(),
+                   whole.height(), 1, format, type, data);
+}
+
+// Clears the color and/or depth attachment of the draw framebuffer, as
+// selected by mask, using the context's clear color and clear depth. The
+// cleared region is the attachment's offset bounds, intersected with the
+// scissor rect when scissor testing is enabled.
+void Clear(GLbitfield mask) {
+  Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
+
+  // Computes the region of an attachment that this clear may touch.
+  auto clear_region = [](Texture& tex) -> IntRect {
+    if (ctx->scissortest) {
+      return ctx->scissor.intersection(tex.offset_bounds());
+    }
+    return tex.offset_bounds();
+  };
+
+  if ((mask & GL_COLOR_BUFFER_BIT) && fb.color_attachment) {
+    IntRect region = clear_region(ctx->textures[fb.color_attachment]);
+    ClearTexSubImage(fb.color_attachment, 0, region.x0, region.y0, 0,
+                     region.width(), region.height(), 1, GL_RGBA, GL_FLOAT,
+                     ctx->clearcolor);
+  }
+  if ((mask & GL_DEPTH_BUFFER_BIT) && fb.depth_attachment) {
+    IntRect region = clear_region(ctx->textures[fb.depth_attachment]);
+    ClearTexSubImage(fb.depth_attachment, 0, region.x0, region.y0, 0,
+                     region.width(), region.height(), 1, GL_DEPTH_COMPONENT,
+                     GL_DOUBLE, &ctx->cleardepth);
+  }
+}
+
+// Clears a rectangle of the color attachment of the given framebuffer to the
+// color (r, g, b, a). The rectangle is first intersected with the
+// attachment's offset bounds.
+void ClearColorRect(GLuint fbo, GLint xoffset, GLint yoffset, GLsizei width,
+                    GLsizei height, GLfloat r, GLfloat g, GLfloat b,
+                    GLfloat a) {
+  Framebuffer& fb = ctx->framebuffers[fbo];
+  Texture& target = ctx->textures[fb.color_attachment];
+  IntRect requested = {xoffset, yoffset, xoffset + width, yoffset + height};
+  IntRect clipped = requested.intersection(target.offset_bounds());
+  GLfloat rgba[] = {r, g, b, a};
+  ClearTexSubImage(fb.color_attachment, 0, clipped.x0, clipped.y0, 0,
+                   clipped.width(), clipped.height(), 1, GL_RGBA, GL_FLOAT,
+                   rgba);
+}
+
+// Handles invalidation of framebuffer attachments. For GL_DEPTH_ATTACHMENT
+// the depth texture is marked as not cleared; for GL_COLOR_ATTACHMENT0 any
+// delayed clear on the color texture is disabled. Other attachment names are
+// ignored, as is a call with no framebuffer or an empty attachment list.
+void InvalidateFramebuffer(GLenum target, GLsizei num_attachments,
+                           const GLenum* attachments) {
+  Framebuffer* fb = get_framebuffer(target);
+  if (fb == nullptr || attachments == nullptr || num_attachments <= 0) {
+    return;
+  }
+  const GLenum* const end = attachments + num_attachments;
+  for (const GLenum* it = attachments; it != end; ++it) {
+    if (*it == GL_DEPTH_ATTACHMENT) {
+      ctx->textures[fb->depth_attachment].set_cleared(false);
+    } else if (*it == GL_COLOR_ATTACHMENT0) {
+      ctx->textures[fb->color_attachment].disable_delayed_clear();
+    }
+  }
+}
+
+// Copies a rectangle of pixels from the color attachment of the read
+// framebuffer into data. The destination pointer is first resolved through
+// get_pixel_pack_buffer_data. The (format, type) pair must map to the
+// texture's internal format; a mismatch logs a debug message, asserts and
+// returns without copying.
+void ReadPixels(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format,
+ GLenum type, void* data) {
+ data = get_pixel_pack_buffer_data(data);
+ if (!data) return;
+ Framebuffer* fb = get_framebuffer(GL_READ_FRAMEBUFFER);
+ if (!fb) return;
+ assert(format == GL_RED || format == GL_RGBA || format == GL_RGBA_INTEGER ||
+ format == GL_BGRA || format == GL_RG);
+ Texture& t = ctx->textures[fb->color_attachment];
+ if (!t.buf) return;
+ // Resolve any delayed clear before the buffer contents are read.
+ prepare_texture(t);
+ // debugf("read pixels %d, %d, %d, %d from fb %d with format %x\n", x, y,
+ // width, height, ctx->read_framebuffer_binding, t.internal_format);
+ // Shift the requested origin by the texture offset.
+ x -= t.offset.x;
+ y -= t.offset.y;
+ assert(x >= 0 && y >= 0);
+ assert(x + width <= t.width);
+ assert(y + height <= t.height);
+ if (internal_format_for_data(format, type) != t.internal_format) {
+ debugf("mismatched format for read pixels: %x vs %x\n", t.internal_format,
+ internal_format_for_data(format, type));
+ assert(false);
+ return;
+ }
+ // Only support readback conversions that are reversible
+ assert(!format_requires_conversion(format, t.internal_format) ||
+ bytes_for_internal_format(format) == t.bpp());
+ uint8_t* dest = (uint8_t*)data;
+ // The destination stride is derived from the requested width before any
+ // clipping, so clipped rows are still written at the caller's row pitch.
+ size_t destStride = width * t.bpp();
+ // Clip the rect to the texture so the copy below stays within it even if
+ // the asserts above are compiled out. Clipping at the top or left advances
+ // dest by the number of rows or pixels that were dropped.
+ if (y < 0) {
+ dest += -y * destStride;
+ height += y;
+ y = 0;
+ }
+ if (y + height > t.height) {
+ height = t.height - y;
+ }
+ if (x < 0) {
+ dest += -x * t.bpp();
+ width += x;
+ x = 0;
+ }
+ if (x + width > t.width) {
+ width = t.width - x;
+ }
+ // Nothing is left to copy once the rect has been clipped away entirely.
+ if (width <= 0 || height <= 0) {
+ return;
+ }
+ convert_copy(format, t.internal_format, dest, destStride,
+ (const uint8_t*)t.sample_ptr(x, y), t.stride(), width, height);
+}
+
+// Copies a srcWidth x srcHeight block of texels from one texture to another,
+// one row at a time. A GL_RENDERBUFFER target is first resolved to the
+// texture backing that renderbuffer. Only level 0 and a single 2D slice are
+// supported (checked by assert).
+//
+// Both textures must have a buffer and the same internal format, and both
+// rectangles must lie entirely inside their textures. These conditions are
+// asserted for debug builds and also checked at runtime: a request that
+// violates them copies nothing, so a build with asserts compiled out can no
+// longer read or write outside the texture buffers.
+void CopyImageSubData(GLuint srcName, GLenum srcTarget, UNUSED GLint srcLevel,
+                      GLint srcX, GLint srcY, GLint srcZ, GLuint dstName,
+                      GLenum dstTarget, UNUSED GLint dstLevel, GLint dstX,
+                      GLint dstY, GLint dstZ, GLsizei srcWidth,
+                      GLsizei srcHeight, GLsizei srcDepth) {
+  assert(srcLevel == 0 && dstLevel == 0);
+  assert(srcZ == 0 && srcDepth == 1 && dstZ == 0);
+  if (srcTarget == GL_RENDERBUFFER) {
+    Renderbuffer& rb = ctx->renderbuffers[srcName];
+    srcName = rb.texture;
+  }
+  if (dstTarget == GL_RENDERBUFFER) {
+    Renderbuffer& rb = ctx->renderbuffers[dstName];
+    dstName = rb.texture;
+  }
+  Texture& srctex = ctx->textures[srcName];
+  if (!srctex.buf) return;
+  // Resolve any delayed clear on the source before reading from it.
+  prepare_texture(srctex);
+  Texture& dsttex = ctx->textures[dstName];
+  if (!dsttex.buf) return;
+  assert(!dsttex.locked);
+  // The destination rect is about to be overwritten, so it is passed as the
+  // skip region when resolving the destination's delayed clear.
+  IntRect skip = {dstX, dstY, dstX + srcWidth, dstY + srcHeight};
+  prepare_texture(dsttex, &skip);
+  assert(srctex.internal_format == dsttex.internal_format);
+  assert(srcWidth >= 0);
+  assert(srcHeight >= 0);
+  assert(srcX + srcWidth <= srctex.width);
+  assert(srcY + srcHeight <= srctex.height);
+  assert(dstX + srcWidth <= dsttex.width);
+  assert(dstY + srcHeight <= dsttex.height);
+
+  // Runtime counterpart of the asserts above. The row copy below uses the
+  // source bpp for both textures, so mismatched formats are rejected too.
+  if (srctex.internal_format != dsttex.internal_format) return;
+  // An empty (or negative) extent has nothing to copy.
+  if (srcWidth <= 0 || srcHeight <= 0) return;
+  // The extents are compared against the space remaining after the origin
+  // rather than summed with it, so the checks themselves cannot overflow.
+  const bool src_in_bounds = srcX >= 0 && srcY >= 0 &&
+                             srcWidth <= srctex.width - srcX &&
+                             srcHeight <= srctex.height - srcY;
+  const bool dst_in_bounds = dstX >= 0 && dstY >= 0 &&
+                             srcWidth <= dsttex.width - dstX &&
+                             srcHeight <= dsttex.height - dstY;
+  if (!src_in_bounds || !dst_in_bounds) return;
+
+  int bpp = srctex.bpp();
+  int src_stride = srctex.stride();
+  int dest_stride = dsttex.stride();
+  char* dest = dsttex.sample_ptr(dstX, dstY);
+  char* src = srctex.sample_ptr(srcX, srcY);
+  for (int y = 0; y < srcHeight; y++) {
+    memcpy(dest, src, srcWidth * bpp);
+    dest += dest_stride;
+    src += src_stride;
+  }
+}
+
+// Copies a width x height rectangle starting at (x, y) in the color
+// attachment of the read framebuffer to (xoffset, yoffset) in the texture
+// currently bound to target, by delegating to CopyImageSubData. Only level 0
+// is supported.
+void CopyTexSubImage2D(GLenum target, UNUSED GLint level, GLint xoffset,
+                       GLint yoffset, GLint x, GLint y, GLsizei width,
+                       GLsizei height) {
+  assert(level == 0);
+  Framebuffer* read_fb = get_framebuffer(GL_READ_FRAMEBUFFER);
+  if (read_fb) {
+    CopyImageSubData(read_fb->color_attachment, GL_TEXTURE_2D, 0, x, y, 0,
+                     ctx->get_binding(target), GL_TEXTURE_2D, 0, xoffset,
+                     yoffset, 0, width, height, 1);
+  }
+}
+
+} // extern "C"
+
+#include "blend.h"
+#include "composite.h"
+#include "swgl_ext.h"
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wuninitialized"
+#pragma GCC diagnostic ignored "-Wunused-function"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
+#ifdef __clang__
+# pragma GCC diagnostic ignored "-Wunused-private-field"
+#else
+# pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+#include "load_shader.h"
+#pragma GCC diagnostic pop
+
+#include "rasterize.h"
+
+// Refreshes the cached buffer pointer and size of every enabled attribute
+// from the buffer it references, and lowers max_attrib to the index of the
+// last enabled attribute (-1 if none is enabled).
+void VertexArray::validate() {
+  int highest_enabled = -1;
+  for (int index = 0; index <= max_attrib; ++index) {
+    VertexAttrib& attrib = attribs[index];
+    if (!attrib.enabled) {
+      continue;
+    }
+    Buffer& source = ctx->buffers[attrib.vertex_buffer];
+    attrib.buf = source.buf;
+    attrib.buf_size = source.size;
+    highest_enabled = index;
+  }
+  max_attrib = highest_enabled;
+}
+
+extern "C" {
+
+// Draws instancecount instances of count vertices into the color attachment
+// of the draw framebuffer using the current vertex and fragment shaders.
+// type selects the index width (GL_UNSIGNED_SHORT or GL_UNSIGNED_INT); with
+// GL_NONE no element buffer is used and vertices are taken sequentially
+// starting at offset. Returns without drawing if the arguments are out of
+// range, a shader is missing, or the color attachment has no buffer.
+void DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
+ GLintptr offset, GLsizei instancecount) {
+ if (offset < 0 || count <= 0 || instancecount <= 0 || !vertex_shader ||
+ !fragment_shader) {
+ return;
+ }
+
+ Framebuffer& fb = *get_framebuffer(GL_DRAW_FRAMEBUFFER, true);
+ if (!fb.color_attachment) {
+ return;
+ }
+ Texture& colortex = ctx->textures[fb.color_attachment];
+ if (!colortex.buf) {
+ return;
+ }
+ assert(!colortex.locked);
+ assert(colortex.internal_format == GL_RGBA8 ||
+ colortex.internal_format == GL_R8);
+ // With depth testing disabled, texture 0 is used in place of the depth
+ // attachment.
+ Texture& depthtex = ctx->textures[ctx->depthtest ? fb.depth_attachment : 0];
+ if (depthtex.buf) {
+ // A depth buffer in use must match the color buffer's size and offset.
+ assert(depthtex.internal_format == GL_DEPTH_COMPONENT24);
+ assert(colortex.width == depthtex.width &&
+ colortex.height == depthtex.height);
+ assert(colortex.offset == depthtex.offset);
+ }
+
+ // debugf("current_vertex_array %d\n", ctx->current_vertex_array);
+ // debugf("indices size: %d\n", indices_buf.size);
+ VertexArray& v = ctx->vertex_arrays[ctx->current_vertex_array];
+ // Re-validate the vertex array only when it has been flagged for it.
+ if (ctx->validate_vertex_array) {
+ ctx->validate_vertex_array = false;
+ v.validate();
+ }
+
+#ifdef PRINT_TIMINGS
+ uint64_t start = get_time_value();
+#endif
+
+ // Reset the per-draw counters that feed the samples-passed query and the
+ // optional timing output below.
+ ctx->shaded_rows = 0;
+ ctx->shaded_pixels = 0;
+
+ vertex_shader->init_batch();
+
+ switch (type) {
+ case GL_UNSIGNED_SHORT:
+ assert(mode == GL_TRIANGLES);
+ draw_elements<uint16_t>(count, instancecount, offset, v, colortex,
+ depthtex);
+ break;
+ case GL_UNSIGNED_INT:
+ assert(mode == GL_TRIANGLES);
+ draw_elements<uint32_t>(count, instancecount, offset, v, colortex,
+ depthtex);
+ break;
+ case GL_NONE:
+ // Non-standard GL extension - if element type is GL_NONE, then we don't
+ // use any element buffer and behave as if DrawArrays was called instead.
+ for (GLsizei instance = 0; instance < instancecount; instance++) {
+ switch (mode) {
+ case GL_LINES:
+ // Consume vertices two at a time; a trailing odd vertex is ignored.
+ for (GLsizei i = 0; i + 2 <= count; i += 2) {
+ vertex_shader->load_attribs(v.attribs, offset + i, instance, 2);
+ draw_quad(2, colortex, depthtex);
+ }
+ break;
+ case GL_TRIANGLES:
+ // Consume vertices three at a time; leftover vertices are ignored.
+ for (GLsizei i = 0; i + 3 <= count; i += 3) {
+ vertex_shader->load_attribs(v.attribs, offset + i, instance, 3);
+ draw_quad(3, colortex, depthtex);
+ }
+ break;
+ default:
+ assert(false);
+ break;
+ }
+ }
+ break;
+ default:
+ assert(false);
+ break;
+ }
+
+ // Add the pixels shaded by this draw to the active samples-passed query.
+ if (ctx->samples_passed_query) {
+ Query& q = ctx->queries[ctx->samples_passed_query];
+ q.value += ctx->shaded_pixels;
+ }
+
+#ifdef PRINT_TIMINGS
+ uint64_t end = get_time_value();
+ printf(
+ "%7.3fms draw(%s, %d): %d pixels in %d rows (avg %f pixels/row, "
+ "%fns/pixel)\n",
+ double(end - start) / (1000. * 1000.),
+ ctx->programs[ctx->current_program].impl->get_name(), instancecount,
+ ctx->shaded_pixels, ctx->shaded_rows,
+ double(ctx->shaded_pixels) / ctx->shaded_rows,
+ double(end - start) / max(ctx->shaded_pixels, 1));
+#endif
+}
+
+// Does no work of its own; when PRINT_TIMINGS is defined it prints a marker
+// line so that calls show up in the timing output.
+void Finish() {
+#ifdef PRINT_TIMINGS
+ printf("Finish\n");
+#endif
+}
+
+// Makes c the current context and sets up its current program (program 0
+// when c is null). Does nothing if c is already current.
+void MakeCurrent(Context* c) {
+  if (ctx != c) {
+    ctx = c;
+    setup_program(ctx ? ctx->current_program : 0);
+  }
+}
+
+// Allocates a new Context and returns it to the caller.
+Context* CreateContext() {
+  Context* created = new Context;
+  return created;
+}
+
+// Adds one reference to the context. A null context is ignored.
+void ReferenceContext(Context* c) {
+  if (c) {
+    ++c->references;
+  }
+}
+
+// Drops one reference to the context and deletes it once no references
+// remain. If the context being deleted is the current one, the current
+// context is reset to null first. A null context is ignored.
+void DestroyContext(Context* c) {
+  if (!c) {
+    return;
+  }
+  assert(c->references > 0);
+  --c->references;
+  const bool still_referenced = c->references > 0;
+  if (still_referenced) {
+    return;
+  }
+  if (c == ctx) {
+    MakeCurrent(nullptr);
+  }
+  delete c;
+}
+
+// Sums size_of_op(buf) over every texture in the context for which
+// should_free() is true. Returns 0 for a null context.
+size_t ReportMemory(Context* ctx, size_t (*size_of_op)(const void*)) {
+  if (!ctx) {
+    return 0;
+  }
+  size_t total = 0;
+  for (auto& entry : ctx->textures) {
+    if (!entry || !entry->should_free()) {
+      continue;
+    }
+    total += size_of_op(entry->buf);
+  }
+  return total;
+}
+} // extern "C"