 gfx/wr/swgl/src/swgl_ext.h | 532 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 532 insertions(+), 0 deletions(-)
diff --git a/gfx/wr/swgl/src/swgl_ext.h b/gfx/wr/swgl/src/swgl_ext.h
new file mode 100644
index 0000000000..fd4e587889
--- /dev/null
+++ b/gfx/wr/swgl/src/swgl_ext.h
@@ -0,0 +1,532 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
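+// Commit a single chunk of pixels to the destination buffer, blending with
+// the existing framebuffer contents when a blend mode is enabled.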
+static inline void commit_span(uint32_t* buf, WideRGBA8 r) {
+ if (blend_key) r = blend_pixels(buf, unaligned_load<PackedRGBA8>(buf), r);
+ unaligned_store(buf, pack(r));
+}
+
+static inline void commit_span(uint32_t* buf, PackedRGBA8 r) {
+ if (blend_key)
+ r = pack(blend_pixels(buf, unaligned_load<PackedRGBA8>(buf), unpack(r)));
+ unaligned_store(buf, r);
+}
+
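+// Commit an entire span of a single solid color. With blending enabled the
+// span is processed a full chunk at a time, followed by a partial chunk if
+// needed; otherwise the packed color is filled directly into the destination.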
+UNUSED static inline void commit_solid_span(uint32_t* buf, WideRGBA8 r,
+ int len) {
+ if (blend_key) {
+ for (uint32_t* end = &buf[len & ~3]; buf < end; buf += 4) {
+ unaligned_store(
+ buf, pack(blend_pixels(buf, unaligned_load<PackedRGBA8>(buf), r)));
+ }
+ len &= 3;
+ if (len > 0) {
+ partial_store_span(
+ buf,
+ pack(blend_pixels(buf, partial_load_span<PackedRGBA8>(buf, len), r,
+ len)),
+ len);
+ }
+ } else {
+ fill_n(buf, len, bit_cast<U32>(pack(r)).x);
+ }
+}
+
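+// R8 variants of the chunk and solid-span commit helpers above.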
+static inline void commit_span(uint8_t* buf, WideR8 r) {
+ if (blend_key)
+ r = blend_pixels(buf, unpack(unaligned_load<PackedR8>(buf)), r);
+ unaligned_store(buf, pack(r));
+}
+
+UNUSED static inline void commit_solid_span(uint8_t* buf, WideR8 r, int len) {
+ if (blend_key) {
+ for (uint8_t* end = &buf[len]; buf < end; buf += 4) {
+ unaligned_store(buf, pack(blend_pixels(
+ buf, unpack(unaligned_load<PackedR8>(buf)), r)));
+ }
+ } else {
+ fill_n((uint32_t*)buf, len / 4, bit_cast<uint32_t>(pack(r)));
+ }
+}
+
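+// Pack a span's source value into the wide pixel format matching the
+// destination buffer type. The overloads that take no value pack the fragment
+// shader's current output.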
+template <typename V>
+static inline WideRGBA8 pack_span(uint32_t*, const V& v) {
+ return pack_pixels_RGBA8(v);
+}
+
+static inline WideRGBA8 pack_span(uint32_t*) { return pack_pixels_RGBA8(); }
+
+template <typename C>
+static inline WideR8 pack_span(uint8_t*, C c) {
+ return pack_pixels_R8(c);
+}
+
+static inline WideR8 pack_span(uint8_t*) { return pack_pixels_R8(); }
+
+// Forces a value with vector run-class to have scalar run-class.
+template <typename T>
+static ALWAYS_INLINE auto swgl_forceScalar(T v) -> decltype(force_scalar(v)) {
+ return force_scalar(v);
+}
+
+// Advance all varying interpolants by a single chunk
+#define swgl_stepInterp() step_interp_inputs()
+
+// Pseudo-intrinsic that accesses the interpolation step for a given varying
+#define swgl_interpStep(v) (interp_step.v)
+
+// Commit an entire span of a solid color
+#define swgl_commitSolid(format, v) \
+ do { \
+ commit_solid_span(swgl_Out##format, pack_span(swgl_Out##format, (v)), \
+ swgl_SpanLength); \
+ swgl_Out##format += swgl_SpanLength; \
+ swgl_SpanLength = 0; \
+ } while (0)
+#define swgl_commitSolidRGBA8(v) swgl_commitSolid(RGBA8, v)
+#define swgl_commitSolidR8(v) swgl_commitSolid(R8, v)
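+// Illustrative use in a span shader (vColor is a hypothetical varying):
+//   swgl_commitSolidRGBA8(vColor);  // fills the rest of the span with vColor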
+
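+// Commit one chunk of output, advancing the output pointer and decrementing
+// the remaining span length by a single step.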
+#define swgl_commitChunk(format, chunk) \
+ do { \
+ commit_span(swgl_Out##format, chunk); \
+ swgl_Out##format += swgl_StepSize; \
+ swgl_SpanLength -= swgl_StepSize; \
+ } while (0)
+
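+// Pack a vector or scalar alpha value into every channel of a wide RGBA8
+// chunk so it can be used as a blend weight.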
+static inline WideRGBA8 pack_pixels_RGBA8(Float alpha) {
+ I32 i = round_pixel(alpha);
+ HalfRGBA8 c = packRGBA8(zipLow(i, i), zipHigh(i, i));
+ return combine(zipLow(c, c), zipHigh(c, c));
+}
+
+static inline WideRGBA8 pack_pixels_RGBA8(float alpha) {
+ I32 i = round_pixel(alpha);
+ HalfRGBA8 c = packRGBA8(i, i);
+ return combine(c, c);
+}
+
+// Commit a single chunk of a color scaled by an alpha weight
+#define swgl_commitColor(format, color, alpha) \
+ swgl_commitChunk(format, muldiv255(pack_pixels_##format(color), \
+ pack_pixels_##format(alpha)))
+#define swgl_commitColorRGBA8(color, alpha) \
+ swgl_commitColor(RGBA8, color, alpha)
+#define swgl_commitColorR8(color, alpha) swgl_commitColor(R8, color, alpha)
+
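+// Sampler queries that span shaders can use to select an appropriate fast
+// path for the bound texture.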
+template <typename S>
+static ALWAYS_INLINE bool swgl_isTextureLinear(S s) {
+ return s->filter == TextureFilter::LINEAR;
+}
+
+template <typename S>
+static ALWAYS_INLINE bool swgl_isTextureRGBA8(S s) {
+ return s->format == TextureFormat::RGBA8;
+}
+
+template <typename S>
+static ALWAYS_INLINE bool swgl_isTextureR8(S s) {
+ return s->format == TextureFormat::R8;
+}
+
+// Returns the offset into the texture buffer for the given layer index. If not
+// a texture array or 3D texture, this will always access the first layer.
+template <typename S>
+static ALWAYS_INLINE int swgl_textureLayerOffset(S s, float layer) {
+ return 0;
+}
+
+UNUSED static ALWAYS_INLINE int swgl_textureLayerOffset(sampler2DArray s,
+ float layer) {
+ return clampCoord(int(layer), s->depth) * s->height_stride;
+}
+
+// Use the default linear quantization scale of 128. This gives 7 bits of
+// fractional precision, which when multiplied by a signed 9-bit value still
+// fits in a 16-bit integer.
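+// For example, the largest fractional weight of 127 multiplied by a signed
+// 9-bit value of magnitude 256 is 32512, which is still below 2^15.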
+const int swgl_LinearQuantizeScale = 128;
+
+// Quantizes UVs for access into a linear texture.
+template <typename S, typename T>
+static ALWAYS_INLINE T swgl_linearQuantize(S s, T p) {
+ return linearQuantize(p, swgl_LinearQuantizeScale, s);
+}
+
+// Quantizes an interpolation step for UVs for access into a linear texture.
+template <typename S, typename T>
+static ALWAYS_INLINE T swgl_linearQuantizeStep(S s, T p) {
+ return samplerScale(s, p) * swgl_LinearQuantizeScale;
+}
+
+// Commit a single chunk from a linear texture fetch
+#define swgl_commitTextureLinear(format, s, p, ...) \
+ swgl_commitChunk(format, \
+ textureLinearUnpacked##format(s, ivec2(p), __VA_ARGS__))
+#define swgl_commitTextureLinearRGBA8(s, p, ...) \
+ swgl_commitTextureLinear(RGBA8, s, p, __VA_ARGS__)
+#define swgl_commitTextureLinearR8(s, p, ...) \
+ swgl_commitTextureLinear(R8, s, p, __VA_ARGS__)
+
+// Commit a single chunk from a linear texture fetch that is scaled by a color
+#define swgl_commitTextureLinearColor(format, s, p, color, ...) \
+ swgl_commitChunk(format, muldiv255(textureLinearUnpacked##format( \
+ s, ivec2(p), __VA_ARGS__), \
+ pack_pixels_##format(color)))
+#define swgl_commitTextureLinearColorRGBA8(s, p, color, ...) \
+ swgl_commitTextureLinearColor(RGBA8, s, p, color, __VA_ARGS__)
+#define swgl_commitTextureLinearColorR8(s, p, color, ...) \
+ swgl_commitTextureLinearColor(R8, s, p, color, __VA_ARGS__)
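+// Illustrative per-chunk loop (hypothetical shader variables; a sketch of the
+// expected argument shapes rather than code from any particular shader):
+//   vec2 uv = swgl_linearQuantize(sColor0, vUv);
+//   vec2 step = swgl_linearQuantizeStep(sColor0, swgl_interpStep(vUv));
+//   while (swgl_SpanLength > 0) {
+//     swgl_commitTextureLinearRGBA8(sColor0, uv, vLayer);
+//     uv += step;
+//   }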
+
+// Commit an entire span of a separable Gaussian blur pass, sampling within
+// the given radius and weighting samples by the supplied coefficients, with
+// UVs clamped to the uv_rect bounds.
+#define swgl_commitGaussianBlur(format, type, s, p, uv_rect, hori, radius, \
+ coeffs, ...) \
+ do { \
+ vec2_scalar size = {float(s->width), float(s->height)}; \
+ ivec2_scalar curUV = make_ivec2(force_scalar(p) * size); \
+ ivec4_scalar bounds = make_ivec4(uv_rect * make_vec4(size, size)); \
+ int endX = min(bounds.z, curUV.x + swgl_SpanLength * swgl_StepSize); \
+ if (hori) { \
+ for (; curUV.x + swgl_StepSize <= endX; curUV.x += swgl_StepSize) { \
+ swgl_commitChunk(format, gaussianBlurHorizontal<type>( \
+ s, curUV, bounds.x, bounds.z, radius, \
+ coeffs.x, coeffs.y, __VA_ARGS__)); \
+ } \
+ } else { \
+ for (; curUV.x + swgl_StepSize <= endX; curUV.x += swgl_StepSize) { \
+ swgl_commitChunk(format, gaussianBlurVertical<type>( \
+ s, curUV, bounds.y, bounds.w, radius, \
+ coeffs.x, coeffs.y, __VA_ARGS__)); \
+ } \
+ } \
+ } while (0)
+#define swgl_commitGaussianBlurRGBA8(s, p, uv_rect, hori, radius, coeffs, ...) \
+ swgl_commitGaussianBlur(RGBA8, uint32_t, s, p, uv_rect, hori, radius, \
+ coeffs, __VA_ARGS__)
+#define swgl_commitGaussianBlurR8(s, p, uv_rect, hori, radius, coeffs, ...) \
+ swgl_commitGaussianBlur(R8, uint8_t, s, p, uv_rect, hori, radius, coeffs, \
+ __VA_ARGS__)
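+// Illustrative call site (hypothetical shader variables; a sketch of the
+// expected argument shapes only):
+//   swgl_commitGaussianBlurRGBA8(sColor0, vUv, vUvRect, /*hori*/ true,
+//                                vSupport, vGaussCoefficients, vLayer);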
+
+// Convert and pack planar YUV samples to RGB output using a color space
+static ALWAYS_INLINE PackedRGBA8 convertYUV(int colorSpace, U16 y, U16 u,
+ U16 v) {
+ auto yy = V8<int16_t>(zip(y, y));
+ auto uv = V8<int16_t>(zip(u, v));
+ switch (colorSpace) {
+ case REC_601:
+ return YUVConverter<REC_601>::convert(yy, uv);
+ case REC_709:
+ return YUVConverter<REC_709>::convert(yy, uv);
+ case REC_2020:
+ return YUVConverter<REC_2020>::convert(yy, uv);
+ default:
+ return YUVConverter<IDENTITY>::convert(yy, uv);
+ }
+}
+
+// Helper functions to sample from planar YUV textures before converting to RGB
+template <typename S0>
+static inline PackedRGBA8 sampleYUV(S0 sampler0, vec2 uv0, int layer0,
+ int colorSpace, int rescaleFactor) {
+ ivec2 i0(uv0);
+ switch (sampler0->format) {
+ case TextureFormat::RGBA8: {
+ auto planar = textureLinearPlanarRGBA8(sampler0, i0, layer0);
+ return convertYUV(colorSpace, highHalf(planar.rg), lowHalf(planar.rg),
+ lowHalf(planar.ba));
+ }
+ case TextureFormat::YUV422: {
+ auto planar = textureLinearPlanarYUV422(sampler0, i0, layer0);
+ return convertYUV(colorSpace, planar.y, planar.u, planar.v);
+ }
+ default:
+ assert(false);
+ return PackedRGBA8(0);
+ }
+}
+
+template <typename S0, typename C>
+static inline WideRGBA8 sampleColorYUV(S0 sampler0, vec2 uv0, int layer0,
+ int colorSpace, int rescaleFactor,
+ C color) {
+ return muldiv255(
+ unpack(sampleYUV(sampler0, uv0, layer0, colorSpace, rescaleFactor)),
+ pack_pixels_RGBA8(color));
+}
+
+template <typename S0, typename S1>
+static inline PackedRGBA8 sampleYUV(S0 sampler0, vec2 uv0, int layer0,
+ S1 sampler1, vec2 uv1, int layer1,
+ int colorSpace, int rescaleFactor) {
+ ivec2 i0(uv0);
+ ivec2 i1(uv1);
+ switch (sampler1->format) {
+ case TextureFormat::RG8: {
+ assert(sampler0->format == TextureFormat::R8);
+ auto y = textureLinearUnpackedR8(sampler0, i0, layer0);
+ auto planar = textureLinearPlanarRG8(sampler1, i1, layer1);
+ return convertYUV(colorSpace, y, lowHalf(planar.rg), highHalf(planar.rg));
+ }
+ case TextureFormat::RGBA8: {
+ assert(sampler0->format == TextureFormat::R8);
+ auto y = textureLinearUnpackedR8(sampler0, i0, layer0);
+ auto planar = textureLinearPlanarRGBA8(sampler1, i1, layer1);
+ return convertYUV(colorSpace, y, lowHalf(planar.ba), highHalf(planar.rg));
+ }
+ default:
+ assert(false);
+ return PackedRGBA8(0);
+ }
+}
+
+template <typename S0, typename S1, typename C>
+static inline WideRGBA8 sampleColorYUV(S0 sampler0, vec2 uv0, int layer0,
+ S1 sampler1, vec2 uv1, int layer1,
+ int colorSpace, int rescaleFactor,
+ C color) {
+ return muldiv255(unpack(sampleYUV(sampler0, uv0, layer0, sampler1, uv1,
+ layer1, colorSpace, rescaleFactor)),
+ pack_pixels_RGBA8(color));
+}
+
+template <typename S0, typename S1, typename S2>
+static inline PackedRGBA8 sampleYUV(S0 sampler0, vec2 uv0, int layer0,
+ S1 sampler1, vec2 uv1, int layer1,
+ S2 sampler2, vec2 uv2, int layer2,
+ int colorSpace, int rescaleFactor) {
+ ivec2 i0(uv0);
+ ivec2 i1(uv1);
+ ivec2 i2(uv2);
+ assert(sampler0->format == sampler1->format &&
+ sampler0->format == sampler2->format);
+ switch (sampler0->format) {
+ case TextureFormat::R8: {
+ auto y = textureLinearUnpackedR8(sampler0, i0, layer0);
+ auto u = textureLinearUnpackedR8(sampler1, i1, layer1);
+ auto v = textureLinearUnpackedR8(sampler2, i2, layer2);
+ return convertYUV(colorSpace, y, u, v);
+ }
+ case TextureFormat::R16: {
+ // The rescaling factor represents how many bits to add to renormalize the
+ // texture to 16 bits, and so the color depth is actually 16 minus the
+ // rescaling factor.
+      // The sample must be shifted right by the number of bits beyond 8
+      // that it occupies. On output from textureLinearUnpackedR16, 1 bit of
+      // precision has already been lost at the low end, hence 1 is
+      // subtracted from the color depth.
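+      // For example, 10-bit video stored in an R16 texture has a rescale
+      // factor of 6, so colorDepth is 10 and rescaleBits is (10 - 1) - 8 = 1.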
+ int colorDepth = 16 - rescaleFactor;
+ int rescaleBits = (colorDepth - 1) - 8;
+ auto y = textureLinearUnpackedR16(sampler0, i0, layer0) >> rescaleBits;
+ auto u = textureLinearUnpackedR16(sampler1, i1, layer1) >> rescaleBits;
+ auto v = textureLinearUnpackedR16(sampler2, i2, layer2) >> rescaleBits;
+ return convertYUV(colorSpace, U16(y), U16(u), U16(v));
+ }
+ default:
+ assert(false);
+ return PackedRGBA8(0);
+ }
+}
+
+template <typename S0, typename S1, typename S2, typename C>
+static inline WideRGBA8 sampleColorYUV(S0 sampler0, vec2 uv0, int layer0,
+ S1 sampler1, vec2 uv1, int layer1,
+ S2 sampler2, vec2 uv2, int layer2,
+ int colorSpace, int rescaleFactor,
+ C color) {
+ return muldiv255(
+ unpack(sampleYUV(sampler0, uv0, layer0, sampler1, uv1, layer1, sampler2,
+ uv2, layer2, colorSpace, rescaleFactor)),
+ pack_pixels_RGBA8(color));
+}
+
+// Commit a single chunk of a YUV surface represented by multiple planar
+// textures. This requires a color space specifier selecting how to convert
+// from YUV to RGB output. In the case of HDR formats, a rescaling factor
+// selects how many bits of precision must be utilized on conversion. See the
+// sampleYUV dispatcher functions for the various supported plane
+// configurations this intrinsic accepts.
+#define swgl_commitTextureLinearYUV(...) \
+ swgl_commitChunk(RGBA8, sampleYUV(__VA_ARGS__))
+// Commit a single chunk of a YUV surface scaled by a color.
+#define swgl_commitTextureLinearColorYUV(...) \
+ swgl_commitChunk(RGBA8, sampleColorYUV(__VA_ARGS__))
+
+// Helper functions to apply a color modulus when available.
+struct NoColor {};
+
+SI WideRGBA8 applyColor(WideRGBA8 src, NoColor) { return src; }
+
+SI WideRGBA8 applyColor(WideRGBA8 src, WideRGBA8 color) {
+ return muldiv255(src, color);
+}
+
+SI PackedRGBA8 applyColor(PackedRGBA8 src, NoColor) { return src; }
+
+SI PackedRGBA8 applyColor(PackedRGBA8 src, WideRGBA8 color) {
+ return pack(muldiv255(unpack(src), color));
+}
+
+// Samples an axis-aligned span on a single row of a texture using 1:1
+// nearest filtering. Sampling is constrained to only fall within the given UV
+// bounds. This requires a pointer to the destination buffer. An optional color
+// modulus can be supplied.
+template <typename S, typename C>
+static void blendTextureNearestRGBA8(S sampler, const ivec2_scalar& i, int span,
+ const ivec2_scalar& minUV,
+ const ivec2_scalar& maxUV, C color,
+ uint32_t* buf, int layerOffset = 0) {
+ // Calculate the row pointer within the buffer, clamping to within valid row
+ // bounds.
+ uint32_t* row =
+ &sampler->buf[clamp(clampCoord(i.y, sampler->height), minUV.y, maxUV.y) *
+ sampler->stride +
+ layerOffset];
+ // Find clamped X bounds within the row.
+ int minX = clamp(minUV.x, 0, sampler->width - 1);
+ int maxX = clamp(maxUV.x, minX, sampler->width - 1);
+ int curX = i.x;
+ // If we need to start sampling below the valid sample bounds, then we need to
+ // fill this section with a constant clamped sample.
+ if (curX < minX) {
+ int n = min(minX - curX, span);
+ auto src = applyColor(unpack(bit_cast<PackedRGBA8>(U32(row[minX]))), color);
+ commit_solid_span(buf, src, n);
+ buf += n;
+ span -= n;
+ curX += n;
+ }
+ // Here we only deal with valid samples within the sample bounds. No clamping
+ // should occur here within these inner loops.
+ int n = clamp(maxX + 1 - curX, 0, span);
+ span -= n;
+ // Try to process as many chunks as possible with full loads and stores.
+ if (blend_key) {
+ for (int end = curX + (n & ~3); curX < end; curX += 4, buf += 4) {
+ auto src =
+ applyColor(unpack(unaligned_load<PackedRGBA8>(&row[curX])), color);
+ auto r = blend_pixels(buf, unaligned_load<PackedRGBA8>(buf), src);
+ unaligned_store(buf, pack(r));
+ }
+ } else {
+ for (int end = curX + (n & ~3); curX < end; curX += 4, buf += 4) {
+ auto src = applyColor(unaligned_load<PackedRGBA8>(&row[curX]), color);
+ unaligned_store(buf, src);
+ }
+ }
+ n &= 3;
+ // If we have any leftover samples after processing chunks, use partial loads
+ // and stores.
+ if (n > 0) {
+ if (blend_key) {
+ auto src = applyColor(
+ unpack(partial_load_span<PackedRGBA8>(&row[curX], n)), color);
+ auto r =
+ blend_pixels(buf, partial_load_span<PackedRGBA8>(buf, n), src, n);
+ partial_store_span(buf, pack(r), n);
+ } else {
+ auto src =
+ applyColor(partial_load_span<PackedRGBA8>(&row[curX], n), color);
+ partial_store_span(buf, src, n);
+ }
+ buf += n;
+ curX += n;
+ }
+ // If we still have samples left above the valid sample bounds, then we again
+ // need to fill this section with a constant clamped sample.
+ if (span > 0) {
+ auto src = applyColor(unpack(bit_cast<PackedRGBA8>(U32(row[maxX]))), color);
+ commit_solid_span(buf, src, span);
+ }
+}
+
+// TODO: blendTextureNearestR8 if it is actually needed
+
+// Commit an entire span of 1:1 nearest texture fetches, potentially scaled by a
+// color
+#define swgl_commitTextureNearest(format, s, p, uv_rect, color, ...) \
+ do { \
+ ivec2_scalar i = make_ivec2(samplerScale(s, force_scalar(p))); \
+ ivec2_scalar min_uv = \
+ make_ivec2(samplerScale(s, vec2_scalar{uv_rect.x, uv_rect.y})); \
+ ivec2_scalar max_uv = \
+ make_ivec2(samplerScale(s, vec2_scalar{uv_rect.z, uv_rect.w})); \
+ blendTextureNearest##format(s, i, swgl_SpanLength, min_uv, max_uv, color, \
+ swgl_Out##format, __VA_ARGS__); \
+ swgl_Out##format += swgl_SpanLength; \
+ swgl_SpanLength = 0; \
+ } while (0)
+#define swgl_commitTextureNearestRGBA8(s, p, uv_rect, ...) \
+ swgl_commitTextureNearest(RGBA8, s, p, uv_rect, NoColor(), __VA_ARGS__)
+#define swgl_commitTextureNearestR8(s, p, uv_rect, ...) \
+ swgl_commitTextureNearest(R8, s, p, uv_rect, NoColor(), __VA_ARGS__)
+
+#define swgl_commitTextureNearestColor(format, s, p, uv_rect, color, ...) \
+ swgl_commitTextureNearest(format, s, p, uv_rect, \
+ pack_pixels_##format(color), __VA_ARGS__)
+#define swgl_commitTextureNearestColorRGBA8(s, p, uv_rect, color, ...) \
+ swgl_commitTextureNearestColor(RGBA8, s, p, uv_rect, color, __VA_ARGS__)
+#define swgl_commitTextureNearestColorR8(s, p, uv_rect, color, ...) \
+ swgl_commitTextureNearestColor(R8, s, p, uv_rect, color, __VA_ARGS__)
+
+// Helper function to decide whether we can safely apply 1:1 nearest filtering
+// without diverging too much from the linear filter
+template <typename S, typename T>
+static bool allowTextureNearest(S sampler, T P, int span) {
+  // First verify that the row Y doesn't change across samples
+ if (P.y.x != P.y.y) {
+ return false;
+ }
+ P = samplerScale(sampler, P);
+ // We need to verify that the pixel step reasonably approximates stepping
+ // by a single texel for every pixel we need to reproduce. Try to ensure
+ // that the margin of error is no more than approximately 2^-7.
+ span &= ~(128 - 1);
+ span += 128;
+ return round((P.x.y - P.x.x) * span) == span &&
+ // Also verify that we're reasonably close to the center of a texel
+ // so that it doesn't look that much different than if a linear filter
+ // was used.
+ (int(P.x.x * 4.0f + 0.5f) & 3) == 2 &&
+ (int(P.y.x * 4.0f + 0.5f) & 3) == 2;
+}
+
+// Determine if we can apply 1:1 nearest filtering to a span of texture
+#define swgl_allowTextureNearest(s, p) \
+ allowTextureNearest(s, p, swgl_SpanLength)
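+// Illustrative pairing in a span shader (variable names are hypothetical):
+//   if (swgl_allowTextureNearest(sColor0, vUv)) {
+//     swgl_commitTextureNearestRGBA8(
+//         sColor0, vUv, vUvBounds, swgl_textureLayerOffset(sColor0, vLayer));
+//   }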
+
+// Extension to set a clip mask image to be sampled during blending. The offset
+// specifies the positioning of the clip mask image relative to the viewport
+// origin. The bounding box specifies the rectangle relative to the clip mask's
+// origin that constrains sampling within the clip mask.
+static sampler2D swgl_ClipMask = nullptr;
+static IntPoint swgl_ClipMaskOffset = {0, 0};
+static IntRect swgl_ClipMaskBounds = {0, 0, 0, 0};
+#define swgl_clipMask(mask, offset, bb_origin, bb_size) \
+ do { \
+ if (bb_size != vec2_scalar(0.0f, 0.0f)) { \
+ swgl_ClipMask = mask; \
+ swgl_ClipMaskOffset = make_ivec2(offset); \
+ swgl_ClipMaskBounds = \
+ IntRect(make_ivec2(bb_origin), make_ivec2(bb_size)); \
+ } \
+ } while (0)
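+// Illustrative use (hypothetical shader variables):
+//   swgl_clipMask(sClipMask, vClipMaskOffset, vClipMaskBounds.xy,
+//                 vClipMaskBounds.zw);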
+
+// Dispatch helper used by the GLSL translator to call the swgl_drawSpan
+// functions.
+// The number of pixels committed is tracked by checking for the difference in
+// swgl_SpanLength. Any varying interpolants used will be advanced past the
+// committed part of the span in case the fragment shader must be executed for
+// any remaining pixels that were not committed by the span shader.
+#define DISPATCH_DRAW_SPAN(self, format) \
+ do { \
+ int total = self->swgl_SpanLength; \
+ self->swgl_drawSpan##format(); \
+ int drawn = total - self->swgl_SpanLength; \
+ if (drawn) self->step_interp_inputs(drawn); \
+ while (self->swgl_SpanLength > 0) { \
+ run(self); \
+ commit_span(self->swgl_Out##format, pack_span(self->swgl_Out##format)); \
+ self->swgl_Out##format += swgl_StepSize; \
+ self->swgl_SpanLength -= swgl_StepSize; \
+ } \
+ } while (0)