Diffstat (limited to 'gfx/wr/swgl/src/swgl_ext.h')
-rw-r--r--   gfx/wr/swgl/src/swgl_ext.h   532
1 file changed, 532 insertions, 0 deletions
diff --git a/gfx/wr/swgl/src/swgl_ext.h b/gfx/wr/swgl/src/swgl_ext.h
new file mode 100644
index 0000000000..fd4e587889
--- /dev/null
+++ b/gfx/wr/swgl/src/swgl_ext.h
@@ -0,0 +1,532 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+static inline void commit_span(uint32_t* buf, WideRGBA8 r) {
+  if (blend_key) r = blend_pixels(buf, unaligned_load<PackedRGBA8>(buf), r);
+  unaligned_store(buf, pack(r));
+}
+
+static inline void commit_span(uint32_t* buf, PackedRGBA8 r) {
+  if (blend_key)
+    r = pack(blend_pixels(buf, unaligned_load<PackedRGBA8>(buf), unpack(r)));
+  unaligned_store(buf, r);
+}
+
+UNUSED static inline void commit_solid_span(uint32_t* buf, WideRGBA8 r,
+                                            int len) {
+  if (blend_key) {
+    for (uint32_t* end = &buf[len & ~3]; buf < end; buf += 4) {
+      unaligned_store(
+          buf, pack(blend_pixels(buf, unaligned_load<PackedRGBA8>(buf), r)));
+    }
+    len &= 3;
+    if (len > 0) {
+      partial_store_span(
+          buf,
+          pack(blend_pixels(buf, partial_load_span<PackedRGBA8>(buf, len), r,
+                            len)),
+          len);
+    }
+  } else {
+    fill_n(buf, len, bit_cast<U32>(pack(r)).x);
+  }
+}
+
+static inline void commit_span(uint8_t* buf, WideR8 r) {
+  if (blend_key)
+    r = blend_pixels(buf, unpack(unaligned_load<PackedR8>(buf)), r);
+  unaligned_store(buf, pack(r));
+}
+
+UNUSED static inline void commit_solid_span(uint8_t* buf, WideR8 r, int len) {
+  if (blend_key) {
+    for (uint8_t* end = &buf[len]; buf < end; buf += 4) {
+      unaligned_store(buf, pack(blend_pixels(
+                               buf, unpack(unaligned_load<PackedR8>(buf)), r)));
+    }
+  } else {
+    fill_n((uint32_t*)buf, len / 4, bit_cast<uint32_t>(pack(r)));
+  }
+}
+
+template <typename V>
+static inline WideRGBA8 pack_span(uint32_t*, const V& v) {
+  return pack_pixels_RGBA8(v);
+}
+
+static inline WideRGBA8 pack_span(uint32_t*) { return pack_pixels_RGBA8(); }
+
+template <typename C>
+static inline WideR8 pack_span(uint8_t*, C c) {
+  return pack_pixels_R8(c);
+}
+
+static inline WideR8 pack_span(uint8_t*) { return pack_pixels_R8(); }
+
+// Forces a value with vector run-class to have scalar run-class.
+template <typename T>
+static ALWAYS_INLINE auto swgl_forceScalar(T v) -> decltype(force_scalar(v)) {
+  return force_scalar(v);
+}
+
+// Advance all varying interpolants by a single chunk
+#define swgl_stepInterp() step_interp_inputs()
+
+// Pseudo-intrinsic that accesses the interpolation step for a given varying
+#define swgl_interpStep(v) (interp_step.v)
+
+// Commit an entire span of a solid color
+#define swgl_commitSolid(format, v)                                       \
+  do {                                                                    \
+    commit_solid_span(swgl_Out##format, pack_span(swgl_Out##format, (v)), \
+                      swgl_SpanLength);                                   \
+    swgl_Out##format += swgl_SpanLength;                                  \
+    swgl_SpanLength = 0;                                                  \
+  } while (0)
+#define swgl_commitSolidRGBA8(v) swgl_commitSolid(RGBA8, v)
+#define swgl_commitSolidR8(v) swgl_commitSolid(R8, v)
+
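+// As a rough usage sketch (assuming a hypothetical solid-brush span shader
+// with a placeholder varying named v_color), a swgl_drawSpan function could
+// drain its whole span with a single commit:
+//
+//   void swgl_drawSpanRGBA8() { swgl_commitSolidRGBA8(v_color); }
+//
+// swgl_commitSolid blends or fills all swgl_SpanLength pixels at once and then
+// leaves swgl_SpanLength at zero, so the per-chunk fragment loop is skipped.
+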
+#define swgl_commitChunk(format, chunk)   \
+  do {                                    \
+    commit_span(swgl_Out##format, chunk); \
+    swgl_Out##format += swgl_StepSize;    \
+    swgl_SpanLength -= swgl_StepSize;     \
+  } while (0)
+
+static inline WideRGBA8 pack_pixels_RGBA8(Float alpha) {
+  I32 i = round_pixel(alpha);
+  HalfRGBA8 c = packRGBA8(zipLow(i, i), zipHigh(i, i));
+  return combine(zipLow(c, c), zipHigh(c, c));
+}
+
+static inline WideRGBA8 pack_pixels_RGBA8(float alpha) {
+  I32 i = round_pixel(alpha);
+  HalfRGBA8 c = packRGBA8(i, i);
+  return combine(c, c);
+}
+
+// Commit a single chunk of a color scaled by an alpha weight
+#define swgl_commitColor(format, color, alpha)                    \
+  swgl_commitChunk(format, muldiv255(pack_pixels_##format(color), \
+                                     pack_pixels_##format(alpha)))
+#define swgl_commitColorRGBA8(color, alpha) \
+  swgl_commitColor(RGBA8, color, alpha)
+#define swgl_commitColorR8(color, alpha) swgl_commitColor(R8, color, alpha)
+
+template <typename S>
+static ALWAYS_INLINE bool swgl_isTextureLinear(S s) {
+  return s->filter == TextureFilter::LINEAR;
+}
+
+template <typename S>
+static ALWAYS_INLINE bool swgl_isTextureRGBA8(S s) {
+  return s->format == TextureFormat::RGBA8;
+}
+
+template <typename S>
+static ALWAYS_INLINE bool swgl_isTextureR8(S s) {
+  return s->format == TextureFormat::R8;
+}
+
+// Returns the offset into the texture buffer for the given layer index. If not
+// a texture array or 3D texture, this will always access the first layer.
+template <typename S>
+static ALWAYS_INLINE int swgl_textureLayerOffset(S s, float layer) {
+  return 0;
+}
+
+UNUSED static ALWAYS_INLINE int swgl_textureLayerOffset(sampler2DArray s,
+                                                        float layer) {
+  return clampCoord(int(layer), s->depth) * s->height_stride;
+}
+
+// Use the default linear quantization scale of 128. This gives 7 bits of
+// fractional precision, which when multiplied with a signed 9 bit value
+// still fits in a 16 bit integer.
+const int swgl_LinearQuantizeScale = 128;
+
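+// To make that concrete: 128 = 2^7, so the quantized fraction carries 7 bits,
+// and a signed 9-bit value lies in [-256, 255]; the worst-case products are
+// 255 * 128 = 32640 and -256 * 128 = -32768, both of which still fit in a
+// signed 16-bit integer.
+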
+// Quantizes UVs for access into a linear texture.
+template <typename S, typename T>
+static ALWAYS_INLINE T swgl_linearQuantize(S s, T p) {
+  return linearQuantize(p, swgl_LinearQuantizeScale, s);
+}
+
+// Quantizes an interpolation step for UVs for access into a linear texture.
+template <typename S, typename T>
+static ALWAYS_INLINE T swgl_linearQuantizeStep(S s, T p) {
+  return samplerScale(s, p) * swgl_LinearQuantizeScale;
+}
+
+// Commit a single chunk from a linear texture fetch
+#define swgl_commitTextureLinear(format, s, p, ...) \
+  swgl_commitChunk(format,                          \
+                   textureLinearUnpacked##format(s, ivec2(p), __VA_ARGS__))
+#define swgl_commitTextureLinearRGBA8(s, p, ...) \
+  swgl_commitTextureLinear(RGBA8, s, p, __VA_ARGS__)
+#define swgl_commitTextureLinearR8(s, p, ...) \
+  swgl_commitTextureLinear(R8, s, p, __VA_ARGS__)
+
+// Commit a single chunk from a linear texture fetch that is scaled by a color
+#define swgl_commitTextureLinearColor(format, s, p, color, ...)     \
+  swgl_commitChunk(format, muldiv255(textureLinearUnpacked##format( \
+                                         s, ivec2(p), __VA_ARGS__), \
+                                     pack_pixels_##format(color)))
+#define swgl_commitTextureLinearColorRGBA8(s, p, color, ...) \
+  swgl_commitTextureLinearColor(RGBA8, s, p, color, __VA_ARGS__)
+#define swgl_commitTextureLinearColorR8(s, p, color, ...) \
+  swgl_commitTextureLinearColor(R8, s, p, color, __VA_ARGS__)
+
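+// For illustration only: assuming a hypothetical image span shader whose
+// sampler and UV varying are named sColor0 and v_uv (placeholders), and
+// passing 0 as the trailing layer offset that swgl_commitTextureLinear*
+// forwards to textureLinearUnpacked*, a span loop built on these helpers
+// might look roughly like:
+//
+//   void swgl_drawSpanRGBA8() {
+//     if (!swgl_isTextureLinear(sColor0) || !swgl_isTextureRGBA8(sColor0)) {
+//       return;
+//     }
+//     vec2 uv = swgl_linearQuantize(sColor0, v_uv);
+//     vec2 uv_step = swgl_linearQuantizeStep(sColor0, swgl_interpStep(v_uv));
+//     while (swgl_SpanLength > 0) {
+//       swgl_commitTextureLinearRGBA8(sColor0, uv, 0);
+//       uv += uv_step;
+//     }
+//   }
+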
+// Commit an entire span of a separable pass of a Gaussian blur that falls
+// within the given radius scaled by supplied coefficients, clamped to uv_rect
+// bounds.
+#define swgl_commitGaussianBlur(format, type, s, p, uv_rect, hori, radius, \
+                                coeffs, ...)                               \
+  do {                                                                     \
+    vec2_scalar size = {float(s->width), float(s->height)};                \
+    ivec2_scalar curUV = make_ivec2(force_scalar(p) * size);               \
+    ivec4_scalar bounds = make_ivec4(uv_rect * make_vec4(size, size));     \
+    int endX = min(bounds.z, curUV.x + swgl_SpanLength * swgl_StepSize);   \
+    if (hori) {                                                            \
+      for (; curUV.x + swgl_StepSize <= endX; curUV.x += swgl_StepSize) {  \
+        swgl_commitChunk(format, gaussianBlurHorizontal<type>(             \
+                                     s, curUV, bounds.x, bounds.z, radius, \
+                                     coeffs.x, coeffs.y, __VA_ARGS__));    \
+      }                                                                    \
+    } else {                                                               \
+      for (; curUV.x + swgl_StepSize <= endX; curUV.x += swgl_StepSize) {  \
+        swgl_commitChunk(format, gaussianBlurVertical<type>(               \
+                                     s, curUV, bounds.y, bounds.w, radius, \
+                                     coeffs.x, coeffs.y, __VA_ARGS__));    \
+      }                                                                    \
+    }                                                                      \
+  } while (0)
+#define swgl_commitGaussianBlurRGBA8(s, p, uv_rect, hori, radius, coeffs, ...) \
+  swgl_commitGaussianBlur(RGBA8, uint32_t, s, p, uv_rect, hori, radius,        \
+                          coeffs, __VA_ARGS__)
+#define swgl_commitGaussianBlurR8(s, p, uv_rect, hori, radius, coeffs, ...) \
+  swgl_commitGaussianBlur(R8, uint8_t, s, p, uv_rect, hori, radius, coeffs, \
+                          __VA_ARGS__)
+
+// Convert and pack planar YUV samples to RGB output using a color space
+static ALWAYS_INLINE PackedRGBA8 convertYUV(int colorSpace, U16 y, U16 u,
+                                            U16 v) {
+  auto yy = V8<int16_t>(zip(y, y));
+  auto uv = V8<int16_t>(zip(u, v));
+  switch (colorSpace) {
+    case REC_601:
+      return YUVConverter<REC_601>::convert(yy, uv);
+    case REC_709:
+      return YUVConverter<REC_709>::convert(yy, uv);
+    case REC_2020:
+      return YUVConverter<REC_2020>::convert(yy, uv);
+    default:
+      return YUVConverter<IDENTITY>::convert(yy, uv);
+  }
+}
+
+// Helper functions to sample from planar YUV textures before converting to RGB
+template <typename S0>
+static inline PackedRGBA8 sampleYUV(S0 sampler0, vec2 uv0, int layer0,
+                                    int colorSpace, int rescaleFactor) {
+  ivec2 i0(uv0);
+  switch (sampler0->format) {
+    case TextureFormat::RGBA8: {
+      auto planar = textureLinearPlanarRGBA8(sampler0, i0, layer0);
+      return convertYUV(colorSpace, highHalf(planar.rg), lowHalf(planar.rg),
+                        lowHalf(planar.ba));
+    }
+    case TextureFormat::YUV422: {
+      auto planar = textureLinearPlanarYUV422(sampler0, i0, layer0);
+      return convertYUV(colorSpace, planar.y, planar.u, planar.v);
+    }
+    default:
+      assert(false);
+      return PackedRGBA8(0);
+  }
+}
+
+template <typename S0, typename C>
+static inline WideRGBA8 sampleColorYUV(S0 sampler0, vec2 uv0, int layer0,
+                                       int colorSpace, int rescaleFactor,
+                                       C color) {
+  return muldiv255(
+      unpack(sampleYUV(sampler0, uv0, layer0, colorSpace, rescaleFactor)),
+      pack_pixels_RGBA8(color));
+}
+
+template <typename S0, typename S1>
+static inline PackedRGBA8 sampleYUV(S0 sampler0, vec2 uv0, int layer0,
+                                    S1 sampler1, vec2 uv1, int layer1,
+                                    int colorSpace, int rescaleFactor) {
+  ivec2 i0(uv0);
+  ivec2 i1(uv1);
+  switch (sampler1->format) {
+    case TextureFormat::RG8: {
+      assert(sampler0->format == TextureFormat::R8);
+      auto y = textureLinearUnpackedR8(sampler0, i0, layer0);
+      auto planar = textureLinearPlanarRG8(sampler1, i1, layer1);
+      return convertYUV(colorSpace, y, lowHalf(planar.rg), highHalf(planar.rg));
+    }
+    case TextureFormat::RGBA8: {
+      assert(sampler0->format == TextureFormat::R8);
+      auto y = textureLinearUnpackedR8(sampler0, i0, layer0);
+      auto planar = textureLinearPlanarRGBA8(sampler1, i1, layer1);
+      return convertYUV(colorSpace, y, lowHalf(planar.ba), highHalf(planar.rg));
+    }
+    default:
+      assert(false);
+      return PackedRGBA8(0);
+  }
+}
+
+template <typename S0, typename S1, typename C>
+static inline WideRGBA8 sampleColorYUV(S0 sampler0, vec2 uv0, int layer0,
+                                       S1 sampler1, vec2 uv1, int layer1,
+                                       int colorSpace, int rescaleFactor,
+                                       C color) {
+  return muldiv255(unpack(sampleYUV(sampler0, uv0, layer0, sampler1, uv1,
+                                    layer1, colorSpace, rescaleFactor)),
+                   pack_pixels_RGBA8(color));
+}
+
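+// The overloads above and below are meant to cover the common plane layouts:
+// a single packed RGBA8 or YUV422 texture, a luma plane plus an interleaved
+// chroma plane (the R8 + RG8 and R8 + RGBA8 cases, e.g. NV12-style data), and
+// three separate planes (R8 for 8-bit content, R16 for higher bit depths).
+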
+template <typename S0, typename S1, typename S2>
+static inline PackedRGBA8 sampleYUV(S0 sampler0, vec2 uv0, int layer0,
+                                    S1 sampler1, vec2 uv1, int layer1,
+                                    S2 sampler2, vec2 uv2, int layer2,
+                                    int colorSpace, int rescaleFactor) {
+  ivec2 i0(uv0);
+  ivec2 i1(uv1);
+  ivec2 i2(uv2);
+  assert(sampler0->format == sampler1->format &&
+         sampler0->format == sampler2->format);
+  switch (sampler0->format) {
+    case TextureFormat::R8: {
+      auto y = textureLinearUnpackedR8(sampler0, i0, layer0);
+      auto u = textureLinearUnpackedR8(sampler1, i1, layer1);
+      auto v = textureLinearUnpackedR8(sampler2, i2, layer2);
+      return convertYUV(colorSpace, y, u, v);
+    }
+    case TextureFormat::R16: {
+      // The rescaling factor represents how many bits to add to renormalize
+      // the texture to 16 bits, and so the color depth is actually 16 minus
+      // the rescaling factor.
+      // Need to right shift the sample by the amount of bits over 8 it
+      // occupies. On output from textureLinearUnpackedR16, we have lost 1 bit
+      // of precision at the low end already, hence 1 is subtracted from the
+      // color depth.
+      int colorDepth = 16 - rescaleFactor;
+      int rescaleBits = (colorDepth - 1) - 8;
+      auto y = textureLinearUnpackedR16(sampler0, i0, layer0) >> rescaleBits;
+      auto u = textureLinearUnpackedR16(sampler1, i1, layer1) >> rescaleBits;
+      auto v = textureLinearUnpackedR16(sampler2, i2, layer2) >> rescaleBits;
+      return convertYUV(colorSpace, U16(y), U16(u), U16(v));
+    }
+    default:
+      assert(false);
+      return PackedRGBA8(0);
+  }
+}
+
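+// Worked example for the R16 path above: for 10-bit content the caller would
+// pass rescaleFactor = 6, giving colorDepth = 16 - 6 = 10 and
+// rescaleBits = (10 - 1) - 8 = 1. Each sample, which occupies
+// colorDepth - 1 = 9 bits after textureLinearUnpackedR16, is shifted right by
+// 1 bit, bringing it into the same 8-bit range that the R8 path feeds to
+// convertYUV.
+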
+template <typename S0, typename S1, typename S2, typename C>
+static inline WideRGBA8 sampleColorYUV(S0 sampler0, vec2 uv0, int layer0,
+                                       S1 sampler1, vec2 uv1, int layer1,
+                                       S2 sampler2, vec2 uv2, int layer2,
+                                       int colorSpace, int rescaleFactor,
+                                       C color) {
+  return muldiv255(
+      unpack(sampleYUV(sampler0, uv0, layer0, sampler1, uv1, layer1, sampler2,
+                       uv2, layer2, colorSpace, rescaleFactor)),
+      pack_pixels_RGBA8(color));
+}
+
+// Commit a single chunk of a YUV surface represented by multiple planar
+// textures. This requires a color space specifier selecting how to convert
+// from YUV to RGB output. In the case of HDR formats, a rescaling factor
+// selects how many bits of precision must be utilized on conversion. See the
+// sampleYUV dispatcher functions for the various supported plane
+// configurations this intrinsic accepts.
+#define swgl_commitTextureLinearYUV(...) \
+  swgl_commitChunk(RGBA8, sampleYUV(__VA_ARGS__))
+// Commit a single chunk of a YUV surface scaled by a color.
+#define swgl_commitTextureLinearColorYUV(...) \
+  swgl_commitChunk(RGBA8, sampleColorYUV(__VA_ARGS__))
+
+// Helper functions to apply a color modulus when available.
+struct NoColor {};
+
+SI WideRGBA8 applyColor(WideRGBA8 src, NoColor) { return src; }
+
+SI WideRGBA8 applyColor(WideRGBA8 src, WideRGBA8 color) {
+  return muldiv255(src, color);
+}
+
+SI PackedRGBA8 applyColor(PackedRGBA8 src, NoColor) { return src; }
+
+SI PackedRGBA8 applyColor(PackedRGBA8 src, WideRGBA8 color) {
+  return pack(muldiv255(unpack(src), color));
+}
+
+// Samples an axis-aligned span on a single row of a texture using 1:1 nearest
+// filtering. Sampling is constrained to only fall within the given UV bounds.
+// This requires a pointer to the destination buffer. An optional color modulus
+// can be supplied.
+template <typename S, typename C>
+static void blendTextureNearestRGBA8(S sampler, const ivec2_scalar& i, int span,
+                                     const ivec2_scalar& minUV,
+                                     const ivec2_scalar& maxUV, C color,
+                                     uint32_t* buf, int layerOffset = 0) {
+  // Calculate the row pointer within the buffer, clamping to within valid row
+  // bounds.
+  uint32_t* row =
+      &sampler->buf[clamp(clampCoord(i.y, sampler->height), minUV.y, maxUV.y) *
+                        sampler->stride +
+                    layerOffset];
+  // Find clamped X bounds within the row.
+  int minX = clamp(minUV.x, 0, sampler->width - 1);
+  int maxX = clamp(maxUV.x, minX, sampler->width - 1);
+  int curX = i.x;
+  // If we need to start sampling below the valid sample bounds, then we need
+  // to fill this section with a constant clamped sample.
+  if (curX < minX) {
+    int n = min(minX - curX, span);
+    auto src = applyColor(unpack(bit_cast<PackedRGBA8>(U32(row[minX]))), color);
+    commit_solid_span(buf, src, n);
+    buf += n;
+    span -= n;
+    curX += n;
+  }
+  // Here we only deal with valid samples within the sample bounds. No clamping
+  // should occur here within these inner loops.
+  int n = clamp(maxX + 1 - curX, 0, span);
+  span -= n;
+  // Try to process as many chunks as possible with full loads and stores.
+  if (blend_key) {
+    for (int end = curX + (n & ~3); curX < end; curX += 4, buf += 4) {
+      auto src =
+          applyColor(unpack(unaligned_load<PackedRGBA8>(&row[curX])), color);
+      auto r = blend_pixels(buf, unaligned_load<PackedRGBA8>(buf), src);
+      unaligned_store(buf, pack(r));
+    }
+  } else {
+    for (int end = curX + (n & ~3); curX < end; curX += 4, buf += 4) {
+      auto src = applyColor(unaligned_load<PackedRGBA8>(&row[curX]), color);
+      unaligned_store(buf, src);
+    }
+  }
+  n &= 3;
+  // If we have any leftover samples after processing chunks, use partial loads
+  // and stores.
+  if (n > 0) {
+    if (blend_key) {
+      auto src = applyColor(
+          unpack(partial_load_span<PackedRGBA8>(&row[curX], n)), color);
+      auto r =
+          blend_pixels(buf, partial_load_span<PackedRGBA8>(buf, n), src, n);
+      partial_store_span(buf, pack(r), n);
+    } else {
+      auto src =
+          applyColor(partial_load_span<PackedRGBA8>(&row[curX], n), color);
+      partial_store_span(buf, src, n);
+    }
+    buf += n;
+    curX += n;
+  }
+  // If we still have samples left above the valid sample bounds, then we again
+  // need to fill this section with a constant clamped sample.
+  if (span > 0) {
+    auto src = applyColor(unpack(bit_cast<PackedRGBA8>(U32(row[maxX]))), color);
+    commit_solid_span(buf, src, span);
+  }
+}
+
+// TODO: blendTextureNearestR8 if it is actually needed
+
+// Commit an entire span of 1:1 nearest texture fetches, potentially scaled by
+// a color
+#define swgl_commitTextureNearest(format, s, p, uv_rect, color, ...)          \
+  do {                                                                        \
+    ivec2_scalar i = make_ivec2(samplerScale(s, force_scalar(p)));            \
+    ivec2_scalar min_uv =                                                     \
+        make_ivec2(samplerScale(s, vec2_scalar{uv_rect.x, uv_rect.y}));       \
+    ivec2_scalar max_uv =                                                     \
+        make_ivec2(samplerScale(s, vec2_scalar{uv_rect.z, uv_rect.w}));       \
+    blendTextureNearest##format(s, i, swgl_SpanLength, min_uv, max_uv, color, \
+                                swgl_Out##format, __VA_ARGS__);               \
+    swgl_Out##format += swgl_SpanLength;                                      \
+    swgl_SpanLength = 0;                                                      \
+  } while (0)
+#define swgl_commitTextureNearestRGBA8(s, p, uv_rect, ...) \
+  swgl_commitTextureNearest(RGBA8, s, p, uv_rect, NoColor(), __VA_ARGS__)
+#define swgl_commitTextureNearestR8(s, p, uv_rect, ...) \
+  swgl_commitTextureNearest(R8, s, p, uv_rect, NoColor(), __VA_ARGS__)
+
+#define swgl_commitTextureNearestColor(format, s, p, uv_rect, color, ...) \
+  swgl_commitTextureNearest(format, s, p, uv_rect,                        \
+                            pack_pixels_##format(color), __VA_ARGS__)
+#define swgl_commitTextureNearestColorRGBA8(s, p, uv_rect, color, ...) \
+  swgl_commitTextureNearestColor(RGBA8, s, p, uv_rect, color, __VA_ARGS__)
+#define swgl_commitTextureNearestColorR8(s, p, uv_rect, color, ...) \
+  swgl_commitTextureNearestColor(R8, s, p, uv_rect, color, __VA_ARGS__)
+
+// Helper function to decide whether we can safely apply 1:1 nearest filtering
+// without diverging too much from the linear filter
+template <typename S, typename T>
+static bool allowTextureNearest(S sampler, T P, int span) {
+  // First verify that the row Y doesn't change across samples
+  if (P.y.x != P.y.y) {
+    return false;
+  }
+  P = samplerScale(sampler, P);
+  // We need to verify that the pixel step reasonably approximates stepping
+  // by a single texel for every pixel we need to reproduce. Try to ensure
+  // that the margin of error is no more than approximately 2^-7.
+  span &= ~(128 - 1);
+  span += 128;
+  return round((P.x.y - P.x.x) * span) == span &&
+         // Also verify that we're reasonably close to the center of a texel
+         // so that it doesn't look that much different than if a linear filter
+         // was used.
+         (int(P.x.x * 4.0f + 0.5f) & 3) == 2 &&
+         (int(P.y.x * 4.0f + 0.5f) & 3) == 2;
+}
+
+// Determine if we can apply 1:1 nearest filtering to a span of texture
+#define swgl_allowTextureNearest(s, p) \
+  allowTextureNearest(s, p, swgl_SpanLength)
+
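+// To unpack those checks with concrete numbers: after the span is padded up to
+// a multiple of 128 (so at least 128), requiring round((P.x.y - P.x.x) * span)
+// == span bounds the per-pixel texel step to within 0.5 / span of 1.0, at most
+// about 0.5 / 128, on the order of the 2^-7 margin mentioned above. Likewise,
+// (int(x * 4.0f + 0.5f) & 3) == 2 holds exactly when the fractional part of x
+// lies in [0.375, 0.625), i.e. within 1/8 of the texel center at 0.5.
+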
+// Extension to set a clip mask image to be sampled during blending. The offset
+// specifies the positioning of the clip mask image relative to the viewport
+// origin. The bounding box specifies the rectangle relative to the clip mask's
+// origin that constrains sampling within the clip mask.
+static sampler2D swgl_ClipMask = nullptr;
+static IntPoint swgl_ClipMaskOffset = {0, 0};
+static IntRect swgl_ClipMaskBounds = {0, 0, 0, 0};
+#define swgl_clipMask(mask, offset, bb_origin, bb_size)        \
+  do {                                                         \
+    if (bb_size != vec2_scalar(0.0f, 0.0f)) {                  \
+      swgl_ClipMask = mask;                                    \
+      swgl_ClipMaskOffset = make_ivec2(offset);                \
+      swgl_ClipMaskBounds =                                    \
+          IntRect(make_ivec2(bb_origin), make_ivec2(bb_size)); \
+    }                                                          \
+  } while (0)
+
+// Dispatch helper used by the GLSL translator to call the swgl_drawSpan
+// functions. The number of pixels committed is tracked by checking for the
+// difference in swgl_SpanLength. Any varying interpolants used will be
+// advanced past the committed part of the span in case the fragment shader
+// must be executed for any remaining pixels that were not committed by the
+// span shader.
+#define DISPATCH_DRAW_SPAN(self, format)                                      \
+  do {                                                                        \
+    int total = self->swgl_SpanLength;                                        \
+    self->swgl_drawSpan##format();                                            \
+    int drawn = total - self->swgl_SpanLength;                                \
+    if (drawn) self->step_interp_inputs(drawn);                               \
+    while (self->swgl_SpanLength > 0) {                                       \
+      run(self);                                                              \
+      commit_span(self->swgl_Out##format, pack_span(self->swgl_Out##format)); \
+      self->swgl_Out##format += swgl_StepSize;                                \
+      self->swgl_SpanLength -= swgl_StepSize;                                 \
+    }                                                                         \
+  } while (0)
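+// As a sketch of the intended call site (the class and member names below are
+// placeholders, not the translator's actual output), a translated fragment
+// shader that provides a span shader might wire this up roughly as:
+//
+//   struct BrushSolidFrag : FragmentShaderImpl {
+//     void swgl_drawSpanRGBA8() { swgl_commitSolidRGBA8(v_color); }
+//     static void draw_span_RGBA8(Self* self) {
+//       DISPATCH_DRAW_SPAN(self, RGBA8);
+//     }
+//   };
+//
+// Whatever swgl_drawSpanRGBA8 leaves uncommitted is then rendered by the
+// regular per-chunk run()/commit_span() loop in the macro above.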