Diffstat (limited to 'gfx/wr/swgl/src/swgl_ext.h')
-rw-r--r--   gfx/wr/swgl/src/swgl_ext.h   532
1 file changed, 532 insertions, 0 deletions
diff --git a/gfx/wr/swgl/src/swgl_ext.h b/gfx/wr/swgl/src/swgl_ext.h
new file mode 100644
index 0000000000..fd4e587889
--- /dev/null
+++ b/gfx/wr/swgl/src/swgl_ext.h
@@ -0,0 +1,532 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+static inline void commit_span(uint32_t* buf, WideRGBA8 r) {
+  if (blend_key) r = blend_pixels(buf, unaligned_load<PackedRGBA8>(buf), r);
+  unaligned_store(buf, pack(r));
+}
+
+static inline void commit_span(uint32_t* buf, PackedRGBA8 r) {
+  if (blend_key)
+    r = pack(blend_pixels(buf, unaligned_load<PackedRGBA8>(buf), unpack(r)));
+  unaligned_store(buf, r);
+}
+
+UNUSED static inline void commit_solid_span(uint32_t* buf, WideRGBA8 r,
+                                            int len) {
+  if (blend_key) {
+    for (uint32_t* end = &buf[len & ~3]; buf < end; buf += 4) {
+      unaligned_store(
+          buf, pack(blend_pixels(buf, unaligned_load<PackedRGBA8>(buf), r)));
+    }
+    len &= 3;
+    if (len > 0) {
+      partial_store_span(
+          buf,
+          pack(blend_pixels(buf, partial_load_span<PackedRGBA8>(buf, len), r,
+                            len)),
+          len);
+    }
+  } else {
+    fill_n(buf, len, bit_cast<U32>(pack(r)).x);
+  }
+}
+
+static inline void commit_span(uint8_t* buf, WideR8 r) {
+  if (blend_key)
+    r = blend_pixels(buf, unpack(unaligned_load<PackedR8>(buf)), r);
+  unaligned_store(buf, pack(r));
+}
+
+UNUSED static inline void commit_solid_span(uint8_t* buf, WideR8 r, int len) {
+  if (blend_key) {
+    for (uint8_t* end = &buf[len]; buf < end; buf += 4) {
+      unaligned_store(buf, pack(blend_pixels(
+                               buf, unpack(unaligned_load<PackedR8>(buf)), r)));
+    }
+  } else {
+    fill_n((uint32_t*)buf, len / 4, bit_cast<uint32_t>(pack(r)));
+  }
+}
+
+template <typename V>
+static inline WideRGBA8 pack_span(uint32_t*, const V& v) {
+  return pack_pixels_RGBA8(v);
+}
+
+static inline WideRGBA8 pack_span(uint32_t*) { return pack_pixels_RGBA8(); }
+
+template <typename C>
+static inline WideR8 pack_span(uint8_t*, C c) {
+  return pack_pixels_R8(c);
+}
+
+static inline WideR8 pack_span(uint8_t*) { return pack_pixels_R8(); }
+
+// Forces a value with vector run-class to have scalar run-class.
+template <typename T>
+static ALWAYS_INLINE auto swgl_forceScalar(T v) -> decltype(force_scalar(v)) {
+  return force_scalar(v);
+}
+
+// Advance all varying interpolants by a single chunk
+#define swgl_stepInterp() step_interp_inputs()
+
+// Pseudo-intrinsic that accesses the interpolation step for a given varying
+#define swgl_interpStep(v) (interp_step.v)
+
+// Commit an entire span of a solid color
+#define swgl_commitSolid(format, v)                                       \
+  do {                                                                    \
+    commit_solid_span(swgl_Out##format, pack_span(swgl_Out##format, (v)), \
+                      swgl_SpanLength);                                   \
+    swgl_Out##format += swgl_SpanLength;                                  \
+    swgl_SpanLength = 0;                                                  \
+  } while (0)
+#define swgl_commitSolidRGBA8(v) swgl_commitSolid(RGBA8, v)
+#define swgl_commitSolidR8(v) swgl_commitSolid(R8, v)
+
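+// As a rough usage sketch (assuming a hypothetical solid-brush span shader
+// with a placeholder varying named v_color), a swgl_drawSpan function could
+// drain its whole span with a single commit:
+//
+//   void swgl_drawSpanRGBA8() { swgl_commitSolidRGBA8(v_color); }
+//
+// swgl_commitSolid blends or fills all swgl_SpanLength pixels at once and then
+// leaves swgl_SpanLength at zero, so the per-chunk fragment loop is skipped.
+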
+#define swgl_commitChunk(format, chunk)   \
+  do {                                    \
+    commit_span(swgl_Out##format, chunk); \
+    swgl_Out##format += swgl_StepSize;    \
+    swgl_SpanLength -= swgl_StepSize;     \
+  } while (0)
+
+static inline WideRGBA8 pack_pixels_RGBA8(Float alpha) {
+  I32 i = round_pixel(alpha);
+  HalfRGBA8 c = packRGBA8(zipLow(i, i), zipHigh(i, i));
+  return combine(zipLow(c, c), zipHigh(c, c));
+}
+
+static inline WideRGBA8 pack_pixels_RGBA8(float alpha) {
+  I32 i = round_pixel(alpha);
+  HalfRGBA8 c = packRGBA8(i, i);
+  return combine(c, c);
+}
+
+// Commit a single chunk of a color scaled by an alpha weight
+#define swgl_commitColor(format, color, alpha)                    \
+  swgl_commitChunk(format, muldiv255(pack_pixels_##format(color), \
+                                     pack_pixels_##format(alpha)))
+#define swgl_commitColorRGBA8(color, alpha) \
+  swgl_commitColor(RGBA8, color, alpha)
+#define swgl_commitColorR8(color, alpha) swgl_commitColor(R8, color, alpha)
+
+template <typename S>
+static ALWAYS_INLINE bool swgl_isTextureLinear(S s) {
+  return s->filter == TextureFilter::LINEAR;
+}
+
+template <typename S>
+static ALWAYS_INLINE bool swgl_isTextureRGBA8(S s) {
+  return s->format == TextureFormat::RGBA8;
+}
+
+template <typename S>
+static ALWAYS_INLINE bool swgl_isTextureR8(S s) {
+  return s->format == TextureFormat::R8;
+}
+
+// Returns the offset into the texture buffer for the given layer index. If not
+// a texture array or 3D texture, this will always access the first layer.
+template <typename S>
+static ALWAYS_INLINE int swgl_textureLayerOffset(S s, float layer) {
+  return 0;
+}
+
+UNUSED static ALWAYS_INLINE int swgl_textureLayerOffset(sampler2DArray s,
+                                                        float layer) {
+  return clampCoord(int(layer), s->depth) * s->height_stride;
+}
+
+// Use the default linear quantization scale of 128. This gives 7 bits of
+// fractional precision, which when multiplied with a signed 9 bit value
+// still fits in a 16 bit integer.
+const int swgl_LinearQuantizeScale = 128;
+
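+// To make that concrete: 128 = 2^7, so the quantized fraction carries 7 bits,
+// and a signed 9-bit value lies in [-256, 255]; the worst-case products are
+// 255 * 128 = 32640 and -256 * 128 = -32768, both of which still fit in a
+// signed 16-bit integer.
+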
+// Quantizes UVs for access into a linear texture.
+template <typename S, typename T>
+static ALWAYS_INLINE T swgl_linearQuantize(S s, T p) {
+  return linearQuantize(p, swgl_LinearQuantizeScale, s);
+}
+
+// Quantizes an interpolation step for UVs for access into a linear texture.
+template <typename S, typename T>
+static ALWAYS_INLINE T swgl_linearQuantizeStep(S s, T p) {
+  return samplerScale(s, p) * swgl_LinearQuantizeScale;
+}
+
+// Commit a single chunk from a linear texture fetch
+#define swgl_commitTextureLinear(format, s, p, ...) \
+  swgl_commitChunk(format,                          \
+                   textureLinearUnpacked##format(s, ivec2(p), __VA_ARGS__))
+#define swgl_commitTextureLinearRGBA8(s, p, ...) \
+  swgl_commitTextureLinear(RGBA8, s, p, __VA_ARGS__)
+#define swgl_commitTextureLinearR8(s, p, ...) \
+  swgl_commitTextureLinear(R8, s, p, __VA_ARGS__)
+
+// Commit a single chunk from a linear texture fetch that is scaled by a color
+#define swgl_commitTextureLinearColor(format, s, p, color, ...)     \
+  swgl_commitChunk(format, muldiv255(textureLinearUnpacked##format( \
+                                         s, ivec2(p), __VA_ARGS__), \
+                                     pack_pixels_##format(color)))
+#define swgl_commitTextureLinearColorRGBA8(s, p, color, ...) \
+  swgl_commitTextureLinearColor(RGBA8, s, p, color, __VA_ARGS__)
+#define swgl_commitTextureLinearColorR8(s, p, color, ...) \
+  swgl_commitTextureLinearColor(R8, s, p, color, __VA_ARGS__)
+
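+// For illustration only: assuming a hypothetical image span shader whose
+// sampler and UV varying are named sColor0 and v_uv (placeholders), and
+// passing 0 as the trailing layer offset that swgl_commitTextureLinear*
+// forwards to textureLinearUnpacked*, a span loop built on these helpers
+// might look roughly like:
+//
+//   void swgl_drawSpanRGBA8() {
+//     if (!swgl_isTextureLinear(sColor0) || !swgl_isTextureRGBA8(sColor0)) {
+//       return;
+//     }
+//     vec2 uv = swgl_linearQuantize(sColor0, v_uv);
+//     vec2 uv_step = swgl_linearQuantizeStep(sColor0, swgl_interpStep(v_uv));
+//     while (swgl_SpanLength > 0) {
+//       swgl_commitTextureLinearRGBA8(sColor0, uv, 0);
+//       uv += uv_step;
+//     }
+//   }
+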
+// Commit an entire span of a separable pass of a Gaussian blur that falls
+// within the given radius scaled by supplied coefficients, clamped to uv_rect
+// bounds.
+#define swgl_commitGaussianBlur(format, type, s, p, uv_rect, hori, radius, \
+                                coeffs, ...)                               \
+  do {                                                                     \
+    vec2_scalar size = {float(s->width), float(s->height)};                \
+    ivec2_scalar curUV = make_ivec2(force_scalar(p) * size);               \
+    ivec4_scalar bounds = make_ivec4(uv_rect * make_vec4(size, size));     \
+    int endX = min(bounds.z, curUV.x + swgl_SpanLength * swgl_StepSize);   \
+    if (hori) {                                                            \
+      for (; curUV.x + swgl_StepSize <= endX; curUV.x += swgl_StepSize) {  \
+        swgl_commitChunk(format, gaussianBlurHorizontal<type>(             \
+                                     s, curUV, bounds.x, bounds.z, radius, \
+                                     coeffs.x, coeffs.y, __VA_ARGS__));    \
+      }                                                                    \
+    } else {                                                               \
+      for (; curUV.x + swgl_StepSize <= endX; curUV.x += swgl_StepSize) {  \
+        swgl_commitChunk(format, gaussianBlurVertical<type>(               \
+                                     s, curUV, bounds.y, bounds.w, radius, \
+                                     coeffs.x, coeffs.y, __VA_ARGS__));    \
+      }                                                                    \
+    }                                                                      \
+  } while (0)
+#define swgl_commitGaussianBlurRGBA8(s, p, uv_rect, hori, radius, coeffs, ...) \
+  swgl_commitGaussianBlur(RGBA8, uint32_t, s, p, uv_rect, hori, radius,        \
+                          coeffs, __VA_ARGS__)
+#define swgl_commitGaussianBlurR8(s, p, uv_rect, hori, radius, coeffs, ...) \
+  swgl_commitGaussianBlur(R8, uint8_t, s, p, uv_rect, hori, radius, coeffs, \
+                          __VA_ARGS__)
+
+// Convert and pack planar YUV samples to RGB output using a color space
+static ALWAYS_INLINE PackedRGBA8 convertYUV(int colorSpace, U16 y, U16 u,
+                                            U16 v) {
+  auto yy = V8<int16_t>(zip(y, y));
+  auto uv = V8<int16_t>(zip(u, v));
+  switch (colorSpace) {
+    case REC_601:
+      return YUVConverter<REC_601>::convert(yy, uv);
+    case REC_709:
+      return YUVConverter<REC_709>::convert(yy, uv);
+    case REC_2020:
+      return YUVConverter<REC_2020>::convert(yy, uv);
+    default:
+      return YUVConverter<IDENTITY>::convert(yy, uv);
+  }
+}
+
+// Helper functions to sample from planar YUV textures before converting to RGB
+template <typename S0>
+static inline PackedRGBA8 sampleYUV(S0 sampler0, vec2 uv0, int layer0,
+                                    int colorSpace, int rescaleFactor) {
+  ivec2 i0(uv0);
+  switch (sampler0->format) {
+    case TextureFormat::RGBA8: {
+      auto planar = textureLinearPlanarRGBA8(sampler0, i0, layer0);
+      return convertYUV(colorSpace, highHalf(planar.rg), lowHalf(planar.rg),
+                        lowHalf(planar.ba));
+    }
+    case TextureFormat::YUV422: {
+      auto planar = textureLinearPlanarYUV422(sampler0, i0, layer0);
+      return convertYUV(colorSpace, planar.y, planar.u, planar.v);
+    }
+    default:
+      assert(false);
+      return PackedRGBA8(0);
+  }
+}
+
+template <typename S0, typename C>
+static inline WideRGBA8 sampleColorYUV(S0 sampler0, vec2 uv0, int layer0,
+                                       int colorSpace, int rescaleFactor,
+                                       C color) {
+  return muldiv255(
+      unpack(sampleYUV(sampler0, uv0, layer0, colorSpace, rescaleFactor)),
+      pack_pixels_RGBA8(color));
+}
+
+template <typename S0, typename S1>
+static inline PackedRGBA8 sampleYUV(S0 sampler0, vec2 uv0, int layer0,
+                                    S1 sampler1, vec2 uv1, int layer1,
+                                    int colorSpace, int rescaleFactor) {
+  ivec2 i0(uv0);
+  ivec2 i1(uv1);
+  switch (sampler1->format) {
+    case TextureFormat::RG8: {
+      assert(sampler0->format == TextureFormat::R8);
+      auto y = textureLinearUnpackedR8(sampler0, i0, layer0);
+      auto planar = textureLinearPlanarRG8(sampler1, i1, layer1);
+      return convertYUV(colorSpace, y, lowHalf(planar.rg), highHalf(planar.rg));
+    }
+    case TextureFormat::RGBA8: {
+      assert(sampler0->format == TextureFormat::R8);
+      auto y = textureLinearUnpackedR8(sampler0, i0, layer0);
+      auto planar = textureLinearPlanarRGBA8(sampler1, i1, layer1);
+      return convertYUV(colorSpace, y, lowHalf(planar.ba), highHalf(planar.rg));
+    }
+    default:
+      assert(false);
+      return PackedRGBA8(0);
+  }
+}
+
+template <typename S0, typename S1, typename C>
+static inline WideRGBA8 sampleColorYUV(S0 sampler0, vec2 uv0, int layer0,
+                                       S1 sampler1, vec2 uv1, int layer1,
+                                       int colorSpace, int rescaleFactor,
+                                       C color) {
+  return muldiv255(unpack(sampleYUV(sampler0, uv0, layer0, sampler1, uv1,
+                                    layer1, colorSpace, rescaleFactor)),
+                   pack_pixels_RGBA8(color));
+}
+
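+// The overloads above and below are meant to cover the common plane layouts:
+// a single packed RGBA8 or YUV422 texture, a luma plane plus an interleaved
+// chroma plane (the R8 + RG8 and R8 + RGBA8 cases, e.g. NV12-style data), and
+// three separate planes (R8 for 8-bit content, R16 for higher bit depths).
+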
+template <typename S0, typename S1, typename S2>
+static inline PackedRGBA8 sampleYUV(S0 sampler0, vec2 uv0, int layer0,
+                                    S1 sampler1, vec2 uv1, int layer1,
+                                    S2 sampler2, vec2 uv2, int layer2,
+                                    int colorSpace, int rescaleFactor) {
+  ivec2 i0(uv0);
+  ivec2 i1(uv1);
+  ivec2 i2(uv2);
+  assert(sampler0->format == sampler1->format &&
+         sampler0->format == sampler2->format);
+  switch (sampler0->format) {
+    case TextureFormat::R8: {
+      auto y = textureLinearUnpackedR8(sampler0, i0, layer0);
+      auto u = textureLinearUnpackedR8(sampler1, i1, layer1);
+      auto v = textureLinearUnpackedR8(sampler2, i2, layer2);
+      return convertYUV(colorSpace, y, u, v);
+    }
+    case TextureFormat::R16: {
+      // The rescaling factor represents how many bits to add to renormalize
+      // the texture to 16 bits, and so the color depth is actually 16 minus
+      // the rescaling factor.
+      // Need to right shift the sample by the amount of bits over 8 it
+      // occupies. On output from textureLinearUnpackedR16, we have lost 1 bit
+      // of precision at the low end already, hence 1 is subtracted from the
+      // color depth.
+      int colorDepth = 16 - rescaleFactor;
+      int rescaleBits = (colorDepth - 1) - 8;
+      auto y = textureLinearUnpackedR16(sampler0, i0, layer0) >> rescaleBits;
+      auto u = textureLinearUnpackedR16(sampler1, i1, layer1) >> rescaleBits;
+      auto v = textureLinearUnpackedR16(sampler2, i2, layer2) >> rescaleBits;
+      return convertYUV(colorSpace, U16(y), U16(u), U16(v));
+    }
+    default:
+      assert(false);
+      return PackedRGBA8(0);
+  }
+}
+
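+// Worked example for the R16 path above: for 10-bit content the caller would
+// pass rescaleFactor = 6, giving colorDepth = 16 - 6 = 10 and
+// rescaleBits = (10 - 1) - 8 = 1. Each sample, which occupies
+// colorDepth - 1 = 9 bits after textureLinearUnpackedR16, is shifted right by
+// 1 bit, bringing it into the same 8-bit range that the R8 path feeds to
+// convertYUV.
+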
+template <typename S0, typename S1, typename S2, typename C>
+static inline WideRGBA8 sampleColorYUV(S0 sampler0, vec2 uv0, int layer0,
+                                       S1 sampler1, vec2 uv1, int layer1,
+                                       S2 sampler2, vec2 uv2, int layer2,
+                                       int colorSpace, int rescaleFactor,
+                                       C color) {
+  return muldiv255(
+      unpack(sampleYUV(sampler0, uv0, layer0, sampler1, uv1, layer1, sampler2,
+                       uv2, layer2, colorSpace, rescaleFactor)),
+      pack_pixels_RGBA8(color));
+}
+
+// Commit a single chunk of a YUV surface represented by multiple planar
+// textures. This requires a color space specifier selecting how to convert
+// from YUV to RGB output. In the case of HDR formats, a rescaling factor
+// selects how many bits of precision must be utilized on conversion. See the
+// sampleYUV dispatcher functions for the various supported plane
+// configurations this intrinsic accepts.
+#define swgl_commitTextureLinearYUV(...) \
+  swgl_commitChunk(RGBA8, sampleYUV(__VA_ARGS__))
+// Commit a single chunk of a YUV surface scaled by a color.
+#define swgl_commitTextureLinearColorYUV(...) \
+  swgl_commitChunk(RGBA8, sampleColorYUV(__VA_ARGS__))
+
+// Helper functions to apply a color modulus when available.
+struct NoColor {};
+
+SI WideRGBA8 applyColor(WideRGBA8 src, NoColor) { return src; }
+
+SI WideRGBA8 applyColor(WideRGBA8 src, WideRGBA8 color) {
+  return muldiv255(src, color);
+}
+
+SI PackedRGBA8 applyColor(PackedRGBA8 src, NoColor) { return src; }
+
+SI PackedRGBA8 applyColor(PackedRGBA8 src, WideRGBA8 color) {
+  return pack(muldiv255(unpack(src), color));
+}
+
+// Samples an axis-aligned span on a single row of a texture using 1:1 nearest
+// filtering. Sampling is constrained to only fall within the given UV bounds.
+// This requires a pointer to the destination buffer. An optional color modulus
+// can be supplied.
+template <typename S, typename C>
+static void blendTextureNearestRGBA8(S sampler, const ivec2_scalar& i, int span,
+                                     const ivec2_scalar& minUV,
+                                     const ivec2_scalar& maxUV, C color,
+                                     uint32_t* buf, int layerOffset = 0) {
+  // Calculate the row pointer within the buffer, clamping to within valid row
+  // bounds.
+  uint32_t* row =
+      &sampler->buf[clamp(clampCoord(i.y, sampler->height), minUV.y, maxUV.y) *
+                        sampler->stride +
+                    layerOffset];
+  // Find clamped X bounds within the row.
+  int minX = clamp(minUV.x, 0, sampler->width - 1);
+  int maxX = clamp(maxUV.x, minX, sampler->width - 1);
+  int curX = i.x;
+  // If we need to start sampling below the valid sample bounds, then we need
+  // to fill this section with a constant clamped sample.
+  if (curX < minX) {
+    int n = min(minX - curX, span);
+    auto src = applyColor(unpack(bit_cast<PackedRGBA8>(U32(row[minX]))), color);
+    commit_solid_span(buf, src, n);
+    buf += n;
+    span -= n;
+    curX += n;
+  }
+  // Here we only deal with valid samples within the sample bounds. No clamping
+  // should occur here within these inner loops.
+  int n = clamp(maxX + 1 - curX, 0, span);
+  span -= n;
+  // Try to process as many chunks as possible with full loads and stores.
+  if (blend_key) {
+    for (int end = curX + (n & ~3); curX < end; curX += 4, buf += 4) {
+      auto src =
+          applyColor(unpack(unaligned_load<PackedRGBA8>(&row[curX])), color);
+      auto r = blend_pixels(buf, unaligned_load<PackedRGBA8>(buf), src);
+      unaligned_store(buf, pack(r));
+    }
+  } else {
+    for (int end = curX + (n & ~3); curX < end; curX += 4, buf += 4) {
+      auto src = applyColor(unaligned_load<PackedRGBA8>(&row[curX]), color);
+      unaligned_store(buf, src);
+    }
+  }
+  n &= 3;
+  // If we have any leftover samples after processing chunks, use partial loads
+  // and stores.
+  if (n > 0) {
+    if (blend_key) {
+      auto src = applyColor(
+          unpack(partial_load_span<PackedRGBA8>(&row[curX], n)), color);
+      auto r =
+          blend_pixels(buf, partial_load_span<PackedRGBA8>(buf, n), src, n);
+      partial_store_span(buf, pack(r), n);
+    } else {
+      auto src =
+          applyColor(partial_load_span<PackedRGBA8>(&row[curX], n), color);
+      partial_store_span(buf, src, n);
+    }
+    buf += n;
+    curX += n;
+  }
+  // If we still have samples left above the valid sample bounds, then we again
+  // need to fill this section with a constant clamped sample.
+  if (span > 0) {
+    auto src = applyColor(unpack(bit_cast<PackedRGBA8>(U32(row[maxX]))), color);
+    commit_solid_span(buf, src, span);
+  }
+}
+
+// TODO: blendTextureNearestR8 if it is actually needed
+
+// Commit an entire span of 1:1 nearest texture fetches, potentially scaled by
+// a color
+#define swgl_commitTextureNearest(format, s, p, uv_rect, color, ...)          \
+  do {                                                                        \
+    ivec2_scalar i = make_ivec2(samplerScale(s, force_scalar(p)));            \
+    ivec2_scalar min_uv =                                                     \
+        make_ivec2(samplerScale(s, vec2_scalar{uv_rect.x, uv_rect.y}));       \
+    ivec2_scalar max_uv =                                                     \
+        make_ivec2(samplerScale(s, vec2_scalar{uv_rect.z, uv_rect.w}));       \
+    blendTextureNearest##format(s, i, swgl_SpanLength, min_uv, max_uv, color, \
+                                swgl_Out##format, __VA_ARGS__);               \
+    swgl_Out##format += swgl_SpanLength;                                      \
+    swgl_SpanLength = 0;                                                      \
+  } while (0)
+#define swgl_commitTextureNearestRGBA8(s, p, uv_rect, ...) \
+  swgl_commitTextureNearest(RGBA8, s, p, uv_rect, NoColor(), __VA_ARGS__)
+#define swgl_commitTextureNearestR8(s, p, uv_rect, ...) \
+  swgl_commitTextureNearest(R8, s, p, uv_rect, NoColor(), __VA_ARGS__)
+
+#define swgl_commitTextureNearestColor(format, s, p, uv_rect, color, ...) \
+  swgl_commitTextureNearest(format, s, p, uv_rect,                        \
+                            pack_pixels_##format(color), __VA_ARGS__)
+#define swgl_commitTextureNearestColorRGBA8(s, p, uv_rect, color, ...) \
+  swgl_commitTextureNearestColor(RGBA8, s, p, uv_rect, color, __VA_ARGS__)
+#define swgl_commitTextureNearestColorR8(s, p, uv_rect, color, ...) \
+  swgl_commitTextureNearestColor(R8, s, p, uv_rect, color, __VA_ARGS__)
+
+// Helper function to decide whether we can safely apply 1:1 nearest filtering
+// without diverging too much from the linear filter
+template <typename S, typename T>
+static bool allowTextureNearest(S sampler, T P, int span) {
+  // First verify that the row Y doesn't change across samples
+  if (P.y.x != P.y.y) {
+    return false;
+  }
+  P = samplerScale(sampler, P);
+  // We need to verify that the pixel step reasonably approximates stepping
+  // by a single texel for every pixel we need to reproduce. Try to ensure
+  // that the margin of error is no more than approximately 2^-7.
+  span &= ~(128 - 1);
+  span += 128;
+  return round((P.x.y - P.x.x) * span) == span &&
+         // Also verify that we're reasonably close to the center of a texel
+         // so that it doesn't look that much different than if a linear filter
+         // was used.
+         (int(P.x.x * 4.0f + 0.5f) & 3) == 2 &&
+         (int(P.y.x * 4.0f + 0.5f) & 3) == 2;
+}
+
+// Determine if we can apply 1:1 nearest filtering to a span of texture
+#define swgl_allowTextureNearest(s, p) \
+  allowTextureNearest(s, p, swgl_SpanLength)
+
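+// To unpack those checks with concrete numbers: after the span is padded up to
+// a multiple of 128 (so at least 128), requiring round((P.x.y - P.x.x) * span)
+// == span bounds the per-pixel texel step to within 0.5 / span of 1.0, at most
+// about 0.5 / 128, on the order of the 2^-7 margin mentioned above. Likewise,
+// (int(x * 4.0f + 0.5f) & 3) == 2 holds exactly when the fractional part of x
+// lies in [0.375, 0.625), i.e. within 1/8 of the texel center at 0.5.
+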
+// Extension to set a clip mask image to be sampled during blending. The offset
+// specifies the positioning of the clip mask image relative to the viewport
+// origin. The bounding box specifies the rectangle relative to the clip mask's
+// origin that constrains sampling within the clip mask.
+static sampler2D swgl_ClipMask = nullptr;
+static IntPoint swgl_ClipMaskOffset = {0, 0};
+static IntRect swgl_ClipMaskBounds = {0, 0, 0, 0};
+#define swgl_clipMask(mask, offset, bb_origin, bb_size)        \
+  do {                                                         \
+    if (bb_size != vec2_scalar(0.0f, 0.0f)) {                  \
+      swgl_ClipMask = mask;                                    \
+      swgl_ClipMaskOffset = make_ivec2(offset);                \
+      swgl_ClipMaskBounds =                                    \
+          IntRect(make_ivec2(bb_origin), make_ivec2(bb_size)); \
+    }                                                          \
+  } while (0)
+
+// Dispatch helper used by the GLSL translator to call the swgl_drawSpan
+// functions. The number of pixels committed is tracked by checking for the
+// difference in swgl_SpanLength. Any varying interpolants used will be
+// advanced past the committed part of the span in case the fragment shader
+// must be executed for any remaining pixels that were not committed by the
+// span shader.
+#define DISPATCH_DRAW_SPAN(self, format)                                      \
+  do {                                                                        \
+    int total = self->swgl_SpanLength;                                        \
+    self->swgl_drawSpan##format();                                            \
+    int drawn = total - self->swgl_SpanLength;                                \
+    if (drawn) self->step_interp_inputs(drawn);                               \
+    while (self->swgl_SpanLength > 0) {                                       \
+      run(self);                                                              \
+      commit_span(self->swgl_Out##format, pack_span(self->swgl_Out##format)); \
+      self->swgl_Out##format += swgl_StepSize;                                \
+      self->swgl_SpanLength -= swgl_StepSize;                                 \
+    }                                                                         \
+  } while (0)
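+// As a sketch of the intended call site (the class and member names below are
+// placeholders, not the translator's actual output), a translated fragment
+// shader that provides a span shader might wire this up roughly as:
+//
+//   struct BrushSolidFrag : FragmentShaderImpl {
+//     void swgl_drawSpanRGBA8() { swgl_commitSolidRGBA8(v_color); }
+//     static void draw_span_RGBA8(Self* self) {
+//       DISPATCH_DRAW_SPAN(self, RGBA8);
+//     }
+//   };
+//
+// Whatever swgl_drawSpanRGBA8 leaves uncommitted is then rendered by the
+// regular per-chunk run()/commit_span() loop in the macro above.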