49 files changed, 7596 insertions, 0 deletions
diff --git a/gfx/wr/webrender/res/Proggy.ttf b/gfx/wr/webrender/res/Proggy.ttf
new file mode 100644
index 0000000000..308d3e1ac9
--- /dev/null
+++ b/gfx/wr/webrender/res/Proggy.ttf
diff --git a/gfx/wr/webrender/res/area-lut.tga b/gfx/wr/webrender/res/area-lut.tga
new file mode 100644
index 0000000000..5edcddc3d1
--- /dev/null
+++ b/gfx/wr/webrender/res/area-lut.tga
diff --git a/gfx/wr/webrender/res/base.glsl b/gfx/wr/webrender/res/base.glsl
new file mode 100644
index 0000000000..e381ff6ca9
--- /dev/null
+++ b/gfx/wr/webrender/res/base.glsl
@@ -0,0 +1,70 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Platform-dependent helpers. Every branch below defines the same three macros:
+//   HIGHP_SAMPLER_FLOAT - precision qualifier for float samplers (empty when not needed).
+//   HIGHP_FS_ADDRESS    - precision qualifier for ints used as texel addresses in fragment shaders.
+//   TEXEL_FETCH         - texel fetch with an offset, through texelFetchOffset or a texelFetch fallback.
+#if defined(GL_ES)
+    #if GL_ES == 1
+        // Sampler default precision is lowp on mobile GPUs.
+        // This causes RGBA32F texture data to be clamped to 16 bit floats on some GPUs (e.g. Mali-T880).
+        // Define highp precision macro to allow lossless FLOAT texture sampling.
+        #define HIGHP_SAMPLER_FLOAT highp
+
+        // Default int precision in GLES 3 is highp (32 bits) in vertex shaders
+        // and mediump (16 bits) in fragment shaders. If an int is being used as
+        // a texel address in a fragment shader, and therefore requires > 16
+        // bits, it must be qualified with this.
+        #define HIGHP_FS_ADDRESS highp
+
+        // texelFetchOffset is buggy on some Android GPUs (see issue #1694).
+        // Fallback to texelFetch on mobile GPUs.
+        #define TEXEL_FETCH(sampler, position, lod, offset) texelFetch(sampler, position + offset, lod)
+    #else
+        // GL_ES is defined but is not 1: no precision overrides, and
+        // texelFetchOffset is used directly.
+        #define HIGHP_SAMPLER_FLOAT
+        #define HIGHP_FS_ADDRESS
+        #define TEXEL_FETCH(sampler, position, lod, offset) texelFetchOffset(sampler, position, lod, offset)
+    #endif
+#else
+    // GL_ES is not defined: the precision macros expand to nothing.
+    #define HIGHP_SAMPLER_FLOAT
+    #define HIGHP_FS_ADDRESS
+    #if defined(PLATFORM_MACOS) && !defined(SWGL)
+        // texelFetchOffset introduces a variety of shader compilation bugs on macOS Intel so avoid it.
+        #define TEXEL_FETCH(sampler, position, lod, offset) texelFetch(sampler, position + offset, lod)
+    #else
+        #define TEXEL_FETCH(sampler, position, lod, offset) texelFetchOffset(sampler, position, lod, offset)
+    #endif
+#endif
+
+// When SWGL is defined, turn on the SWGL_* feature macros that other shader
+// code tests with #if/#ifdef (for example SWGL_ANTIALIAS and SWGL_CLIP_MASK
+// in brush.glsl).
+#ifdef SWGL
+    #define SWGL_DRAW_SPAN
+    #define SWGL_CLIP_MASK
+    #define SWGL_ANTIALIAS
+    #define SWGL_BLEND
+    #define SWGL_CLIP_DIST
+#endif
+
+// Vertex-stage keyword setup.
+#ifdef WR_VERTEX_SHADER
+    #ifdef SWGL
+        // Annotate a vertex attribute as being flat per each drawn primitive instance.
+        // SWGL can use this information to avoid redundantly loading the attribute in all SIMD lanes.
+        #define PER_INSTANCE flat
+    #else
+        // Outside SWGL the annotation expands to nothing.
+        #define PER_INSTANCE
+    #endif
+
+    // For every GLSL version other than 100, map the legacy `varying` and
+    // `attribute` keywords onto `out` and `in`.
+    #if __VERSION__ != 100
+        #define varying out
+        #define attribute in
+    #endif
+#endif
+
+// Fragment-stage setup: floats are highp unless qualified otherwise, and for
+// every GLSL version other than 100 the legacy `varying` keyword maps to `in`.
+#ifdef WR_FRAGMENT_SHADER
+    precision highp float;
+    #if __VERSION__ != 100
+        #define varying in
+    #endif
+#endif
+
+// Flat interpolation is not supported on ESSL 1, so the `flat` qualifier is
+// defined away to nothing for version 100.
+#if __VERSION__ == 100
+    #define flat
+#endif
diff --git a/gfx/wr/webrender/res/blend.glsl b/gfx/wr/webrender/res/blend.glsl
new file mode 100644
index 0000000000..2deed01143
--- /dev/null
+++ b/gfx/wr/webrender/res/blend.glsl
@@ -0,0 +1,238 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Per-channel function types of a component transfer filter. These are the
+// values ComponentTransfer() matches the decoded `vfuncs` entries against.
+#define COMPONENT_TRANSFER_IDENTITY 0
+#define COMPONENT_TRANSFER_TABLE 1
+#define COMPONENT_TRANSFER_DISCRETE 2
+#define COMPONENT_TRANSFER_LINEAR 3
+#define COMPONENT_TRANSFER_GAMMA 4
+
+// Must be kept in sync with `Filter::as_int` in internal_types.rs
+// Not all filters are defined here because some filters use different shaders.
+#define FILTER_CONTRAST            0
+#define FILTER_GRAYSCALE           1
+#define FILTER_HUE_ROTATE          2
+#define FILTER_INVERT              3
+#define FILTER_SATURATE            4
+#define FILTER_SEPIA               5
+#define FILTER_BRIGHTNESS          6
+#define FILTER_COLOR_MATRIX        7
+#define FILTER_SRGB_TO_LINEAR      8
+#define FILTER_LINEAR_TO_SRGB      9
+#define FILTER_FLOOD               10
+#define FILTER_COMPONENT_TRANSFER  11
+
+#ifdef WR_VERTEX_SHADER
+// Computes the per-instance parameters that CalculateFilter() consumes in the
+// fragment stage.
+//
+//   op:               one of the FILTER_* constants.
+//   amount:           filter strength; read by the grayscale, hue-rotate,
+//                     saturate and sepia branches only.
+//   gpu_data_address: GPU cache address; read by the color-matrix,
+//                     component-transfer and flood branches only.
+//   color_offset:     [out] offset added after the matrix multiply, or the
+//                     flood color for FILTER_FLOOD.
+//   color_mat:        [out] color matrix; each vec4 passed to mat4() below is
+//                     one column of it.
+//   table_address:    [out] GPU cache address of the component transfer data.
+//
+// An `out` parameter that is never written holds an undefined value, and the
+// caller stores these outputs straight into flat varyings. Every output is
+// therefore given a neutral value first (zero offset, identity matrix,
+// address 0); a branch then overrides only what its filter needs.
+void SetupFilterParams(
+    int op,
+    float amount,
+    int gpu_data_address,
+    out vec4 color_offset,
+    out mat4 color_mat,
+    out highp int table_address
+) {
+    color_offset = vec4(0.0);
+    color_mat = mat4(1.0);
+    table_address = 0;
+
+    // Luminance coefficients for the red, green and blue channels.
+    float lumR = 0.2126;
+    float lumG = 0.7152;
+    float lumB = 0.0722;
+    float oneMinusLumR = 1.0 - lumR;
+    float oneMinusLumG = 1.0 - lumG;
+    float oneMinusLumB = 1.0 - lumB;
+    float invAmount = 1.0 - amount;
+
+    if (op == FILTER_GRAYSCALE) {
+        color_mat = mat4(
+            vec4(lumR + oneMinusLumR * invAmount, lumR - lumR * invAmount, lumR - lumR * invAmount, 0.0),
+            vec4(lumG - lumG * invAmount, lumG + oneMinusLumG * invAmount, lumG - lumG * invAmount, 0.0),
+            vec4(lumB - lumB * invAmount, lumB - lumB * invAmount, lumB + oneMinusLumB * invAmount, 0.0),
+            vec4(0.0, 0.0, 0.0, 1.0)
+        );
+    } else if (op == FILTER_HUE_ROTATE) {
+        // Here `amount` is an angle, fed directly to cos() and sin().
+        float c = cos(amount);
+        float s = sin(amount);
+        color_mat = mat4(
+            vec4(lumR + oneMinusLumR * c - lumR * s, lumR - lumR * c + 0.143 * s, lumR - lumR * c - oneMinusLumR * s, 0.0),
+            vec4(lumG - lumG * c - lumG * s, lumG + oneMinusLumG * c + 0.140 * s, lumG - lumG * c + lumG * s, 0.0),
+            vec4(lumB - lumB * c + oneMinusLumB * s, lumB - lumB * c - 0.283 * s, lumB + oneMinusLumB * c + lumB * s, 0.0),
+            vec4(0.0, 0.0, 0.0, 1.0)
+        );
+    } else if (op == FILTER_SATURATE) {
+        color_mat = mat4(
+            vec4(invAmount * lumR + amount, invAmount * lumR, invAmount * lumR, 0.0),
+            vec4(invAmount * lumG, invAmount * lumG + amount, invAmount * lumG, 0.0),
+            vec4(invAmount * lumB, invAmount * lumB, invAmount * lumB + amount, 0.0),
+            vec4(0.0, 0.0, 0.0, 1.0)
+        );
+    } else if (op == FILTER_SEPIA) {
+        color_mat = mat4(
+            vec4(0.393 + 0.607 * invAmount, 0.349 - 0.349 * invAmount, 0.272 - 0.272 * invAmount, 0.0),
+            vec4(0.769 - 0.769 * invAmount, 0.686 + 0.314 * invAmount, 0.534 - 0.534 * invAmount, 0.0),
+            vec4(0.189 - 0.189 * invAmount, 0.168 - 0.168 * invAmount, 0.131 + 0.869 * invAmount, 0.0),
+            vec4(0.0, 0.0, 0.0, 1.0)
+        );
+    } else if (op == FILTER_COLOR_MATRIX) {
+        // Four blocks of matrix data followed by one block for the offset.
+        vec4 mat_data[4] = fetch_from_gpu_cache_4(gpu_data_address);
+        vec4 offset_data = fetch_from_gpu_cache_1(gpu_data_address + 4);
+        color_mat = mat4(mat_data[0], mat_data[1], mat_data[2], mat_data[3]);
+        color_offset = offset_data;
+    } else if (op == FILTER_COMPONENT_TRANSFER) {
+        // The data is fetched per fragment; only its address is passed on.
+        table_address = gpu_data_address;
+    } else if (op == FILTER_FLOOD) {
+        color_offset = fetch_from_gpu_cache_1(gpu_data_address);
+    }
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+// Contrast filter: scales every channel by `amount` around the 0.5 midpoint
+// and clamps the result to [0, 1].
+vec3 Contrast(vec3 Cs, float amount) {
+    vec3 adjusted = Cs * amount - 0.5 * amount + 0.5;
+    return clamp(adjusted, 0.0, 1.0);
+}
+
+// Invert filter: blends every channel towards its complement (1 - value).
+// `amount` is the blend weight, so 0 keeps the input and 1 inverts it fully.
+vec3 Invert(vec3 Cs, float amount) {
+    vec3 complement = vec3(1.0) - Cs;
+    return mix(Cs, complement, amount);
+}
+
+// Brightness filter: multiplies every channel by `amount`.
+vec3 Brightness(vec3 Cs, float amount) {
+    vec3 scaled = Cs * amount;
+    // The scaled color has to be clamped to the output range because alpha
+    // is pre-multiplied into it in the shader.
+    return clamp(scaled, vec3(0.0), vec3(1.0));
+}
+
+// Based on the Gecko's implementation in
+// https://hg.mozilla.org/mozilla-central/file/91b4c3687d75/gfx/src/FilterSupport.cpp#l24
+// These could be made faster by sampling a lookup table stored in a float texture
+// with linear interpolation.
+
+// sRGB -> linear conversion, per channel. Both candidate results are
+// computed; if_then_else() (defined elsewhere) is handed the result of the
+// "<= 0.04045" test to choose between the linear and the power segment.
+vec3 SrgbToLinear(vec3 color) {
+    vec3 linear_segment = color / 12.92;
+    vec3 power_segment = pow(color / 1.055 + vec3(0.055 / 1.055), vec3(2.4));
+    bvec3 below_threshold = lessThanEqual(color, vec3(0.04045));
+    return if_then_else(below_threshold, linear_segment, power_segment);
+}
+
+// Linear -> sRGB conversion, per channel. Both candidate results are
+// computed; if_then_else() (defined elsewhere) is handed the result of the
+// "<= 0.0031308" test to choose between the linear and the power segment.
+vec3 LinearToSrgb(vec3 color) {
+    vec3 linear_segment = color * 12.92;
+    vec3 power_segment = vec3(1.055) * pow(color, vec3(1.0 / 2.4)) - vec3(0.055);
+    bvec3 below_threshold = lessThanEqual(color, vec3(0.0031308));
+    return if_then_else(below_threshold, linear_segment, power_segment);
+}
+
+// This function has to be factored out due to the following issue:
+// https://github.com/servo/webrender/wiki/Driver-issues#bug-1532245---switch-statement-inside-control-flow-inside-switch-statement-fails-to-compile-on-some-android-phones
+// (and now the words "default: default:" so angle_shader_validation.rs passes)
+// Applies the component transfer functions selected by `vfuncs` to the four
+// channels of `colora` and returns the transformed color.
+//   colora:        color plus alpha; CalculateFilter() passes the
+//                  un-premultiplied color here.
+//   vfuncs:        one COMPONENT_TRANSFER_* function type per channel,
+//                  carried as floats and converted back to ints below.
+//   table_address: GPU cache address where the function data starts.
+vec4 ComponentTransfer(vec4 colora, vec4 vfuncs, highp int table_address) {
+    // We push a different amount of data to the gpu cache depending on the
+    // function type.
+    // Identity => 0 blocks
+    // Table/Discrete => 64 blocks (256 values)
+    // Linear => 1 block (2 values)
+    // Gamma => 1 block (3 values)
+    // We loop through the color components and increment the offset (for the
+    // next color component) into the gpu cache based on how many blocks that
+    // function type put into the gpu cache.
+    // Table/Discrete use a 256 entry look up table.
+    // Linear/Gamma are a simple calculation.
+
+    // Both offset and k must be marked as highp due to an Adreno 3xx bug likely
+    // to do with converting between precisions (as they would otherwise be
+    // promoted when adding to table_address).
+    highp int offset = 0;
+    highp int k;
+
+    vec4 texel;
+
+    // Dynamically indexing a vector is buggy on some platforms, so use a temporary array
+    int[4] funcs = int[4](int(vfuncs.r), int(vfuncs.g), int(vfuncs.b), int(vfuncs.a));
+    for (int i = 0; i < 4; i++) {
+        switch (funcs[i]) {
+            case COMPONENT_TRANSFER_IDENTITY:
+                // Channel is left untouched and consumes no cache blocks.
+                break;
+            case COMPONENT_TRANSFER_TABLE:
+            case COMPONENT_TRANSFER_DISCRETE: {
+                // fetch value from lookup table
+                // Scale the channel by 255 and round to the nearest integer
+                // to get the table index.
+                k = int(floor(colora[i]*255.0 + 0.5));
+                // Each block holds four table values: k/4 picks the block,
+                // k % 4 the component inside it.
+                texel = fetch_from_gpu_cache_1(table_address + offset + k/4);
+                colora[i] = clamp(texel[k % 4], 0.0, 1.0);
+                // offset plus 256/4 blocks
+                offset = offset + 64;
+                break;
+            }
+            case COMPONENT_TRANSFER_LINEAR: {
+                // fetch the two values for use in the linear equation
+                // (texel[0] multiplies the channel, texel[1] is added)
+                texel = fetch_from_gpu_cache_1(table_address + offset);
+                colora[i] = clamp(texel[0] * colora[i] + texel[1], 0.0, 1.0);
+                // offset plus 1 block
+                offset = offset + 1;
+                break;
+            }
+            case COMPONENT_TRANSFER_GAMMA: {
+                // fetch the three values for use in the gamma equation
+                // (texel[0] multiplies, texel[1] is the exponent, texel[2] is added)
+                texel = fetch_from_gpu_cache_1(table_address + offset);
+                colora[i] = clamp(texel[0] * pow(colora[i], texel[1]) + texel[2], 0.0, 1.0);
+                // offset plus 1 block
+                offset = offset + 1;
+                break;
+            }
+            default:
+                // shouldn't happen
+                break;
+        }
+    }
+    return colora;
+}
+
+// Applies filter `op` to the premultiplied source color `Cs`. The result is
+// written to `color` and `alpha` separately; brush_blend.glsl pre-multiplies
+// them again when building its fragment output.
+void CalculateFilter(
+    vec4 Cs,
+    int op,
+    float amount,
+    highp int table_address,
+    vec4 color_offset,
+    mat4 color_mat,
+    vec4 v_funcs,
+    out vec3 color,
+    out float alpha
+) {
+    // Un-premultiply the input. A zero alpha leaves the color as it is so
+    // that no division by zero takes place.
+    alpha = Cs.a;
+    color = Cs.rgb;
+    if (alpha != 0.0) {
+        color = Cs.rgb / alpha;
+    }
+
+    switch (op) {
+        case FILTER_CONTRAST:
+            color = Contrast(color, amount);
+            break;
+        case FILTER_INVERT:
+            color = Invert(color, amount);
+            break;
+        case FILTER_BRIGHTNESS:
+            color = Brightness(color, amount);
+            break;
+        case FILTER_SRGB_TO_LINEAR:
+            color = SrgbToLinear(color);
+            break;
+        case FILTER_LINEAR_TO_SRGB:
+            color = LinearToSrgb(color);
+            break;
+        case FILTER_COMPONENT_TRANSFER: {
+            // Runs on the unpremultiplied color together with its alpha.
+            vec4 transferred = ComponentTransfer(vec4(color, alpha), v_funcs, table_address);
+            color = transferred.rgb;
+            alpha = transferred.a;
+            break;
+        }
+        case FILTER_FLOOD:
+            // The flood color travels in the offset slot.
+            color = color_offset.rgb;
+            alpha = color_offset.a;
+            break;
+        default: {
+            // Color matrix type filters (sepia, hue-rotate, etc...)
+            vec4 transformed = clamp(
+                color_mat * vec4(color, alpha) + color_offset,
+                vec4(0.0),
+                vec4(1.0)
+            );
+            color = transformed.rgb;
+            alpha = transformed.a;
+            break;
+        }
+    }
+}
+#endif
diff --git a/gfx/wr/webrender/res/brush.glsl b/gfx/wr/webrender/res/brush.glsl
new file mode 100644
index 0000000000..8d0d52ce82
--- /dev/null
+++ b/gfx/wr/webrender/res/brush.glsl
@@ -0,0 +1,258 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/// # Brush vertex shaders memory layout
+///
+/// The overall memory layout is the same for all brush shaders.
+///
+/// The vertex shader receives a minimal amount of data from vertex attributes (packed into a single
+/// ivec4 per instance) and the rest is fetched from various uniform samplers using offsets decoded
+/// from the vertex attributes.
+///
+/// The diagram below shows the various pieces of data fetched in the vertex shader:
+///
+///```ascii
+///                                                                         (sPrimitiveHeadersI)
+///                          (VBO)                                     +-----------------------+
+/// +----------------------------+      +----------------------------> | Int header            |
+/// | Instance vertex attributes |      |        (sPrimitiveHeadersF)  |                       |
+/// |                            |      |     +---------------------+  |   z                   |
+/// | x: prim_header_address    +-------+---> | Float header        |  |   specific_address  +-----+
+/// | y: picture_task_address   +---------+   |                     |  |   transform_address +---+ |
+/// |    clip_address           +-----+   |   |    local_rect       |  |   user_data           | | |
+/// | z: flags                   |    |   |   |    local_clip_rect  |  +-----------------------+ | |
+/// |    segment_index           |    |   |   +---------------------+                            | |
+/// | w: resource_address       +--+  |   |                                                      | |
+/// +----------------------------+ |  |   |                                 (sGpuCache)          | |
+///                                |  |   |         (sGpuCache)          +------------+          | |
+///                                |  |   |   +---------------+          | Transform  | <--------+ |
+///                (sGpuCache)     |  |   +-> | Picture task  |          +------------+            |
+///            +-------------+     |  |       |               |                                    |
+///            |  Resource   | <---+  |       |         ...   |                                    |
+///            |             |        |       +---------------+   +--------------------------------+
+///            |             |        |                           |
+///            +-------------+        |             (sGpuCache)   v                        (sGpuCache)
+///                                   |       +---------------+  +--------------+---------------+-+-+
+///                                   +-----> | Clip area     |  | Brush data   |  Segment data | | |
+///                                           |               |  |              |               | | |
+///                                           |         ...   |  |         ...  |          ...  | | | ...
+///                                           +---------------+  +--------------+---------------+-+-+
+///```
+///
+/// - Segment data address is obtained by combining the address stored in the int header and the
+///   segment index decoded from the vertex attributes.
+/// - Resource data is optional, some brush types (such as images) store some extra data there while
+///   other brush types don't use it.
+///
+
+// Interpolated copy of VertexInfo.local_pos: written at the end of
+// brush_shader_main_vs() and read by antialias_brush(). It is only declared
+// for alpha-pass or antialiasing variants, and never when SWGL_ANTIALIAS is
+// defined.
+#if (defined(WR_FEATURE_ALPHA_PASS) || defined(WR_FEATURE_ANTIALIASING)) && !defined(SWGL_ANTIALIAS)
+varying highp vec2 v_local_pos;
+#endif
+
+#ifdef WR_VERTEX_SHADER
+
+// Forward declaration of the per-brush vertex hook. The shader that includes
+// this file provides the definition (brush_blend.glsl, for example), and
+// brush_shader_main_vs() calls it to write the brush-specific interpolators.
+void brush_vs(
+    VertexInfo vi,
+    int prim_address,
+    RectWithEndpoint local_rect,
+    RectWithEndpoint segment_rect,
+    ivec4 prim_user_data,
+    int specific_resource_address,
+    mat4 transform,
+    PictureTask pic_task,
+    int brush_flags,
+    vec4 segment_data
+);
+
+// Forward-declare the text vertex shader entry point which is currently
+// different from other brushes. Its parameter list matches the call made
+// through WR_VERTEX_SHADER_MAIN_FUNCTION in main().
+void text_shader_main(
+    Instance instance,
+    PrimitiveHeader ph,
+    Transform transform,
+    PictureTask task,
+    ClipArea clip_area
+);
+
+// Number of vec4 GPU cache blocks stored per segment: brush_shader_main_vs()
+// reads the segment rect from the first and the segment data from the second.
+#define VECS_PER_SEGMENT                    2
+
+// Bit flags found in the low 12 bits of the instance flags
+// (see brush_shader_main_vs()).
+#define BRUSH_FLAG_PERSPECTIVE_INTERPOLATION    1
+#define BRUSH_FLAG_SEGMENT_RELATIVE             2
+#define BRUSH_FLAG_SEGMENT_REPEAT_X             4
+#define BRUSH_FLAG_SEGMENT_REPEAT_Y             8
+#define BRUSH_FLAG_SEGMENT_REPEAT_X_ROUND      16
+#define BRUSH_FLAG_SEGMENT_REPEAT_Y_ROUND      32
+#define BRUSH_FLAG_SEGMENT_REPEAT_X_CENTERED   64
+#define BRUSH_FLAG_SEGMENT_REPEAT_Y_CENTERED  128
+#define BRUSH_FLAG_SEGMENT_NINEPATCH_MIDDLE   256
+#define BRUSH_FLAG_TEXEL_RECT                 512
+#define BRUSH_FLAG_FORCE_AA                  1024
+
+// Segment index meaning "no segment": the whole primitive local rect is drawn
+// and the segment data is zero.
+#define INVALID_SEGMENT_INDEX                   0xffff
+
+// Standard vertex entry point shared by the brush shaders. It resolves the
+// segment being drawn, positions the vertex (inflating the rect when edge AA
+// is needed), writes the clip data and finally hands over to the
+// brush-specific brush_vs().
+void brush_shader_main_vs(
+    Instance instance,
+    PrimitiveHeader ph,
+    Transform transform,
+    PictureTask pic_task,
+    ClipArea clip_area
+) {
+    // The low 12 bits of the instance flags carry the brush flags, the four
+    // bits above them the edge flags.
+    int brush_flags = instance.flags & 0xfff;
+    int edge_flags = (instance.flags >> 12) & 0xf;
+
+    // Fetch the segment of this brush primitive we are drawing. Without a
+    // valid segment index the whole primitive rect is used and there is no
+    // segment data.
+    RectWithEndpoint segment_rect = ph.local_rect;
+    vec4 segment_data = vec4(0.0);
+    if (instance.segment_index != INVALID_SEGMENT_INDEX) {
+        int segment_address = ph.specific_prim_address +
+                              VECS_PER_SPECIFIC_BRUSH +
+                              instance.segment_index * VECS_PER_SEGMENT;
+
+        vec4[2] segment_info = fetch_from_gpu_cache_2(segment_address);
+        // The stored rect is relative to the origin of the primitive rect.
+        segment_rect = RectWithEndpoint(
+            segment_info[0].xy + ph.local_rect.p0,
+            segment_info[0].zw + ph.local_rect.p0
+        );
+        segment_data = segment_info[1];
+    }
+
+    bool force_aa = (brush_flags & BRUSH_FLAG_FORCE_AA) != 0;
+    bool antialiased = force_aa || !transform.is_axis_aligned;
+
+    // Most of the time this is the segment rect, but when doing the edge AA
+    // it is inflated.
+    RectWithEndpoint adjusted_segment_rect = segment_rect;
+
+    // Write the normal vertex information out.
+    if (antialiased) {
+        adjusted_segment_rect = clip_and_init_antialiasing(
+            segment_rect,
+            ph.local_rect,
+            ph.local_clip_rect,
+            edge_flags,
+            ph.z,
+            transform,
+            pic_task
+        );
+
+        // The clip was taken into account in clip_and_init_antialiasing, remove
+        // it so that it doesn't interfere with the aa.
+        ph.local_clip_rect.p0 = vec2(-1.0e16);
+        ph.local_clip_rect.p1 = vec2(1.0e16);
+    } else {
+        // The common case for most CSS content.
+
+        // TODO(gw): transform bounds may be referenced by
+        //           the fragment shader when running in
+        //           the alpha pass, even on non-transformed
+        //           items. For now, just ensure it has no
+        //           effect. We can tidy this up as we move
+        //           more items to be brush shaders.
+#if defined(WR_FEATURE_ALPHA_PASS) && !defined(SWGL_ANTIALIAS)
+        init_transform_vs(vec4(vec2(-1.0e16), vec2(1.0e16)));
+#endif
+    }
+
+    // Select the corner of the local rect that we are processing.
+    vec2 corner_pos = mix(adjusted_segment_rect.p0, adjusted_segment_rect.p1, aPosition.xy);
+
+    VertexInfo vertex_info = write_vertex(
+        corner_pos,
+        ph.local_clip_rect,
+        ph.z,
+        transform,
+        pic_task
+    );
+
+    // For brush instances in the alpha pass, always write
+    // out clip information.
+    // TODO(gw): It's possible that we might want alpha
+    //           shaders that don't clip in the future,
+    //           but it's reasonable to assume that one
+    //           implies the other, for now.
+    // SW-WR may decay some requests for alpha-pass shaders to
+    // the opaque version if only the clip-mask is required. In
+    // that case the opaque vertex shader must still write out
+    // the clip information, which is cheap to do for SWGL.
+#if defined(WR_FEATURE_ALPHA_PASS) || defined(SWGL_CLIP_MASK)
+    write_clip(vertex_info.world_pos, clip_area, pic_task);
+#endif
+
+    // Run the specific brush VS code to write interpolators. Note that it
+    // receives the original segment rect, not the AA-adjusted one.
+    brush_vs(
+        vertex_info,
+        ph.specific_prim_address,
+        ph.local_rect,
+        segment_rect,
+        ph.user_data,
+        instance.resource_address,
+        transform.m,
+        pic_task,
+        brush_flags,
+        segment_data
+    );
+
+#if (defined(WR_FEATURE_ALPHA_PASS) || defined(WR_FEATURE_ANTIALIASING)) && !defined(SWGL_ANTIALIAS)
+    // Passed on to the fragment stage for antialias_brush().
+    v_local_pos = vertex_info.local_pos;
+#endif
+}
+
+#ifndef WR_VERTEX_SHADER_MAIN_FUNCTION
+// A shader may define its own entry point before including the brush shader;
+// when it has not, fall back to brush_shader_main_vs.
+#define WR_VERTEX_SHADER_MAIN_FUNCTION brush_shader_main_vs
+#endif
+
+// Vertex entry point: decodes the instance attributes, fetches everything
+// they point at and forwards the lot to the selected main function.
+void main(void) {
+    Instance prim_instance = decode_instance_attributes();
+
+    // These two lookups depend on the instance attributes only.
+    PictureTask picture_task = fetch_picture_task(prim_instance.picture_task_address);
+    ClipArea clip_area = fetch_clip_area(prim_instance.clip_address);
+
+    // The transform id comes from the primitive header, so fetch that first.
+    PrimitiveHeader prim_header = fetch_prim_header(prim_instance.prim_header_address);
+    Transform prim_transform = fetch_transform(prim_header.transform_id);
+
+    WR_VERTEX_SHADER_MAIN_FUNCTION(
+        prim_instance,
+        prim_header,
+        prim_transform,
+        picture_task,
+        clip_area
+    );
+}
+
+#endif // WR_VERTEX_SHADER
+
+#ifdef WR_FRAGMENT_SHADER
+
+// Returns the anti-aliasing factor for the current fragment: the value of
+// init_transform_fs() at the interpolated local position when v_local_pos is
+// compiled in, and 1.0 otherwise.
+float antialias_brush() {
+    float aa_factor = 1.0;
+#if (defined(WR_FEATURE_ALPHA_PASS) || defined(WR_FEATURE_ANTIALIASING)) && !defined(SWGL_ANTIALIAS)
+    aa_factor = init_transform_fs(v_local_pos);
+#endif
+    return aa_factor;
+}
+
+// Forward declaration of the per-brush fragment hook; the shader that
+// includes this file provides the definition.
+Fragment brush_fs();
+
+// Fragment entry point shared by the brush shaders.
+void main(void) {
+#ifdef WR_FEATURE_DEBUG_OVERDRAW
+    // Overdraw debugging skips the brush and writes WR_DEBUG_OVERDRAW_COLOR.
+    oFragColor = WR_DEBUG_OVERDRAW_COLOR;
+#else
+
+    Fragment brush_output = brush_fs();
+
+#ifdef WR_FEATURE_ALPHA_PASS
+    // Apply the clip mask to every output of the alpha pass.
+    float mask_alpha = do_clip();
+
+    #ifdef WR_FEATURE_DUAL_SOURCE_BLENDING
+        oFragBlend = brush_output.blend * mask_alpha;
+    #endif
+
+    brush_output.color *= mask_alpha;
+#endif
+
+    write_output(brush_output.color);
+#endif
+}
+#endif
diff --git a/gfx/wr/webrender/res/brush_blend.glsl b/gfx/wr/webrender/res/brush_blend.glsl
new file mode 100644
index 0000000000..49f047edbd
--- /dev/null
+++ b/gfx/wr/webrender/res/brush_blend.glsl
@@ -0,0 +1,121 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define VECS_PER_SPECIFIC_BRUSH 3
+#define WR_FEATURE_TEXTURE_2D
+
+#include shared,prim_shared,brush,blend
+
+// Interpolated UV coordinates to sample.
+varying highp vec2 v_uv;
+
+// Normalized bounds of the source image in the texture, adjusted to avoid
+// sampling artifacts.
+flat varying highp vec4 v_uv_sample_bounds;
+
+// x: Flag to allow perspective interpolation of UV.
+// y: Filter-dependent "amount" parameter.
+// Packed into a vector to work around bug 1630356.
+flat varying mediump vec2 v_perspective_amount;
+#define v_perspective v_perspective_amount.x
+#define v_amount v_perspective_amount.y
+
+// x: Blend op, y: Lookup table GPU cache address.
+// Packed into a vector to work around bug 1630356.
+// Must be explicitly marked as highp, as the default integer precision in
+// fragment shaders is mediump which may only be 16 bits in ESSL 3, and GPU
+// cache address can exceed that maximum representable value.
+flat varying highp ivec2 v_op_table_address_vec;
+#define v_op v_op_table_address_vec.x
+#define v_table_address v_op_table_address_vec.y
+
+// We must keep this as highp as we encountered shader compilation crashes on
+// Mali-T devices when mediump.
+flat varying highp mat4 v_color_mat;
+// The function to use for each component of a component transfer filter. Using an int[4]
+// or ivec4 (with each element or component containing the function for each component) has
+// run into bugs 1695912 and 1731758, so instead use a vec4 and cast the values to/from floats.
+flat varying mediump vec4 v_funcs;
+// Offset added after the color matrix multiply; carries the flood color for
+// FILTER_FLOOD (see SetupFilterParams() and CalculateFilter() in blend.glsl).
+flat varying mediump vec4 v_color_offset;
+
+#ifdef WR_VERTEX_SHADER
+// Vertex hook for blend (filter) brushes: computes the UVs used to sample the
+// source image and decodes the filter parameters packed in the primitive's
+// user data. Several parameters of the shared brush_vs() signature are not
+// needed here and are ignored.
+void brush_vs(
+    VertexInfo vi,
+    int prim_address,
+    RectWithEndpoint local_rect,
+    RectWithEndpoint segment_rect,
+    ivec4 prim_user_data,
+    int specific_resource_address,
+    mat4 transform,
+    PictureTask pic_task,
+    int brush_flags,
+    vec4 unused
+) {
+    // prim_user_data.x addresses the image source that is being filtered.
+    ImageSource source = fetch_image_source(prim_user_data.x);
+    vec2 uv_min = source.uv_rect.p0;
+    vec2 uv_max = source.uv_rect.p1;
+    vec2 texel_scale = vec2(1.0) / vec2(TEX_SIZE(sColor0).xy);
+
+    // Position of this vertex relative to the primitive rect, remapped by
+    // get_image_quad_uv() and then used to pick a point in the UV rect.
+    vec2 rel_pos = (vi.local_pos - local_rect.p0) / rect_size(local_rect);
+    rel_pos = get_image_quad_uv(prim_user_data.x, rel_pos);
+    vec2 uv = mix(uv_min, uv_max, rel_pos);
+
+    float perspective_interpolate = 0.0;
+    if ((brush_flags & BRUSH_FLAG_PERSPECTIVE_INTERPOLATION) != 0) {
+        perspective_interpolate = 1.0;
+    }
+
+    // With the flag clear the UV is scaled by w here; brush_fs() applies the
+    // matching gl_FragCoord.w factor.
+    v_uv = uv * texel_scale * mix(vi.world_pos.w, 1.0, perspective_interpolate);
+    v_perspective = perspective_interpolate;
+
+    // Pull the sample bounds in by half a texel on every side.
+    v_uv_sample_bounds = vec4(uv_min + vec2(0.5), uv_max - vec2(0.5)) * texel_scale.xyxy;
+
+    // prim_user_data.y: the filter op sits in the low 16 bits, followed by
+    // one 4-bit component transfer function per channel (alpha lowest, red
+    // highest).
+    v_op = prim_user_data.y & 0xffff;
+    v_funcs.r = float((prim_user_data.y >> 28) & 0xf);
+    v_funcs.g = float((prim_user_data.y >> 24) & 0xf);
+    v_funcs.b = float((prim_user_data.y >> 20) & 0xf);
+    v_funcs.a = float((prim_user_data.y >> 16) & 0xf);
+
+    // prim_user_data.z is passed on twice: divided by 65536 as the filter
+    // amount, and unchanged as the GPU cache address. SetupFilterParams()
+    // reads whichever of the two the op needs.
+    float amount = float(prim_user_data.z) / 65536.0;
+    v_amount = amount;
+
+    SetupFilterParams(
+        v_op,
+        amount,
+        prim_user_data.z,
+        v_color_offset,
+        v_color_mat,
+        v_table_address
+    );
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+// Fragment hook for blend (filter) brushes: samples the source image, runs
+// the filter selected by v_op and returns the result with alpha
+// pre-multiplied into it.
+Fragment brush_fs() {
+    // Counterpart of the w scaling applied to v_uv in the vertex stage.
+    vec2 uv = v_uv * mix(gl_FragCoord.w, 1.0, v_perspective);
+    // Clamp the uvs to avoid sampling artifacts.
+    vec2 sample_uv = clamp(uv, v_uv_sample_bounds.xy, v_uv_sample_bounds.zw);
+
+    vec4 Cs = texture(sColor0, sample_uv);
+
+    // Filled in by CalculateFilter() as separate color and alpha.
+    vec3 color;
+    float alpha;
+    CalculateFilter(
+        Cs,
+        v_op,
+        v_amount,
+        v_table_address,
+        v_color_offset,
+        v_color_mat,
+        v_funcs,
+        color,
+        alpha
+    );
+
+    #ifdef WR_FEATURE_ALPHA_PASS
+        alpha *= antialias_brush();
+    #endif
+
+    // Pre-multiply the alpha into the output value.
+    return Fragment(vec4(color, 1.0) * alpha);
+}
+#endif
diff --git a/gfx/wr/webrender/res/brush_image.glsl b/gfx/wr/webrender/res/brush_image.glsl
new file mode 100644
index 0000000000..4a0cfe2229
--- /dev/null
+++ b/gfx/wr/webrender/res/brush_image.glsl
@@ -0,0 +1,426 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define VECS_PER_SPECIFIC_BRUSH 3
+
+#include shared,prim_shared,brush
+
+// Interpolated UV coordinates to sample.
+varying highp vec2 v_uv;
+
+#ifdef WR_FEATURE_ALPHA_PASS
+flat varying mediump vec4 v_color; // Color multiplied with the sampled texel in brush_fs.
+flat varying mediump vec2 v_mask_swizzle; // Weights for the texel's rgb (.x) and alpha (.y) channels.
+flat varying mediump vec2 v_tile_repeat_bounds; // Repeat count plus the normalized centering offset.
+#endif
+
+// Normalized bounds of the source image in the texture.
+flat varying highp vec4 v_uv_bounds;
+// Normalized bounds of the source image in the texture, adjusted to avoid
+// sampling artifacts.
+flat varying highp vec4 v_uv_sample_bounds;
+
+// Flag to allow perspective interpolation of UV.
+// Packed in to vector to work around bug 1630356.
+flat varying mediump vec2 v_perspective;
+
+#ifdef WR_VERTEX_SHADER
+
+// Must match the AlphaType enum.
+#define BLEND_MODE_ALPHA            0
+#define BLEND_MODE_PREMUL_ALPHA     1
+
+struct ImageBrushData { // Unpacked from three consecutive GPU cache vec4s.
+    vec4 color; // Scaled by the primitive opacity in brush_vs before use.
+    vec4 background_color; // Not read by brush_vs in this shader.
+    vec2 stretch_size; // Size of one image tile; a negative x means "use the prim rect size".
+};
+
+// Loads the three vec4 blocks stored at `address` in the GPU cache and
+// unpacks them; only .xy of the third block is used.
+ImageBrushData fetch_image_data(int address) {
+    vec4 blocks[3] = fetch_from_gpu_cache_3(address);
+    vec4 color = blocks[0];
+    vec4 background = blocks[1];
+    vec2 stretch = blocks[2].xy;
+    return ImageBrushData(color, background, stretch);
+}
+
+vec2 modf2(vec2 x, vec2 y) {
+    return x - floor(x / y) * y; // component-wise floored remainder of x / y
+}
+
+void brush_vs(
+    VertexInfo vi,
+    int prim_address,
+    RectWithEndpoint prim_rect,
+    RectWithEndpoint segment_rect,
+    ivec4 prim_user_data,
+    int specific_resource_address,
+    mat4 transform,
+    PictureTask pic_task,
+    int brush_flags,
+    vec4 segment_data
+) {
+    ImageBrushData image_data = fetch_image_data(prim_address);
+
+    // If this is in WR_FEATURE_TEXTURE_RECT mode, the rect and size use
+    // non-normalized texture coordinates.
+#ifdef WR_FEATURE_TEXTURE_RECT
+    vec2 texture_size = vec2(1, 1);
+#else
+    vec2 texture_size = vec2(TEX_SIZE(sColor0));
+#endif
+
+    ImageSource res = fetch_image_source(specific_resource_address);
+    vec2 uv0 = res.uv_rect.p0;
+    vec2 uv1 = res.uv_rect.p1;
+
+    RectWithEndpoint local_rect = prim_rect;
+    vec2 stretch_size = image_data.stretch_size;
+    if (stretch_size.x < 0.0) { // Negative x: no explicit stretch size, use the whole rect.
+        stretch_size = rect_size(local_rect);
+    }
+
+    // If this segment should interpolate relative to the
+    // segment, modify the parameters for that.
+    if ((brush_flags & BRUSH_FLAG_SEGMENT_RELATIVE) != 0) {
+        local_rect = segment_rect;
+        stretch_size = rect_size(local_rect);
+
+        if ((brush_flags & BRUSH_FLAG_TEXEL_RECT) != 0) {
+            // If the extra data is a texel rect, modify the UVs.
+            vec2 uv_size = res.uv_rect.p1 - res.uv_rect.p0;
+            uv0 = res.uv_rect.p0 + segment_data.xy * uv_size;
+            uv1 = res.uv_rect.p0 + segment_data.zw * uv_size;
+        }
+
+        #ifdef WR_FEATURE_REPETITION
+            // TODO(bug 1609893): Move this logic to the CPU as well as other sources of
+            // branchiness in this shader.
+            if ((brush_flags & BRUSH_FLAG_TEXEL_RECT) != 0) {
+                // Value of the stretch size with repetition. We have to compute it for
+                // both axes even if we only repeat on one axis because the value for
+                // each axis depends on what the repeated value would have been for the
+                // other axis.
+                vec2 repeated_stretch_size = stretch_size;
+                // Size of the uv rect of the segment we are considering when computing
+                // the repetitions. For the fill area it is a tad more complicated as we
+                // have to use the uv size of the top-middle segment to drive horizontal
+                // repetitions, and the size of the left-middle segment to drive vertical
+                // repetitions. So we track the reference sizes for both axes separately
+                // even though in the common case (the border segments) they are the same.
+                vec2 horizontal_uv_size = uv1 - uv0;
+                vec2 vertical_uv_size = uv1 - uv0;
+                // We use top and left sizes by default and fall back to bottom and right
+                // when a size is empty.
+                if ((brush_flags & BRUSH_FLAG_SEGMENT_NINEPATCH_MIDDLE) != 0) {
+                    repeated_stretch_size = segment_rect.p0 - prim_rect.p0;
+
+                    float epsilon = 0.001;
+
+                    // Adjust the reference uv size to compute vertical repetitions for
+                    // the fill area.
+                    vertical_uv_size.x = uv0.x - res.uv_rect.p0.x;
+                    if (vertical_uv_size.x < epsilon || repeated_stretch_size.x < epsilon) {
+                        vertical_uv_size.x = res.uv_rect.p1.x - uv1.x;
+                        repeated_stretch_size.x = prim_rect.p1.x - segment_rect.p1.x;
+                    }
+
+                    // Adjust the reference uv size to compute horizontal repetitions
+                    // for the fill area.
+                    horizontal_uv_size.y = uv0.y - res.uv_rect.p0.y;
+                    if (horizontal_uv_size.y < epsilon || repeated_stretch_size.y < epsilon) {
+                        horizontal_uv_size.y = res.uv_rect.p1.y - uv1.y;
+                        repeated_stretch_size.y = prim_rect.p1.y - segment_rect.p1.y;
+                    }
+                }
+
+                if ((brush_flags & BRUSH_FLAG_SEGMENT_REPEAT_X) != 0) {
+                    float uv_ratio = horizontal_uv_size.x / horizontal_uv_size.y;
+                    stretch_size.x = repeated_stretch_size.y * uv_ratio;
+                }
+                if ((brush_flags & BRUSH_FLAG_SEGMENT_REPEAT_Y) != 0) {
+                    float uv_ratio = vertical_uv_size.y / vertical_uv_size.x;
+                    stretch_size.y = repeated_stretch_size.x * uv_ratio;
+                }
+
+            } else {
+                if ((brush_flags & BRUSH_FLAG_SEGMENT_REPEAT_X) != 0) {
+                    stretch_size.x = segment_data.z - segment_data.x;
+                }
+                if ((brush_flags & BRUSH_FLAG_SEGMENT_REPEAT_Y) != 0) {
+                    stretch_size.y = segment_data.w - segment_data.y;
+                }
+            }
+            if ((brush_flags & BRUSH_FLAG_SEGMENT_REPEAT_X_ROUND) != 0) {
+                float segment_rect_width = segment_rect.p1.x - segment_rect.p0.x;
+                float nx = max(1.0, round(segment_rect_width / stretch_size.x));
+                stretch_size.x = segment_rect_width / nx;
+            }
+            if ((brush_flags & BRUSH_FLAG_SEGMENT_REPEAT_Y_ROUND) != 0) {
+                float segment_rect_height = segment_rect.p1.y - segment_rect.p0.y;
+                float ny = max(1.0, round(segment_rect_height / stretch_size.y));
+                stretch_size.y = segment_rect_height / ny;
+            }
+        #endif
+    }
+
+    float perspective_interpolate = (brush_flags & BRUSH_FLAG_PERSPECTIVE_INTERPOLATION) != 0 ? 1.0 : 0.0;
+    v_perspective.x = perspective_interpolate;
+
+    // Handle case where the UV coords are inverted (e.g. from an
+    // external image).
+    vec2 min_uv = min(uv0, uv1);
+    vec2 max_uv = max(uv0, uv1);
+
+    v_uv_sample_bounds = vec4(
+        min_uv + vec2(0.5),
+        max_uv - vec2(0.5)
+    ) / texture_size.xyxy;
+
+    vec2 f = (vi.local_pos - local_rect.p0) / rect_size(local_rect);
+
+#ifdef WR_FEATURE_ALPHA_PASS
+    int color_mode = prim_user_data.x & 0xffff; // Low 16 bits.
+    int blend_mode = prim_user_data.x >> 16; // High 16 bits.
+
+#endif
+
+    // Derive the texture coordinates for this image, based on
+    // whether the source image is a local-space or screen-space
+    // image.
+    int raster_space = prim_user_data.y;
+    if (raster_space == RASTER_SCREEN) {
+        // Since the screen space UVs specify an arbitrary quad, do
+        // a bilinear interpolation to get the correct UV for this
+        // local position.
+        f = get_image_quad_uv(specific_resource_address, f);
+    }
+
+    // Offset and scale v_uv here to avoid doing it in the fragment shader.
+    vec2 repeat = rect_size(local_rect) / stretch_size;
+    v_uv = mix(uv0, uv1, f) - min_uv;
+    v_uv *= repeat.xy;
+
+    vec2 normalized_offset = vec2(0.0);
+#ifdef WR_FEATURE_REPETITION
+    // In the case of border-image-repeat: repeat, we must apply an offset so that
+    // the first tile is centered.
+    //
+    // This is derived from:
+    //   uv_size = max_uv - min_uv
+    //   repeat = local_rect.size / stretch_size
+    //   layout_offset = local_rect.size / 2 - stretch_size / 2
+    //   texel_offset = layout_offset * uv_size / stretch_size
+    //   texel_offset = uv_size / 2 * (local_rect.size / stretch_size - stretch_size / stretch_size)
+    //   texel_offset = uv_size / 2 * (repeat - 1)
+    //
+    // The offset is then adjusted so that it loops in the [0, uv_size] range.
+    // In principle this is simply a modulo:
+    //
+    //   adjusted_offset = fract((repeat - 1)/2) * uv_size
+    //
+    // However we don't want fract's behavior with negative numbers which happens when the pattern
+    // is larger than the local rect (repeat is between 0 and 1), so we shift the content by 1 to
+    // remain positive.
+    //
+    //   adjusted_offset = fract(repeat/2 - 1/2 + 1) * uv_size
+    //
+    // `uv - offset` will go through another modulo in the fragment shader for which we again don't
+    // want the behavior for negative numbers. We rearrange this here in the form
+    // `uv + (uv_size - offset)` to prevent that.
+    //
+    //   adjusted_offset = (1 - fract(repeat/2 - 1/2 + 1)) * uv_size
+    //
+    // We then separate the normalized part of the offset which we also need elsewhere.
+    bvec2 centered = bvec2(brush_flags & BRUSH_FLAG_SEGMENT_REPEAT_X_CENTERED,
+                           brush_flags & BRUSH_FLAG_SEGMENT_REPEAT_Y_CENTERED);
+    // Use mix() rather than if statements due to a miscompilation on Adreno 3xx. See bug 1853573.
+    normalized_offset = mix(vec2(0.0), 1.0 - fract(repeat * 0.5 + 0.5), centered);
+    v_uv += normalized_offset * (max_uv - min_uv);
+#endif
+    v_uv /= texture_size;
+    if (perspective_interpolate == 0.0) {
+        v_uv *= vi.world_pos.w;
+    }
+
+#ifdef WR_FEATURE_TEXTURE_RECT
+    v_uv_bounds = vec4(0.0, 0.0, vec2(textureSize(sColor0)));
+#else
+    v_uv_bounds = vec4(min_uv, max_uv) / texture_size.xyxy;
+#endif
+
+#ifdef WR_FEATURE_REPETITION
+    // Normalize UV to 0..1 scale only if using repetition. Otherwise, leave
+    // UVs unnormalized since we won't compute a modulus without repetition
+    // enabled.
+    v_uv /= (v_uv_bounds.zw - v_uv_bounds.xy);
+#endif
+
+#ifdef WR_FEATURE_ALPHA_PASS
+    v_tile_repeat_bounds = repeat.xy + normalized_offset;
+
+    float opacity = float(prim_user_data.z) / 65535.0; // 65535 maps to 1.0.
+    switch (blend_mode) {
+        case BLEND_MODE_ALPHA:
+            image_data.color.a *= opacity;
+            break;
+        case BLEND_MODE_PREMUL_ALPHA:
+        default:
+            image_data.color *= opacity;
+            break;
+    }
+
+    switch (color_mode) {
+        case COLOR_MODE_ALPHA:
+        case COLOR_MODE_BITMAP_SHADOW:
+            #ifdef SWGL_BLEND
+                swgl_blendDropShadow(image_data.color);
+                v_mask_swizzle = vec2(1.0, 0.0);
+                v_color = vec4(1.0);
+            #else
+                v_mask_swizzle = vec2(0.0, 1.0);
+                v_color = image_data.color;
+            #endif
+            break;
+        case COLOR_MODE_IMAGE:
+            v_mask_swizzle = vec2(1.0, 0.0);
+            v_color = image_data.color;
+            break;
+        case COLOR_MODE_COLOR_BITMAP:
+            v_mask_swizzle = vec2(1.0, 0.0);
+            v_color = vec4(image_data.color.a);
+            break;
+        case COLOR_MODE_SUBPX_DUAL_SOURCE:
+            v_mask_swizzle = vec2(image_data.color.a, 0.0);
+            v_color = image_data.color;
+            break;
+        case COLOR_MODE_MULTIPLY_DUAL_SOURCE:
+            v_mask_swizzle = vec2(-image_data.color.a, image_data.color.a);
+            v_color = image_data.color;
+            break;
+        default:
+            v_mask_swizzle = vec2(0.0);
+            v_color = vec4(1.0);
+    }
+#endif
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+vec2 compute_repeated_uvs(float perspective_divisor) {
+    // Undo the perspective pre-scale applied to v_uv in the vertex shader.
+    vec2 tile_uv = v_uv * perspective_divisor;
+#ifndef WR_FEATURE_REPETITION
+    // No wrapping needed: just offset into the source rect.
+    return tile_uv + v_uv_bounds.xy;
+#else
+    vec2 extent = v_uv_bounds.zw - v_uv_bounds.xy;
+
+    #ifdef WR_FEATURE_ALPHA_PASS
+    // The primitive may have been inflated for anti-aliasing; keep the
+    // top/left overhang from leaving the range of the regular primitive.
+    tile_uv = max(tile_uv, vec2(0.0));
+    #endif
+
+    // Handle horizontal and vertical repetitions.
+    vec2 wrapped = fract(tile_uv) * extent + v_uv_bounds.xy;
+
+    #ifdef WR_FEATURE_ALPHA_PASS
+    // Bottom/right overhang. Done after the modulo because fract() wraps
+    // values exactly on the right and bottom edges, which we do not want.
+    if (tile_uv.x >= v_tile_repeat_bounds.x) {
+        wrapped.x = v_uv_bounds.z;
+    }
+    if (tile_uv.y >= v_tile_repeat_bounds.y) {
+        wrapped.y = v_uv_bounds.w;
+    }
+    #endif
+
+    return wrapped;
+#endif
+}
+
+Fragment brush_fs() {
+    // Resolve repetition, then clamp the uvs to avoid sampling artifacts.
+    float inv_w = mix(gl_FragCoord.w, 1.0, v_perspective.x);
+    vec2 sample_uv = clamp(
+        compute_repeated_uvs(inv_w),
+        v_uv_sample_bounds.xy,
+        v_uv_sample_bounds.zw
+    );
+    vec4 sampled = TEX_SAMPLE(sColor0, sample_uv);
+
+    Fragment result;
+#ifndef WR_FEATURE_ALPHA_PASS
+    // Without the alpha pass feature the texel is written out untouched.
+    result.color = sampled;
+#else
+    #ifdef WR_FEATURE_ANTIALIASING
+        float coverage = antialias_brush();
+    #else
+        float coverage = 1.0;
+    #endif
+
+    #ifdef WR_FEATURE_DUAL_SOURCE_BLENDING
+        vec4 mask = sampled * coverage;
+        result.color = v_color * mask;
+        result.blend = mask * v_mask_swizzle.x + mask.aaaa * v_mask_swizzle.y;
+    #else
+        sampled.rgb = sampled.rgb * v_mask_swizzle.x + sampled.aaa * v_mask_swizzle.y;
+        result.color = v_color * (sampled * coverage);
+    #endif
+#endif
+
+    return result;
+}
+
+#if defined(SWGL_DRAW_SPAN) && (!defined(WR_FEATURE_ALPHA_PASS) || !defined(WR_FEATURE_DUAL_SOURCE_BLENDING))
+void swgl_drawSpanRGBA8() {
+    if (!swgl_isTextureRGBA8(sColor0)) {
+        return; // NOTE(review): returning without a commit presumably falls back to brush_fs - confirm.
+    }
+
+    #ifdef WR_FEATURE_ALPHA_PASS
+        if (v_mask_swizzle != vec2(1.0, 0.0)) { // Only the plain rgb swizzle is handled by this fast path.
+            return;
+        }
+    #endif
+
+    float perspective_divisor = mix(swgl_forceScalar(gl_FragCoord.w), 1.0, v_perspective.x);
+
+    #ifdef WR_FEATURE_REPETITION
+        // Get the UVs before any repetition, scaling, or offsetting has occurred...
+        vec2 uv = v_uv * perspective_divisor;
+    #else
+        vec2 uv = compute_repeated_uvs(perspective_divisor);
+    #endif
+
+    #ifdef WR_FEATURE_ALPHA_PASS
+    if (v_color != vec4(1.0)) { // A non-white color needs the color-modulating commit variants.
+        #ifdef WR_FEATURE_REPETITION
+            swgl_commitTextureRepeatColorRGBA8(sColor0, uv, v_tile_repeat_bounds, v_uv_bounds, v_uv_sample_bounds, v_color);
+        #else
+            swgl_commitTextureColorRGBA8(sColor0, uv, v_uv_sample_bounds, v_color);
+        #endif
+        return;
+    }
+    // No color scaling required, so just fall through to a normal textured span...
+    #endif
+
+    #ifdef WR_FEATURE_REPETITION
+        #ifdef WR_FEATURE_ALPHA_PASS
+            swgl_commitTextureRepeatRGBA8(sColor0, uv, v_tile_repeat_bounds, v_uv_bounds, v_uv_sample_bounds);
+        #else
+            swgl_commitTextureRepeatRGBA8(sColor0, uv, vec2(0.0), v_uv_bounds, v_uv_sample_bounds); // v_tile_repeat_bounds only exists in the alpha pass, so pass zero.
+        #endif
+    #else
+        swgl_commitTextureRGBA8(sColor0, uv, v_uv_sample_bounds);
+    #endif
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/brush_linear_gradient.glsl b/gfx/wr/webrender/res/brush_linear_gradient.glsl
new file mode 100644
index 0000000000..ceb1b14e5b
--- /dev/null
+++ b/gfx/wr/webrender/res/brush_linear_gradient.glsl
@@ -0,0 +1,95 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define VECS_PER_SPECIFIC_BRUSH 2
+
+#include shared,prim_shared,brush,gpu_buffer,gradient_shared
+
+// Start offset. Packed in to vector to work around bug 1630356.
+flat varying mediump vec2 v_start_offset;
+
+flat varying mediump vec2 v_scale_dir; // Gradient direction / its squared length, multiplied by v_repeated_size.
+
+#ifdef WR_VERTEX_SHADER
+
+struct Gradient { // Unpacked from two consecutive GPU cache vec4s.
+    vec4 start_end_point; // Start point in .xy, end point in .zw.
+    int extend_mode; // Stored as a float in the cache and converted with int().
+    vec2 stretch_size; // Forwarded to write_gradient_vertex.
+};
+
+// Reads a Gradient from the two GPU cache vec4s at `address`: the first
+// holds the start/end points, the second the extend mode (.x) and the
+// stretch size (.yz).
+Gradient fetch_gradient(int address) {
+    vec4[2] blocks = fetch_from_gpu_cache_2(address);
+    vec4 tail = blocks[1];
+    return Gradient(blocks[0], int(tail.x), tail.yz);
+}
+
+void brush_vs(
+    VertexInfo vi,
+    int prim_address,
+    RectWithEndpoint local_rect,
+    RectWithEndpoint segment_rect,
+    ivec4 prim_user_data,
+    int specific_resource_address,
+    mat4 transform,
+    PictureTask pic_task,
+    int brush_flags,
+    vec4 texel_rect
+) {
+    Gradient gradient = fetch_gradient(prim_address);
+
+    // Shared gradient vertex setup. Keep this ahead of the v_repeated_size
+    // read at the end (NOTE(review): presumably written here - confirm).
+    write_gradient_vertex(
+        vi, local_rect, segment_rect,
+        prim_user_data, brush_flags, texel_rect,
+        gradient.extend_mode, gradient.stretch_size
+    );
+
+    // Vector from the start point (.xy) to the end point (.zw).
+    vec2 origin = gradient.start_end_point.xy;
+    vec2 axis = gradient.start_end_point.zw - origin;
+
+    // Dividing by the squared length makes the projection in the fragment
+    // shader run from 0 at the start point to 1 at the end point.
+    v_scale_dir = axis / dot(axis, axis);
+    v_start_offset.x = dot(origin, v_scale_dir);
+
+    // Fold the repeated size into the direction only after the offset is taken.
+    v_scale_dir *= v_repeated_size;
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+float get_gradient_offset(vec2 pos) {
+    float along_axis = dot(pos, v_scale_dir); // project pos onto the gradient direction
+    return along_axis - v_start_offset.x;
+}
+
+Fragment brush_fs() {
+    // Offset of this fragment along the gradient, used to look up its color.
+    float offset = get_gradient_offset(compute_repeated_pos());
+    vec4 shaded = sample_gradient(offset);
+#ifdef WR_FEATURE_ALPHA_PASS
+    shaded *= antialias_brush();
+#endif
+    return Fragment(shaded);
+}
+
+#ifdef SWGL_DRAW_SPAN
+void swgl_drawSpanRGBA8() {
+    int address = swgl_validateGradient(sGpuBuffer, get_gpu_buffer_uv(v_gradient_address.x), int(GRADIENT_ENTRIES + 2.0));
+    if (address < 0) { // NOTE(review): a negative address presumably signals failed validation - confirm.
+        return; // Nothing is committed for this span.
+    }
+
+    swgl_commitLinearGradientRGBA8(sGpuBuffer, address, GRADIENT_ENTRIES, true, v_gradient_repeat.x != 0.0,
+                                   v_pos, v_scale_dir, v_start_offset.x);
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/brush_mix_blend.glsl b/gfx/wr/webrender/res/brush_mix_blend.glsl
new file mode 100644
index 0000000000..c18b95161b
--- /dev/null
+++ b/gfx/wr/webrender/res/brush_mix_blend.glsl
@@ -0,0 +1,332 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define VECS_PER_SPECIFIC_BRUSH 3
+#define WR_FEATURE_TEXTURE_2D
+
+#include shared,prim_shared,brush
+
+// UV and bounds for the source image
+varying highp vec2 v_src_uv;
+flat varying highp vec4 v_src_uv_sample_bounds;
+
+// UV and bounds for the backdrop image
+varying highp vec2 v_backdrop_uv;
+flat varying highp vec4 v_backdrop_uv_sample_bounds;
+
+// Flag to allow perspective interpolation of UV.
+// Packed in to vector to work around bug 1630356.
+flat varying mediump vec2 v_perspective;
+// mix-blend op. Packed in to vector to work around bug 1630356.
+flat varying mediump ivec2 v_op;
+
+#ifdef WR_VERTEX_SHADER
+
+void get_uv(
+    int res_address,
+    vec2 f,
+    ivec2 texture_size,
+    float perspective_f,
+    out vec2 out_uv,
+    out vec4 out_uv_sample_bounds
+) {
+    ImageSource source = fetch_image_source(res_address);
+    vec2 texel_scale = vec2(1.0) / vec2(texture_size);
+
+    // Remap f through the image quad, then interpolate across the uv rect.
+    vec2 quad_f = get_image_quad_uv(res_address, f);
+    vec2 texel_uv = mix(source.uv_rect.p0, source.uv_rect.p1, quad_f);
+    out_uv = texel_uv * texel_scale * perspective_f;
+
+    // Inset the rect by half a texel per side before normalizing.
+    out_uv_sample_bounds = vec4(source.uv_rect.p0 + vec2(0.5), source.uv_rect.p1 - vec2(0.5)) * texel_scale.xyxy;
+}
+
+void brush_vs(
+    VertexInfo vi,
+    int prim_address,
+    RectWithEndpoint local_rect,
+    RectWithEndpoint segment_rect,
+    ivec4 prim_user_data,
+    int specific_resource_address,
+    mat4 transform,
+    PictureTask pic_task,
+    int brush_flags,
+    vec4 unused
+) {
+    v_op.x = prim_user_data.x;
+
+    // 1.0 when the UVs are to be perspective-interpolated, otherwise 0.0.
+    float use_perspective = (brush_flags & BRUSH_FLAG_PERSPECTIVE_INTERPOLATION) != 0 ? 1.0 : 0.0;
+    v_perspective.x = use_perspective;
+
+    // Position of this vertex relative to local_rect.
+    vec2 rect_f = (vi.local_pos - local_rect.p0) / rect_size(local_rect);
+
+    // Backdrop: the perspective factor is fixed at 1.0, and brush_fs uses
+    // v_backdrop_uv without applying a perspective divisor.
+    get_uv(
+        prim_user_data.y, rect_f, TEX_SIZE(sColor0).xy, 1.0,
+        v_backdrop_uv, v_backdrop_uv_sample_bounds
+    );
+
+    // Source: pre-scaled by w unless perspective interpolation is enabled;
+    // brush_fs multiplies by gl_FragCoord.w to undo it.
+    float src_w = mix(vi.world_pos.w, 1.0, use_perspective);
+    get_uv(
+        prim_user_data.z, rect_f, TEX_SIZE(sColor1).xy, src_w,
+        v_src_uv, v_src_uv_sample_bounds
+    );
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+vec3 Multiply(vec3 Cb, vec3 Cs) { // Cb: backdrop color, Cs: source color.
+    return Cb * Cs;
+}
+
+vec3 Screen(vec3 Cb, vec3 Cs) {
+    return Cb + Cs - (Cb * Cs); // Algebraically 1 - (1 - Cb) * (1 - Cs).
+}
+
+vec3 HardLight(vec3 Cb, vec3 Cs) {
+    // Per channel: multiply when Cs < 0.5, screen when Cs >= 0.5.
+    vec3 use_screen = step(vec3(0.5), Cs);
+    vec3 darkened = Multiply(Cb, 2.0 * Cs);
+    return mix(darkened, Screen(Cb, 2.0 * Cs - 1.0), use_screen);
+}
+
+// TODO: Worth doing with mix/step? Check GLSL output.
+float ColorDodge(float Cb, float Cs) {
+    // A zero backdrop stays zero, even when Cs is 1.0.
+    if (Cb == 0.0) return 0.0;
+    // Cs of 1.0 would divide by zero below.
+    if (Cs == 1.0) return 1.0;
+
+    return min(1.0, Cb / (1.0 - Cs));
+}
+
+// TODO: Worth doing with mix/step? Check GLSL output.
+float ColorBurn(float Cb, float Cs) {
+    // A backdrop of 1.0 stays at 1.0, even when Cs is 0.0.
+    if (Cb == 1.0) return 1.0;
+    // Cs of 0.0 would divide by zero below.
+    if (Cs == 0.0) return 0.0;
+
+    return 1.0 - min(1.0, (1.0 - Cb) / Cs);
+}
+
+float SoftLight(float Cb, float Cs) {
+    // Sources at or below 0.5 take the subtractive branch.
+    if (Cs <= 0.5) {
+        return Cb - (1.0 - 2.0 * Cs) * Cb * (1.0 - Cb);
+    }
+
+    // Brighter sources move Cb towards D: a cubic polynomial for
+    // backdrops up to 0.25, the square root above that.
+    float D = (Cb <= 0.25)
+        ? ((16.0 * Cb - 12.0) * Cb + 4.0) * Cb
+        : sqrt(Cb);
+
+    return Cb + (2.0 * Cs - 1.0) * (D - Cb);
+}
+
+vec3 Difference(vec3 Cb, vec3 Cs) {
+    return abs(Cb - Cs); // Per-channel absolute difference; symmetric in Cb and Cs.
+}
+
+// These functions below are taken from the spec.
+// There's probably a much quicker way to implement
+// them in GLSL...
+float Sat(vec3 c) {
+    return max(c.r, max(c.g, c.b)) - min(c.r, min(c.g, c.b)); // Largest channel minus smallest channel.
+}
+
+float Lum(vec3 c) {
+    // Weighted sum of the channels: 0.3 r + 0.59 g + 0.11 b.
+    return dot(c, vec3(0.3, 0.59, 0.11));
+}
+
+vec3 ClipColor(vec3 C) {
+    float lum = Lum(C);
+    float lo = min(C.r, min(C.g, C.b));
+    float hi = max(C.r, max(C.g, C.b));
+    // Both tests use the extremes measured before any correction.
+    if (lo < 0.0) {
+        C = lum + (((C - lum) * lum) / (lum - lo));
+    }
+    if (hi > 1.0) {
+        C = lum + (((C - lum) * (1.0 - lum)) / (hi - lum));
+    }
+    return C;
+}
+
+vec3 SetLum(vec3 C, float l) {
+    // Shift every channel by the luminance delta, then clip the result.
+    return ClipColor(C + (l - Lum(C)));
+}
+
+void SetSatInner(inout float Cmin, inout float Cmid, inout float Cmax, float s) {
+    // Rescales the channels so Cmin becomes 0 and Cmax becomes s; when Cmax
+    // is not greater than Cmin, all three are set to 0 instead.
+    bool has_range = Cmax > Cmin;
+    float range = Cmax - Cmin;
+
+    Cmid = has_range ? ((Cmid - Cmin) * s) / range : 0.0;
+    Cmax = has_range ? s : 0.0;
+    Cmin = 0.0;
+}
+
+vec3 SetSat(vec3 C, float s) { // Sorts the channels by comparison, then rescales them in place via SetSatInner.
+    if (C.r <= C.g) {
+        if (C.g <= C.b) {
+            SetSatInner(C.r, C.g, C.b, s); // r <= g <= b
+        } else {
+            if (C.r <= C.b) {
+                SetSatInner(C.r, C.b, C.g, s); // r <= b < g
+            } else {
+                SetSatInner(C.b, C.r, C.g, s); // b < r <= g
+            }
+        }
+    } else {
+        if (C.r <= C.b) {
+            SetSatInner(C.g, C.r, C.b, s); // g < r <= b
+        } else {
+            if (C.g <= C.b) {
+                SetSatInner(C.g, C.b, C.r, s); // g <= b < r
+            } else {
+                SetSatInner(C.b, C.g, C.r, s); // b < g < r
+            }
+        }
+    }
+    return C;
+}
+
+vec3 Hue(vec3 Cb, vec3 Cs) {
+    return SetLum(SetSat(Cs, Sat(Cb)), Lum(Cb)); // Source color given the backdrop's saturation, then the backdrop's luminosity.
+}
+
+vec3 Saturation(vec3 Cb, vec3 Cs) {
+    return SetLum(SetSat(Cb, Sat(Cs)), Lum(Cb)); // Backdrop color given the source's saturation, keeping the backdrop's luminosity.
+}
+
+vec3 Color(vec3 Cb, vec3 Cs) {
+    return SetLum(Cs, Lum(Cb)); // Source color with the backdrop's luminosity.
+}
+
+vec3 Luminosity(vec3 Cb, vec3 Cs) {
+    return SetLum(Cb, Lum(Cs)); // Backdrop color with the source's luminosity.
+}
+
+const int MixBlendMode_Multiply    = 1; // NOTE(review): values presumably mirror a CPU-side MixBlendMode enum - confirm. 0 has no constant here.
+const int MixBlendMode_Screen      = 2; // Not computed in brush_fs (see the switch there).
+const int MixBlendMode_Overlay     = 3;
+const int MixBlendMode_Darken      = 4;
+const int MixBlendMode_Lighten     = 5;
+const int MixBlendMode_ColorDodge  = 6;
+const int MixBlendMode_ColorBurn   = 7;
+const int MixBlendMode_HardLight   = 8;
+const int MixBlendMode_SoftLight   = 9;
+const int MixBlendMode_Difference  = 10;
+const int MixBlendMode_Exclusion   = 11; // Not computed in brush_fs (see the switch there).
+const int MixBlendMode_Hue         = 12;
+const int MixBlendMode_Saturation  = 13;
+const int MixBlendMode_Color       = 14;
+const int MixBlendMode_Luminosity  = 15;
+const int MixBlendMode_PlusLighter = 16; // Not computed in brush_fs (see the switch there).
+
+Fragment brush_fs() {
+    float perspective_divisor = mix(gl_FragCoord.w, 1.0, v_perspective.x);
+
+    vec2 src_uv = v_src_uv * perspective_divisor;
+    src_uv = clamp(src_uv, v_src_uv_sample_bounds.xy, v_src_uv_sample_bounds.zw);
+
+    vec2 backdrop_uv = clamp(v_backdrop_uv, v_backdrop_uv_sample_bounds.xy, v_backdrop_uv_sample_bounds.zw); // No perspective divisor: brush_vs passes 1.0 for the backdrop.
+
+    vec4 Cb = texture(sColor0, backdrop_uv);
+    vec4 Cs = texture(sColor1, src_uv);
+
+    // The mix-blend-mode functions assume no premultiplied alpha
+    if (Cb.a != 0.0) {
+        Cb.rgb /= Cb.a;
+    }
+
+    if (Cs.a != 0.0) {
+        Cs.rgb /= Cs.a;
+    }
+
+    // Return yellow if none of the branches match (shouldn't happen).
+    vec4 result = vec4(1.0, 1.0, 0.0, 1.0);
+
+    // On Android v_op has been packed in to a vector to avoid a driver bug
+    // on Adreno 3xx. However, this runs in to another Adreno 3xx driver bug
+    // where the switch doesn't match any cases. Unpacking the value from the
+    // vec in to a local variable prior to the switch works around this, but
+    // gets optimized away by glslopt. Adding a bitwise AND prevents that.
+    // See bug 1726755.
+    // The `default:` case is present to appease angle_shader_validation.
+    switch (v_op.x & 0xFF) {
+        case MixBlendMode_Multiply:
+            result.rgb = Multiply(Cb.rgb, Cs.rgb);
+            break;
+        case MixBlendMode_Overlay:
+            // Overlay is inverse of Hardlight
+            result.rgb = HardLight(Cs.rgb, Cb.rgb);
+            break;
+        case MixBlendMode_Darken:
+            result.rgb = min(Cs.rgb, Cb.rgb);
+            break;
+        case MixBlendMode_Lighten:
+            result.rgb = max(Cs.rgb, Cb.rgb);
+            break;
+        case MixBlendMode_ColorDodge:
+            result.r = ColorDodge(Cb.r, Cs.r);
+            result.g = ColorDodge(Cb.g, Cs.g);
+            result.b = ColorDodge(Cb.b, Cs.b);
+            break;
+        case MixBlendMode_ColorBurn:
+            result.r = ColorBurn(Cb.r, Cs.r);
+            result.g = ColorBurn(Cb.g, Cs.g);
+            result.b = ColorBurn(Cb.b, Cs.b);
+            break;
+        case MixBlendMode_HardLight:
+            result.rgb = HardLight(Cb.rgb, Cs.rgb);
+            break;
+        case MixBlendMode_SoftLight:
+            result.r = SoftLight(Cb.r, Cs.r);
+            result.g = SoftLight(Cb.g, Cs.g);
+            result.b = SoftLight(Cb.b, Cs.b);
+            break;
+        case MixBlendMode_Difference:
+            result.rgb = Difference(Cb.rgb, Cs.rgb);
+            break;
+        case MixBlendMode_Hue:
+            result.rgb = Hue(Cb.rgb, Cs.rgb);
+            break;
+        case MixBlendMode_Saturation:
+            result.rgb = Saturation(Cb.rgb, Cs.rgb);
+            break;
+        case MixBlendMode_Color:
+            result.rgb = Color(Cb.rgb, Cs.rgb);
+            break;
+        case MixBlendMode_Luminosity:
+            result.rgb = Luminosity(Cb.rgb, Cs.rgb);
+            break;
+        case MixBlendMode_Screen:
+        case MixBlendMode_Exclusion:
+        case MixBlendMode_PlusLighter:
+            // This should be unreachable, since we implement
+            // MixBlendMode::Screen, MixBlendMode::Exclusion and
+            // MixBlendMode::PlusLighter using glBlendFuncSeparate.
+            break;
+        default: break;
+    }
+
+    result.rgb = (1.0 - Cb.a) * Cs.rgb + Cb.a * result.rgb; // Where the backdrop is transparent, the unblended source shows through.
+    result.a = Cs.a;
+    result.rgb *= result.a; // Premultiply again for output.
+
+    return Fragment(result);
+}
+#endif
diff --git a/gfx/wr/webrender/res/brush_opacity.glsl b/gfx/wr/webrender/res/brush_opacity.glsl
new file mode 100644
index 0000000000..caef83304a
--- /dev/null
+++ b/gfx/wr/webrender/res/brush_opacity.glsl
@@ -0,0 +1,83 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define VECS_PER_SPECIFIC_BRUSH 3
+#define WR_FEATURE_TEXTURE_2D
+
+#include shared,prim_shared,brush
+
+// Interpolated UV coordinates to sample.
+varying highp vec2 v_uv;
+
+// Normalized bounds of the source image in the texture, adjusted to avoid
+// sampling artifacts.
+flat varying highp vec4 v_uv_sample_bounds;
+
+flat varying mediump vec2 v_opacity_perspective_vec;
+#define v_opacity v_opacity_perspective_vec.x
+// Flag to allow perspective interpolation of UV.
+#define v_perspective v_opacity_perspective_vec.y
+
+#ifdef WR_VERTEX_SHADER
+void brush_vs(
+    VertexInfo vi,
+    int prim_address,
+    RectWithEndpoint local_rect,
+    RectWithEndpoint segment_rect,
+    ivec4 prim_user_data,
+    int specific_resource_address,
+    mat4 transform,
+    PictureTask pic_task,
+    int brush_flags,
+    vec4 unused
+) {
+    float use_perspective = (brush_flags & BRUSH_FLAG_PERSPECTIVE_INTERPOLATION) != 0 ? 1.0 : 0.0;
+    v_perspective = use_perspective;
+
+    ImageSource source = fetch_image_source(prim_user_data.x);
+    vec2 tex_dims = vec2(TEX_SIZE(sColor0).xy);
+
+    // Locate this vertex inside the source's uv rect, then normalize.
+    vec2 rect_f = (vi.local_pos - local_rect.p0) / rect_size(local_rect);
+    vec2 quad_f = get_image_quad_uv(prim_user_data.x, rect_f);
+    vec2 texel_uv = mix(source.uv_rect.p0, source.uv_rect.p1, quad_f);
+    v_uv = texel_uv / tex_dims * mix(vi.world_pos.w, 1.0, use_perspective);
+
+    v_uv_sample_bounds = vec4(source.uv_rect.p0 + vec2(0.5), source.uv_rect.p1 - vec2(0.5)) / tex_dims.xyxy;
+
+    // Fixed-point opacity: 65536 maps to 1.0, and the result is clamped.
+    v_opacity = clamp(float(prim_user_data.y) / 65536.0, 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+Fragment brush_fs() {
+    // Undo the vertex shader's perspective pre-scale, then clamp the
+    // uvs to avoid sampling artifacts.
+    float inv_w = mix(gl_FragCoord.w, 1.0, v_perspective);
+    vec2 sample_uv = clamp(v_uv * inv_w, v_uv_sample_bounds.xy, v_uv_sample_bounds.zw);
+
+    // No need to un-premultiply since we'll only apply a factor to the alpha.
+    vec4 texel = texture(sColor0, sample_uv);
+
+    float factor = v_opacity;
+    #ifdef WR_FEATURE_ALPHA_PASS
+        // Anti-aliasing coverage is folded into the same factor.
+        factor *= antialias_brush();
+    #endif
+
+    // Pre-multiply the contribution of the opacity factor.
+    return Fragment(factor * texel);
+}
+
+#if defined(SWGL_DRAW_SPAN) && !defined(WR_FEATURE_DUAL_SOURCE_BLENDING)
+void swgl_drawSpanRGBA8() {
+    float perspective_divisor = mix(swgl_forceScalar(gl_FragCoord.w), 1.0, v_perspective); // Same divisor as brush_fs, forced to a scalar.
+    vec2 uv = v_uv * perspective_divisor;
+
+    swgl_commitTextureLinearColorRGBA8(sColor0, uv, v_uv_sample_bounds, v_opacity); // v_opacity is passed as the color factor.
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/brush_solid.glsl b/gfx/wr/webrender/res/brush_solid.glsl
new file mode 100644
index 0000000000..d1028179c4
--- /dev/null
+++ b/gfx/wr/webrender/res/brush_solid.glsl
@@ -0,0 +1,60 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define VECS_PER_SPECIFIC_BRUSH 1
+
+#include shared,prim_shared,brush
+
+flat varying mediump vec4 v_color;
+
+#ifdef WR_VERTEX_SHADER
+
+struct SolidBrush {
+    vec4 color; // the vec4 that fetch_solid_primitive() reads from the GPU cache
+};
+
+// Wraps the single vec4 stored at `address` in the GPU cache as a SolidBrush.
+SolidBrush fetch_solid_primitive(int address) {
+    return SolidBrush(fetch_from_gpu_cache_1(address));
+}
+
+void brush_vs(
+    VertexInfo vi,
+    int prim_address,
+    RectWithEndpoint local_rect,
+    RectWithEndpoint segment_rect,
+    ivec4 prim_user_data,
+    int specific_resource_address,
+    mat4 transform,
+    PictureTask pic_task,
+    int brush_flags,
+    vec4 unused
+) {
+    // prim_user_data.x holds the opacity as an integer out of 65535; fold it
+    // into the brush color here so the fragment stage gets one flat color.
+    vec4 base_color = fetch_solid_primitive(prim_address).color;
+    v_color = base_color * (float(prim_user_data.x) / 65535.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+Fragment brush_fs() {
+#ifdef WR_FEATURE_ALPHA_PASS
+    return Fragment(v_color * antialias_brush()); // flat color scaled by the AA factor
+#else
+    return Fragment(v_color);
+#endif
+}
+
+#if defined(SWGL_DRAW_SPAN) && (!defined(WR_FEATURE_ALPHA_PASS) || !defined(WR_FEATURE_DUAL_SOURCE_BLENDING))
+void swgl_drawSpanRGBA8() {
+    swgl_commitSolidRGBA8(v_color); // passes the flat brush color unchanged (no AA factor applied here)
+}
+
+void swgl_drawSpanR8() {
+    swgl_commitSolidR8(v_color.x); // only the x component of the flat color is passed
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/brush_yuv_image.glsl b/gfx/wr/webrender/res/brush_yuv_image.glsl
new file mode 100644
index 0000000000..4b7e5f1944
--- /dev/null
+++ b/gfx/wr/webrender/res/brush_yuv_image.glsl
@@ -0,0 +1,140 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define VECS_PER_SPECIFIC_BRUSH 1
+
+#include shared,prim_shared,brush,yuv
+
+varying highp vec2 vUv_Y;
+flat varying highp vec4 vUvBounds_Y;
+
+varying highp vec2 vUv_U;
+flat varying highp vec4 vUvBounds_U;
+
+varying highp vec2 vUv_V;
+flat varying highp vec4 vUvBounds_V;
+
+flat varying YUV_PRECISION vec3 vYcbcrBias;
+flat varying YUV_PRECISION mat3 vRgbFromDebiasedYcbcr;
+
+// YUV format. Packed into a vector to work around bug 1630356.
+flat varying mediump ivec2 vFormat;
+
+#ifdef SWGL_DRAW_SPAN
+flat varying mediump int vRescaleFactor;
+#endif
+
+#ifdef WR_VERTEX_SHADER
+
+YuvPrimitive fetch_yuv_primitive(int address) {
+    // Block layout, from YuvImageData.write_prim_gpu_blocks:
+    //   x = channel bit depth
+    //   y = color space
+    //   z = YUV format
+    vec4 block = fetch_from_gpu_cache_1(address);
+    return YuvPrimitive(int(block.x), int(block.y), int(block.z));
+}
+
+void brush_vs(
+    VertexInfo vi,
+    int prim_address,
+    RectWithEndpoint local_rect,
+    RectWithEndpoint segment_rect,
+    ivec4 prim_user_data,
+    int specific_resource_address,
+    mat4 transform,
+    PictureTask pic_task,
+    int brush_flags,
+    vec4 unused
+) {
+    vec2 f = (vi.local_pos - local_rect.p0) / rect_size(local_rect);
+    // f: position of this vertex within local_rect, as a fraction of its size.
+    YuvPrimitive prim = fetch_yuv_primitive(prim_address);
+
+#ifdef SWGL_DRAW_SPAN
+    // swgl_commitTextureLinearYUV needs to know the color space specifier and
+    // also needs to know how many bits of scaling are required to normalize
+    // HDR textures. Note that MSB HDR formats don't need renormalization.
+    vRescaleFactor = 0;
+    if (prim.channel_bit_depth > 8 && prim.yuv_format != YUV_FORMAT_P010) {
+        vRescaleFactor = 16 - prim.channel_bit_depth;
+    }
+#endif
+    // Flat color-conversion inputs that brush_fs() forwards to sample_yuv().
+    YuvColorMatrixInfo mat_info = get_rgb_from_ycbcr_info(prim);
+    vYcbcrBias = mat_info.ycbcr_bias;
+    vRgbFromDebiasedYcbcr = mat_info.rgb_from_debiased_ycbrc;
+
+    vFormat.x = prim.yuv_format;
+    // Only the planes that the format uses get their UV varyings written below.
+    // The additional test for 99 works around a gen6 shader compiler bug: 1708937
+    if (vFormat.x == YUV_FORMAT_PLANAR || vFormat.x == 99) {
+        ImageSource res_y = fetch_image_source(prim_user_data.x);
+        ImageSource res_u = fetch_image_source(prim_user_data.y);
+        ImageSource res_v = fetch_image_source(prim_user_data.z);
+        write_uv_rect(res_y.uv_rect.p0, res_y.uv_rect.p1, f, TEX_SIZE_YUV(sColor0), vUv_Y, vUvBounds_Y);
+        write_uv_rect(res_u.uv_rect.p0, res_u.uv_rect.p1, f, TEX_SIZE_YUV(sColor1), vUv_U, vUvBounds_U);
+        write_uv_rect(res_v.uv_rect.p0, res_v.uv_rect.p1, f, TEX_SIZE_YUV(sColor2), vUv_V, vUvBounds_V);
+    } else if (vFormat.x == YUV_FORMAT_NV12 || vFormat.x == YUV_FORMAT_P010) {
+        ImageSource res_y = fetch_image_source(prim_user_data.x);
+        ImageSource res_u = fetch_image_source(prim_user_data.y);
+        write_uv_rect(res_y.uv_rect.p0, res_y.uv_rect.p1, f, TEX_SIZE_YUV(sColor0), vUv_Y, vUvBounds_Y);
+        write_uv_rect(res_u.uv_rect.p0, res_u.uv_rect.p1, f, TEX_SIZE_YUV(sColor1), vUv_U, vUvBounds_U);
+    } else if (vFormat.x == YUV_FORMAT_INTERLEAVED) {
+        ImageSource res_y = fetch_image_source(prim_user_data.x);
+        write_uv_rect(res_y.uv_rect.p0, res_y.uv_rect.p1, f, TEX_SIZE_YUV(sColor0), vUv_Y, vUvBounds_Y);
+    }
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+Fragment brush_fs() {
+    // sample_yuv() is given the format, the color conversion parameters,
+    // and the UV plus clamp bounds of each of the Y, U and V planes.
+    vec4 rgba = sample_yuv(
+        vFormat.x,
+        vYcbcrBias, vRgbFromDebiasedYcbcr,
+        vUv_Y, vUv_U, vUv_V,
+        vUvBounds_Y, vUvBounds_U, vUvBounds_V
+    );
+
+#ifdef WR_FEATURE_ALPHA_PASS
+    rgba *= antialias_brush();
+#endif
+
+    // Debugging aid: uncomment to visualize the format and the bias.
+    // (vFormat is an ivec2, so the format lives in .x.)
+    //rgba.r = float(100 + vFormat.x) / 255.0;
+    //rgba.g = vYcbcrBias.x;
+    //rgba.b = vYcbcrBias.y;
+
+    return Fragment(rgba);
+}
+
+#ifdef SWGL_DRAW_SPAN
+void swgl_drawSpanRGBA8() {
+    if (vFormat.x == YUV_FORMAT_PLANAR) { // three textures: Y, U and V
+        swgl_commitTextureLinearYUV(sColor0, vUv_Y, vUvBounds_Y,
+                                    sColor1, vUv_U, vUvBounds_U,
+                                    sColor2, vUv_V, vUvBounds_V,
+                                    vYcbcrBias,
+                                    vRgbFromDebiasedYcbcr,
+                                    vRescaleFactor);
+    } else if (vFormat.x == YUV_FORMAT_NV12 || vFormat.x == YUV_FORMAT_P010) { // two textures
+        swgl_commitTextureLinearYUV(sColor0, vUv_Y, vUvBounds_Y,
+                                    sColor1, vUv_U, vUvBounds_U,
+                                    vYcbcrBias,
+                                    vRgbFromDebiasedYcbcr,
+                                    vRescaleFactor);
+    } else if (vFormat.x == YUV_FORMAT_INTERLEAVED) { // single texture
+        swgl_commitTextureLinearYUV(sColor0, vUv_Y, vUvBounds_Y,
+                                    vYcbcrBias,
+                                    vRgbFromDebiasedYcbcr,
+                                    vRescaleFactor);
+    }
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/clip_shared.glsl b/gfx/wr/webrender/res/clip_shared.glsl
new file mode 100644
index 0000000000..ef28bfde22
--- /dev/null
+++ b/gfx/wr/webrender/res/clip_shared.glsl
@@ -0,0 +1,80 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include rect,render_task,gpu_cache,transform
+
+#ifdef WR_VERTEX_SHADER
+
+PER_INSTANCE in vec4 aClipDeviceArea;
+PER_INSTANCE in vec4 aClipOrigins;
+PER_INSTANCE in float aDevicePixelScale;
+PER_INSTANCE in ivec2 aTransformIds;
+
+struct ClipMaskInstanceCommon {
+    RectWithEndpoint sub_rect;  // aClipDeviceArea: xy = p0, zw = p1
+    vec2 task_origin;           // aClipOrigins.xy
+    vec2 screen_origin;         // aClipOrigins.zw
+    float device_pixel_scale;   // aDevicePixelScale
+    int clip_transform_id;      // aTransformIds.x
+    int prim_transform_id;      // aTransformIds.y
+};
+
+ClipMaskInstanceCommon fetch_clip_item_common() {
+    // Repack the per-instance clip attributes into one struct. The argument
+    // order follows the field order of ClipMaskInstanceCommon.
+    return ClipMaskInstanceCommon(
+        RectWithEndpoint(aClipDeviceArea.xy, aClipDeviceArea.zw), // sub_rect
+        aClipOrigins.xy,    // task_origin
+        aClipOrigins.zw,    // screen_origin
+        aDevicePixelScale,  // device_pixel_scale
+        aTransformIds.x,    // clip_transform_id
+        aTransformIds.y     // prim_transform_id
+    );
+}
+
+struct ClipVertexInfo {
+    vec4 local_pos; // get_node_pos() result rescaled by w, see write_clip_tile_vertex()
+    RectWithEndpoint clipped_local_rect; // the local_clip_rect argument, passed through
+};
+
+// The transformed vertex function that always covers the whole clip area,
+// which is the intersection of all clip instances of a given primitive
+ClipVertexInfo write_clip_tile_vertex(RectWithEndpoint local_clip_rect,
+                                      Transform prim_transform,
+                                      Transform clip_transform,
+                                      RectWithEndpoint sub_rect,
+                                      vec2 task_origin,
+                                      vec2 screen_origin,
+                                      float device_pixel_scale) {
+    // Corner of sub_rect picked by this vertex of the unit quad; it is
+    // shared by the screen-relative and the task-relative positions below.
+    vec2 corner = mix(sub_rect.p0, sub_rect.p1, aPosition.xy);
+
+    // Device pixels -> world units, then through prim_transform.m. Only xyz
+    // is divided by w, so w itself stays available for the rescale below.
+    vec2 world_pos = (screen_origin + corner) / device_pixel_scale;
+    vec4 pos = prim_transform.m * vec4(world_pos, 0.0, 1.0);
+    pos.xyz /= pos.w;
+
+    // Map through clip_transform and scale the result back up by w.
+    vec4 local_pos = get_node_pos(pos.xy, clip_transform) * pos.w;
+
+    //TODO: Interpolate in clip space, where "local_pos.w" contains
+    // the W of the homogeneous transform *from* clip space into the world.
+    //    float interpolate_w = 1.0 / local_pos.w;
+    // This is problematic today, because the W<=0 hemisphere is going to be
+    // clipped, while we currently want this shader to fill out the whole rect.
+    // We can therefore simplify this when the clip construction is rewritten
+    // to only affect the areas touched by a clip.
+
+    // The emitted vertex is the same sub_rect corner, offset by task_origin.
+    vec4 vertex_pos = vec4(task_origin + corner, 0.0, 1.0);
+    gl_Position = uTransform * vertex_pos;
+
+    init_transform_vs(vec4(local_clip_rect.p0, local_clip_rect.p1));
+
+    return ClipVertexInfo(local_pos, local_clip_rect);
+}
+
+#endif //WR_VERTEX_SHADER
diff --git a/gfx/wr/webrender/res/composite.glsl b/gfx/wr/webrender/res/composite.glsl
new file mode 100644
index 0000000000..4d30685ea1
--- /dev/null
+++ b/gfx/wr/webrender/res/composite.glsl
@@ -0,0 +1,242 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Composite a picture cache tile into the framebuffer.
+
+// This shader must remain compatible with ESSL 1, at least for the
+// WR_FEATURE_TEXTURE_EXTERNAL_ESSL1 feature, so that it can be used to render
+// video on GLES devices without GL_OES_EGL_image_external_essl3 support.
+// This means we cannot use textureSize(), int inputs/outputs, etc.
+
+#include shared
+
+#ifdef WR_FEATURE_YUV
+#include yuv
+#endif
+
+#ifdef WR_FEATURE_YUV
+flat varying YUV_PRECISION vec3 vYcbcrBias;
+flat varying YUV_PRECISION mat3 vRgbFromDebiasedYcbcr;
+// YUV format. Packed into a vector to avoid bug 1630356.
+flat varying mediump ivec2 vYuvFormat;
+
+#ifdef SWGL_DRAW_SPAN
+flat varying mediump int vRescaleFactor;
+#endif
+varying highp vec2 vUV_y;
+varying highp vec2 vUV_u;
+varying highp vec2 vUV_v;
+flat varying highp vec4 vUVBounds_y;
+flat varying highp vec4 vUVBounds_u;
+flat varying highp vec4 vUVBounds_v;
+#else
+varying highp vec2 vUv;
+#ifndef WR_FEATURE_FAST_PATH
+flat varying mediump vec4 vColor;
+flat varying highp vec4 vUVBounds;
+#endif
+#ifdef WR_FEATURE_TEXTURE_EXTERNAL_ESSL1
+uniform mediump vec2 uTextureSize;
+#endif
+#endif
+
+#ifdef WR_VERTEX_SHADER
+// CPU side data is in CompositeInstance (gpu_types.rs) and is
+// converted to GPU data using desc::COMPOSITE (renderer.rs) by
+// filling vaos.composite_vao with VertexArrayKind::Composite.
+PER_INSTANCE attribute vec4 aLocalRect;
+PER_INSTANCE attribute vec4 aDeviceClipRect;
+PER_INSTANCE attribute vec4 aColor;
+PER_INSTANCE attribute vec4 aParams;
+PER_INSTANCE attribute vec4 aTransform;
+
+#ifdef WR_FEATURE_YUV
+// YUV treats these as a UV clip rect (clamp)
+PER_INSTANCE attribute vec4 aUvRect0;
+PER_INSTANCE attribute vec4 aUvRect1;
+PER_INSTANCE attribute vec4 aUvRect2;
+#else
+PER_INSTANCE attribute vec4 aUvRect0;
+#endif
+
+vec2 apply_transform(vec2 p, vec4 transform) {
+    return transform.zw + transform.xy * p; // xy = per-axis scale, zw = offset
+}
+
+#ifdef WR_FEATURE_YUV
+YuvPrimitive fetch_yuv_primitive() {
+    // aParams layout, from ExternalSurfaceDependency::Yuv:
+    //   y = color space, z = YUV format, w = channel bit depth.
+    // The YuvPrimitive constructor takes them as
+    // (channel_bit_depth, color_space, yuv_format).
+    return YuvPrimitive(int(aParams.w), int(aParams.y), int(aParams.z));
+}
+#endif
+
+void main(void) {
+    // Get world position
+    vec2 world_p0 = apply_transform(aLocalRect.xy, aTransform);
+    vec2 world_p1 = apply_transform(aLocalRect.zw, aTransform);
+    vec2 world_pos = mix(world_p0, world_p1, aPosition.xy);
+
+    // Clip the position to the world space clip rect
+    vec2 clipped_world_pos = clamp(world_pos, aDeviceClipRect.xy, aDeviceClipRect.zw);
+
+    // Derive the normalized UV from the clipped vertex position
+    vec2 uv = (clipped_world_pos - world_p0) / (world_p1 - world_p0);
+
+#ifdef WR_FEATURE_YUV
+    YuvPrimitive prim = fetch_yuv_primitive();
+
+#ifdef SWGL_DRAW_SPAN
+    // swgl_commitTextureLinearYUV needs to know the color space specifier and
+    // also needs to know how many bits of scaling are required to normalize
+    // HDR textures. Note that MSB HDR formats don't need renormalization.
+    vRescaleFactor = 0;
+    if (prim.channel_bit_depth > 8 && prim.yuv_format != YUV_FORMAT_P010) {
+        vRescaleFactor = 16 - prim.channel_bit_depth;
+    }
+#endif
+
+    YuvColorMatrixInfo mat_info = get_rgb_from_ycbcr_info(prim);
+    vYcbcrBias = mat_info.ycbcr_bias;
+    vRgbFromDebiasedYcbcr = mat_info.rgb_from_debiased_ycbrc;
+
+    vYuvFormat.x = prim.yuv_format;
+    // All three plane UV rects are written here, whatever the YUV format is.
+    write_uv_rect(
+        aUvRect0.xy,
+        aUvRect0.zw,
+        uv,
+        TEX_SIZE_YUV(sColor0),
+        vUV_y,
+        vUVBounds_y
+    );
+    write_uv_rect(
+        aUvRect1.xy,
+        aUvRect1.zw,
+        uv,
+        TEX_SIZE_YUV(sColor1),
+        vUV_u,
+        vUVBounds_u
+    );
+    write_uv_rect(
+        aUvRect2.xy,
+        aUvRect2.zw,
+        uv,
+        TEX_SIZE_YUV(sColor2),
+        vUV_v,
+        vUVBounds_v
+    );
+#else
+    uv = mix(aUvRect0.xy, aUvRect0.zw, uv);
+    // The uvs may be inverted, so use the min and max for the bounds
+    vec4 uvBounds = vec4(min(aUvRect0.xy, aUvRect0.zw), max(aUvRect0.xy, aUvRect0.zw));
+    int rescale_uv = int(aParams.y); // 1 = UVs are in pixels, see below
+    if (rescale_uv == 1)
+    {
+        // using an atlas, so UVs are in pixels, and need to be
+        // normalized and clamped.
+#if defined(WR_FEATURE_TEXTURE_RECT)
+        vec2 texture_size = vec2(1.0, 1.0);
+#elif defined(WR_FEATURE_TEXTURE_EXTERNAL_ESSL1)
+        vec2 texture_size = uTextureSize;
+#else
+        vec2 texture_size = vec2(TEX_SIZE(sColor0));
+#endif
+        uvBounds += vec4(0.5, 0.5, -0.5, -0.5); // inset the bounds by half a pixel on each side
+    #ifndef WR_FEATURE_TEXTURE_RECT
+        uv /= texture_size;
+        uvBounds /= texture_size.xyxy;
+    #endif
+    }
+
+    vUv = uv;
+#ifndef WR_FEATURE_FAST_PATH
+    vUVBounds = uvBounds;
+    // Pass through color
+    vColor = aColor;
+#endif
+#endif
+
+    gl_Position = uTransform * vec4(clipped_world_pos, 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+void main(void) {
+#ifdef WR_FEATURE_YUV
+    // YUV surface: sample_yuv() gets the format, the color conversion
+    // parameters and the UV plus bounds of each plane.
+    vec4 color = sample_yuv(
+        vYuvFormat.x,
+        vYcbcrBias,
+        vRgbFromDebiasedYcbcr,
+        vUV_y,
+        vUV_u,
+        vUV_v,
+        vUVBounds_y,
+        vUVBounds_u,
+        vUVBounds_v
+    );
+#elif defined(WR_FEATURE_FAST_PATH)
+    // Fast path: the texel is the output as-is. The UV coordinates are
+    // not clamped and the result is not modulated by a color.
+    vec4 color = TEX_SAMPLE(sColor0, vUv);
+#else
+    // General path: the color is the texture sample modulated by the
+    // supplied color, with the lookup clamped to the UV bounds first.
+    vec2 uv = clamp(vUv, vUVBounds.xy, vUVBounds.zw);
+    vec4 color = vColor * TEX_SAMPLE(sColor0, uv);
+#endif
+
+    // Every variant above leaves its result in `color`.
+    write_output(color);
+
+}
+
+#ifdef SWGL_DRAW_SPAN
+void swgl_drawSpanRGBA8() {
+#ifdef WR_FEATURE_YUV
+    if (vYuvFormat.x == YUV_FORMAT_PLANAR) { // three textures: Y, U and V
+        swgl_commitTextureLinearYUV(sColor0, vUV_y, vUVBounds_y,
+                                    sColor1, vUV_u, vUVBounds_u,
+                                    sColor2, vUV_v, vUVBounds_v,
+                                    vYcbcrBias,
+                                    vRgbFromDebiasedYcbcr,
+                                    vRescaleFactor);
+    } else if (vYuvFormat.x == YUV_FORMAT_NV12 || vYuvFormat.x == YUV_FORMAT_P010) { // two textures
+        swgl_commitTextureLinearYUV(sColor0, vUV_y, vUVBounds_y,
+                                    sColor1, vUV_u, vUVBounds_u,
+                                    vYcbcrBias,
+                                    vRgbFromDebiasedYcbcr,
+                                    vRescaleFactor);
+    } else if (vYuvFormat.x == YUV_FORMAT_INTERLEAVED) { // single texture
+        swgl_commitTextureLinearYUV(sColor0, vUV_y, vUVBounds_y,
+                                    vYcbcrBias,
+                                    vRgbFromDebiasedYcbcr,
+                                    vRescaleFactor);
+    }
+#else
+#ifdef WR_FEATURE_FAST_PATH
+    vec4 color = vec4(1.0); // vColor is not declared in the fast path
+#ifdef WR_FEATURE_TEXTURE_RECT
+    vec4 uvBounds = vec4(vec2(0.0), vec2(textureSize(sColor0))); // (0, 0) to the texture size
+#else
+    vec4 uvBounds = vec4(0.0, 0.0, 1.0, 1.0); // the unit square
+#endif
+#else
+    vec4 color = vColor;
+    vec4 uvBounds = vUVBounds;
+#endif
+    if (color != vec4(1.0)) { // modulate only when the color is not all ones
+        swgl_commitTextureColorRGBA8(sColor0, vUv, uvBounds, color);
+    } else {
+        swgl_commitTextureRGBA8(sColor0, vUv, uvBounds);
+    }
+#endif
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_blur.glsl b/gfx/wr/webrender/res/cs_blur.glsl
new file mode 100644
index 0000000000..51927e1a65
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_blur.glsl
@@ -0,0 +1,196 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define WR_FEATURE_TEXTURE_2D
+
+#include shared,prim_shared
+
+varying highp vec2 vUv;
+flat varying highp vec4 vUvRect;
+flat varying mediump vec2 vOffsetScale;
+// The number of pixels on each end that we apply the blur filter over.
+// Packed into a vector to work around bug 1630356.
+flat varying mediump ivec2 vSupport;
+flat varying mediump vec2 vGaussCoefficients;
+
+#ifdef WR_VERTEX_SHADER
+// Applies a separable gaussian blur in one direction, as specified
+// by the dir field in the blur command.
+
+#define DIR_HORIZONTAL  0
+#define DIR_VERTICAL    1
+
+PER_INSTANCE in int aBlurRenderTaskAddress;
+PER_INSTANCE in int aBlurSourceTaskAddress;
+PER_INSTANCE in int aBlurDirection;
+
+struct BlurTask {
+    RectWithEndpoint task_rect; // task_rect of the underlying render task data
+    float blur_radius;          // render task user_data.x
+    vec2 blur_region;           // render task user_data.yz
+};
+
+BlurTask fetch_blur_task(int address) {
+    // A blur task is read from generic render task data: the user data
+    // carries the blur radius in x and the blur region in yz.
+    RenderTaskData raw = fetch_render_task_data(address);
+
+    BlurTask blur;
+    blur.task_rect = raw.task_rect;
+    blur.blur_radius = raw.user_data.x;
+    blur.blur_region = raw.user_data.yz;
+    return blur;
+}
+
+void calculate_gauss_coefficients(float sigma) {
+    // Incremental Gaussian Coefficient Calculation (See GPU Gems 3 pp. 877 - 889)
+    float norm = 1.0 / (sqrt(2.0 * 3.14159265) * sigma);
+    float decay = exp(-0.5 / (sigma * sigma));
+    vGaussCoefficients = vec2(norm, decay);
+
+    // Step through the same two-taps-per-iteration recurrence that the
+    // fragment shader uses and total the weights here, so that this is not
+    // done per-fragment and the degenerate case never divides by zero.
+    vec3 g = vec3(vGaussCoefficients,
+                  vGaussCoefficients.y * vGaussCoefficients.y);
+    float weight_sum = g.x;
+    for (int tap = 1; tap <= vSupport.x; tap += 2) {
+        g.xy *= g.yz;
+        float inner = g.x;
+        g.xy *= g.yz;
+        weight_sum += 2.0 * (inner + g.x);
+    }
+
+    // Fold the total into the initial coefficient so that it does not have
+    // to be passed to the fragment shader separately.
+    vGaussCoefficients.x /= weight_sum;
+}
+
+void main(void) {
+    BlurTask blur_task = fetch_blur_task(aBlurRenderTaskAddress);
+    RectWithEndpoint src_rect = fetch_render_task_rect(aBlurSourceTaskAddress);
+
+    RectWithEndpoint target_rect = blur_task.task_rect;
+
+    vec2 texture_size = vec2(TEX_SIZE(sColor0).xy);
+
+    // Ensure that the support is an even number of pixels to simplify the
+    // fragment shader logic.
+    //
+    // TODO(pcwalton): Actually make use of this fact and use the texture
+    // hardware for linear filtering.
+    vSupport.x = int(ceil(1.5 * blur_task.blur_radius)) * 2;
+    // calculate_gauss_coefficients() reads vSupport.x, so it is set first.
+    if (vSupport.x > 0) {
+        calculate_gauss_coefficients(blur_task.blur_radius);
+    } else {
+        // The gauss function gets NaNs when blur radius is zero.
+        vGaussCoefficients = vec2(1.0, 1.0);
+    }
+    // One texel step along the blur axis, in normalized UV units.
+    switch (aBlurDirection) {
+        case DIR_HORIZONTAL:
+            vOffsetScale = vec2(1.0 / texture_size.x, 0.0);
+            break;
+        case DIR_VERTICAL:
+            vOffsetScale = vec2(0.0, 1.0 / texture_size.y);
+            break;
+        default:
+            vOffsetScale = vec2(0.0);
+    }
+    // Sample bounds: the blur region of the source, inset by half a texel.
+    vUvRect = vec4(src_rect.p0 + vec2(0.5),
+                   src_rect.p0 + blur_task.blur_region - vec2(0.5));
+    vUvRect /= texture_size.xyxy;
+
+    vec2 pos = mix(target_rect.p0, target_rect.p1, aPosition.xy);
+
+    vec2 uv0 = src_rect.p0 / texture_size;
+    vec2 uv1 = src_rect.p1 / texture_size;
+    vUv = mix(uv0, uv1, aPosition.xy);
+
+    gl_Position = uTransform * vec4(pos, 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+#if defined WR_FEATURE_COLOR_TARGET
+#define SAMPLE_TYPE vec4
+#define SAMPLE_TEXTURE(uv)  texture(sColor0, uv)
+#else
+#define SAMPLE_TYPE float
+#define SAMPLE_TEXTURE(uv)  texture(sColor0, uv).r
+#endif
+
+// TODO(gw): Write a fast path blur that handles smaller blur radii
+//           with an offset / weight uniform table and a constant
+//           loop iteration count!
+
+void main(void) {
+    SAMPLE_TYPE original_color = SAMPLE_TEXTURE(vUv);
+
+    // Incremental Gaussian Coefficient Calculation (See GPU Gems 3 pp. 877 - 889)
+    vec3 gauss_coefficient = vec3(vGaussCoefficients,
+                                  vGaussCoefficients.y * vGaussCoefficients.y);
+
+    SAMPLE_TYPE avg_color = original_color * gauss_coefficient.x;
+
+    // Evaluate two adjacent texels at a time. We can do this because, if c0
+    // and c1 are colors of adjacent texels and k0 and k1 are arbitrary
+    // factors, this formula:
+    //
+    //     k0 * c0 + k1 * c1          (Equation 1)
+    //
+    // is equivalent to:
+    //
+    //                                 k1
+    //     (k0 + k1) * lerp(c0, c1, -------)
+    //                              k0 + k1
+    //
+    // A texture lookup of adjacent texels evaluates this formula:
+    //
+    //     lerp(c0, c1, t)
+    //
+    // for some t. So we can let `t = k1/(k0 + k1)` and effectively evaluate
+    // Equation 1 with a single texture lookup.
+    //
+    // Clamp loop condition variable to a statically known value to workaround
+    // driver bug on Adreno 3xx. vSupport should not exceed 300 anyway, due to
+    // the max blur radius being 100. See bug 1720841 for details.
+    int support = min(vSupport.x, 300);
+    for (int i = 1; i <= support; i += 2) {
+        gauss_coefficient.xy *= gauss_coefficient.yz;
+        // The subtotal is k0 + k1 for this texel pair; gauss_ratio is t = k1 / (k0 + k1).
+        float gauss_coefficient_subtotal = gauss_coefficient.x;
+        gauss_coefficient.xy *= gauss_coefficient.yz;
+        gauss_coefficient_subtotal += gauss_coefficient.x;
+        float gauss_ratio = gauss_coefficient.x / gauss_coefficient_subtotal;
+
+        vec2 offset = vOffsetScale * (float(i) + gauss_ratio);
+        // The same offset is applied on both sides of vUv, each tap kept inside vUvRect.
+        vec2 st0 = max(vUv - offset, vUvRect.xy);
+        vec2 st1 = min(vUv + offset, vUvRect.zw);
+        avg_color += (SAMPLE_TEXTURE(st0) + SAMPLE_TEXTURE(st1)) *
+                     gauss_coefficient_subtotal;
+    }
+
+    oFragColor = vec4(avg_color);
+}
+
+#ifdef SWGL_DRAW_SPAN
+    #ifdef WR_FEATURE_COLOR_TARGET
+void swgl_drawSpanRGBA8() {
+    swgl_commitGaussianBlurRGBA8(sColor0, vUv, vUvRect, vOffsetScale.x != 0.0,
+                                 vSupport.x, vGaussCoefficients); // 4th argument: true only for DIR_HORIZONTAL
+}
+    #else
+void swgl_drawSpanR8() {
+    swgl_commitGaussianBlurR8(sColor0, vUv, vUvRect, vOffsetScale.x != 0.0,
+                              vSupport.x, vGaussCoefficients); // 4th argument: true only for DIR_HORIZONTAL
+}
+    #endif
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_border_segment.glsl b/gfx/wr/webrender/res/cs_border_segment.glsl
new file mode 100644
index 0000000000..e684bfa6df
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_border_segment.glsl
@@ -0,0 +1,450 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,rect,ellipse
+
+// For edges, the colors are the same. For corners, these
+// are the colors of each edge making up the corner.
+flat varying mediump vec4 vColor00;
+flat varying mediump vec4 vColor01;
+flat varying mediump vec4 vColor10;
+flat varying mediump vec4 vColor11;
+
+// A point + tangent defining the line where the edge
+// transition occurs. Used for corners only.
+flat varying mediump vec4 vColorLine;
+
+// x: segment, y: clip mode
+// We cast these to/from floats rather than using an ivec due to a driver bug
+// on Adreno 3xx. See bug 1730458.
+flat varying mediump vec2 vSegmentClipMode;
+// x, y: styles, z, w: edge axes
+// We cast these to/from floats rather than using an ivec (and bitshifting)
+// due to a driver bug on Adreno 3xx. See bug 1730458.
+flat varying mediump vec4 vStyleEdgeAxis;
+
+// xy = Local space position of the clip center.
+// zw = Scale the rect origin by this to get the outer
+// corner from the segment rectangle.
+flat varying highp vec4 vClipCenter_Sign;
+
+// Outer (xy) and inner (zw) elliptical radii for border
+// corner clipping.
+flat varying mediump vec4 vClipRadii;
+
+// Reference points for determining the edge clip lines.
+flat varying mediump vec4 vEdgeReference;
+
+// Stores widths/2 and widths/3 to save doing this in FS.
+flat varying mediump vec4 vPartialWidths;
+
+// Clipping parameters for dot or dash.
+flat varying mediump vec4 vClipParams1;
+flat varying mediump vec4 vClipParams2;
+
+// Local space position
+varying highp vec2 vPos;
+
+#define SEGMENT_TOP_LEFT        0
+#define SEGMENT_TOP_RIGHT       1
+#define SEGMENT_BOTTOM_RIGHT    2
+#define SEGMENT_BOTTOM_LEFT     3
+#define SEGMENT_LEFT            4
+#define SEGMENT_TOP             5
+#define SEGMENT_RIGHT           6
+#define SEGMENT_BOTTOM          7
+
+// Border styles as defined in webrender_api/types.rs
+#define BORDER_STYLE_NONE         0
+#define BORDER_STYLE_SOLID        1
+#define BORDER_STYLE_DOUBLE       2
+#define BORDER_STYLE_DOTTED       3
+#define BORDER_STYLE_DASHED       4
+#define BORDER_STYLE_HIDDEN       5
+#define BORDER_STYLE_GROOVE       6
+#define BORDER_STYLE_RIDGE        7
+#define BORDER_STYLE_INSET        8
+#define BORDER_STYLE_OUTSET       9
+
+#define CLIP_NONE        0
+#define CLIP_DASH_CORNER 1
+#define CLIP_DASH_EDGE   2
+#define CLIP_DOT         3
+
+#ifdef WR_VERTEX_SHADER
+
+PER_INSTANCE in vec2 aTaskOrigin;
+PER_INSTANCE in vec4 aRect;
+PER_INSTANCE in vec4 aColor0;
+PER_INSTANCE in vec4 aColor1;
+PER_INSTANCE in int aFlags;
+PER_INSTANCE in vec2 aWidths;
+PER_INSTANCE in vec2 aRadii;
+PER_INSTANCE in vec4 aClipParams1;
+PER_INSTANCE in vec4 aClipParams2;
+
+vec2 get_outer_corner_scale(int segment) {
+    // For a corner segment, returns which corner of the unit square is the
+    // outer one: x is 0.0 for left and 1.0 for right, y is 0.0 for top and
+    // 1.0 for bottom.
+    if (segment == SEGMENT_TOP_LEFT) {
+        return vec2(0.0, 0.0);
+    }
+
+    if (segment == SEGMENT_TOP_RIGHT) {
+        return vec2(1.0, 0.0);
+    }
+
+    if (segment == SEGMENT_BOTTOM_RIGHT) {
+        return vec2(1.0, 1.0);
+    }
+
+    if (segment == SEGMENT_BOTTOM_LEFT) {
+        return vec2(0.0, 1.0);
+    }
+
+    // Any other segment: the result is only used for the corner cases
+    // above, so the value returned here is a placeholder.
+    return vec2(0.0);
+}
+
+// NOTE(emilio): If you change this algorithm, do the same change
+// in border.rs
+vec4 mod_color(vec4 color, bool is_black, bool lighter) {
+    const float light_black = 0.7;
+    const float dark_black = 0.3;
+
+    const float dark_scale = 0.66666666;
+    const float light_scale = 1.0;
+
+    // When the caller flags the color as black, a fixed grey level is used
+    // instead; otherwise the rgb is multiplied by a scale. The alpha
+    // channel is carried over unchanged either way.
+    vec3 rgb;
+    if (is_black) {
+        rgb = vec3(lighter ? light_black : dark_black);
+    } else {
+        rgb = color.rgb * (lighter ? light_scale : dark_scale);
+    }
+
+    return vec4(rgb, color.a);
+}
+
+vec4[2] get_colors_for_side(vec4 color, int style) {
+    // Returns the two colors used for one border side. Only groove and
+    // ridge are two-toned (one mod_color() "lighter" variant and one
+    // "darker" variant, in opposite orders); every other style gets the
+    // input color in both slots.
+    vec4 result[2];
+
+    bool is_groove = style == BORDER_STYLE_GROOVE;
+    bool is_ridge = style == BORDER_STYLE_RIDGE;
+
+    if (is_groove || is_ridge) {
+        bool is_black = color.rgb == vec3(0.0, 0.0, 0.0);
+        // Groove: lighter first. Ridge: darker first.
+        result[0] = mod_color(color, is_black, is_groove);
+        result[1] = mod_color(color, is_black, !is_groove);
+    } else {
+        result[0] = color;
+        result[1] = color;
+    }
+
+    return result;
+}
+
+void main(void) {
+    int segment = aFlags & 0xff;
+    int style0 = (aFlags >> 8) & 0xff;
+    int style1 = (aFlags >> 16) & 0xff;
+    int clip_mode = (aFlags >> 24) & 0x0f;
+
+    vec2 size = aRect.zw - aRect.xy;
+    vec2 outer_scale = get_outer_corner_scale(segment);
+    vec2 outer = outer_scale * size;
+    vec2 clip_sign = 1.0 - 2.0 * outer_scale;
+
+    // Set some flags used by the FS to determine the
+    // orientation of the two edges in this corner.
+    ivec2 edge_axis = ivec2(0, 0);
+    // Derive the positions for the edge clips, which must be handled
+    // differently between corners and edges.
+    vec2 edge_reference = vec2(0.0);
+    switch (segment) {
+        case SEGMENT_TOP_LEFT:
+            edge_axis = ivec2(0, 1);
+            edge_reference = outer;
+            break;
+        case SEGMENT_TOP_RIGHT:
+            edge_axis = ivec2(1, 0);
+            edge_reference = vec2(outer.x - aWidths.x, outer.y);
+            break;
+        case SEGMENT_BOTTOM_RIGHT:
+            edge_axis = ivec2(0, 1);
+            edge_reference = outer - aWidths;
+            break;
+        case SEGMENT_BOTTOM_LEFT:
+            edge_axis = ivec2(1, 0);
+            edge_reference = vec2(outer.x, outer.y - aWidths.y);
+            break;
+        case SEGMENT_TOP:
+        case SEGMENT_BOTTOM:
+            edge_axis = ivec2(1, 1);
+            break;
+        case SEGMENT_LEFT:
+        case SEGMENT_RIGHT:
+        default:
+            break;
+    }
+
+    vSegmentClipMode = vec2(float(segment), float(clip_mode));
+    vStyleEdgeAxis = vec4(float(style0), float(style1), float(edge_axis.x), float(edge_axis.y));
+
+    vPartialWidths = vec4(aWidths / 3.0, aWidths / 2.0);
+    vPos = size * aPosition.xy;
+
+    vec4[2] color0 = get_colors_for_side(aColor0, style0);
+    vColor00 = color0[0];
+    vColor01 = color0[1];
+    vec4[2] color1 = get_colors_for_side(aColor1, style1);
+    vColor10 = color1[0];
+    vColor11 = color1[1];
+    vClipCenter_Sign = vec4(outer + clip_sign * aRadii, clip_sign);
+    vClipRadii = vec4(aRadii, max(aRadii - aWidths, 0.0));
+    vColorLine = vec4(outer, aWidths.y * -clip_sign.y, aWidths.x * clip_sign.x);
+    vEdgeReference = vec4(edge_reference, edge_reference + aWidths);
+    vClipParams1 = aClipParams1;
+    vClipParams2 = aClipParams2;
+
+    // For the case of dot and dash clips, optimize the number of pixels that
+    // are hit to just include the dot itself.
+    if (clip_mode == CLIP_DOT) {
+        float radius = aClipParams1.z;
+
+        // Expand by a small amount to allow room for AA around
+        // the dot if it's big enough.
+        if (radius > 0.5)
+            radius += 2.0;
+
+        vPos = vClipParams1.xy + radius * (2.0 * aPosition.xy - 1.0);
+        vPos = clamp(vPos, vec2(0.0), size);
+    } else if (clip_mode == CLIP_DASH_CORNER) {
+        vec2 center = (aClipParams1.xy + aClipParams2.xy) * 0.5;
+        // This is a gross approximation which works out because dashes don't have
+        // a strong curvature and we will overshoot by inflating the geometry by
+        // this amount on each side (sqrt(2) * length(dash) would be enough and we
+        // compute 2 * approx_length(dash)).
+        float dash_length = length(aClipParams1.xy - aClipParams2.xy);
+        float width = max(aWidths.x, aWidths.y);
+        // Expand by a small amount for AA just like we do for dots.
+        vec2 r = vec2(max(dash_length, width)) + 2.0;
+        vPos = clamp(vPos, center - r, center + r);
+    }
+
+    gl_Position = uTransform * vec4(aTaskOrigin + aRect.xy + vPos, 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+vec4 evaluate_color_for_style_in_corner(
+    vec2 clip_relative_pos, // fragment position relative to the corner clip center
+    int style, // BORDER_STYLE_* of the side being shaded
+    vec4 color0, // primary color; returned as-is for styles not handled below
+    vec4 color1, // secondary color, only read for groove/ridge
+    vec4 clip_radii, // only .xy is read here
+    float mix_factor, // blend factor between the two sides, computed by the caller
+    int segment, // SEGMENT_* id, selects the groove/ridge color swizzle
+    float aa_range // anti-aliasing range forwarded to distance_aa
+) {
+    switch (style) {
+        case BORDER_STYLE_DOUBLE: {
+            // Get the distances to the clip radii shrunk by one
+            // and by two partial widths (vPartialWidths.xy). Use
+            // these to form a SDF subtraction which will clip out
+            // the inside third of the rounded edge.
+            float d_radii_a = distance_to_ellipse(
+                clip_relative_pos,
+                clip_radii.xy - vPartialWidths.xy
+            );
+            float d_radii_b = distance_to_ellipse(
+                clip_relative_pos,
+                clip_radii.xy - 2.0 * vPartialWidths.xy
+            );
+            float d = min(-d_radii_a, d_radii_b);
+            color0 *= distance_aa(aa_range, d);
+            break;
+        }
+        case BORDER_STYLE_GROOVE:
+        case BORDER_STYLE_RIDGE: {
+            float d = distance_to_ellipse(
+                clip_relative_pos,
+                clip_radii.xy - vPartialWidths.zw
+            );
+            float alpha = distance_aa(aa_range, d); // coverage relative to the ellipse shrunk by vPartialWidths.zw
+            float swizzled_factor; // per-corner weighting used to swap color0 and color1
+            switch (segment) {
+                case SEGMENT_TOP_LEFT: swizzled_factor = 0.0; break;
+                case SEGMENT_TOP_RIGHT: swizzled_factor = mix_factor; break;
+                case SEGMENT_BOTTOM_RIGHT: swizzled_factor = 1.0; break;
+                case SEGMENT_BOTTOM_LEFT: swizzled_factor = 1.0 - mix_factor; break;
+                default: swizzled_factor = 0.0; break;
+            };
+            vec4 c0 = mix(color1, color0, swizzled_factor); // c0 and c1 are the same pair with opposite weighting
+            vec4 c1 = mix(color0, color1, swizzled_factor);
+            color0 = mix(c0, c1, alpha);
+            break;
+        }
+        default:
+            break;
+    }
+
+    return color0;
+}
+
+vec4 evaluate_color_for_style_in_edge(
+    vec2 pos_vec, // fragment position; only its component along the edge axis is used
+    int style, // BORDER_STYLE_* of the side being shaded
+    vec4 color0, // primary color; returned as-is for styles not handled below
+    vec4 color1, // secondary color, only read for groove/ridge
+    float aa_range, // anti-aliasing range forwarded to distance_aa
+    int edge_axis_id // 0 selects the x axis, any other value the y axis
+) {
+    vec2 edge_axis = edge_axis_id != 0 ? vec2(0.0, 1.0) : vec2(1.0, 0.0);
+    float pos = dot(pos_vec, edge_axis); // scalar position along the selected axis
+    switch (style) {
+        case BORDER_STYLE_DOUBLE: {
+            float d = -1.0; // stays negative when the partial width is below 1.0
+            float partial_width = dot(vPartialWidths.xy, edge_axis);
+            if (partial_width >= 1.0) {
+                vec2 ref = vec2(
+                    dot(vEdgeReference.xy, edge_axis) + partial_width,
+                    dot(vEdgeReference.zw, edge_axis) - partial_width
+                );
+                d = min(pos - ref.x, ref.y - pos); // positive strictly between the two references
+            }
+            color0 *= distance_aa(aa_range, d);
+            break;
+        }
+        case BORDER_STYLE_GROOVE:
+        case BORDER_STYLE_RIDGE: {
+            float ref = dot(vEdgeReference.xy + vPartialWidths.zw, edge_axis); // reference offset by vPartialWidths.zw
+            float d = pos - ref;
+            float alpha = distance_aa(aa_range, d);
+            color0 = mix(color0, color1, alpha);
+            break;
+        }
+        default:
+            break;
+    }
+
+    return color0;
+}
+
+void main(void) {
+    float aa_range = compute_aa_range(vPos);
+    vec4 color0, color1;
+
+    int segment = int(vSegmentClipMode.x); // the varyings carry these integers as floats
+    int clip_mode = int(vSegmentClipMode.y);
+    ivec2 style = ivec2(int(vStyleEdgeAxis.x), int(vStyleEdgeAxis.y));
+    ivec2 edge_axis = ivec2(int(vStyleEdgeAxis.z), int(vStyleEdgeAxis.w));
+
+    float mix_factor = 0.0;
+    if (edge_axis.x != edge_axis.y) { // the vertex shader sets differing axes only for corner segments
+        float d_line = distance_to_line(vColorLine.xy, vColorLine.zw, vPos);
+        mix_factor = distance_aa(aa_range, -d_line);
+    }
+
+    // Check if inside corner clip-region
+    vec2 clip_relative_pos = vPos - vClipCenter_Sign.xy;
+    bool in_clip_region = all(lessThan(vClipCenter_Sign.zw * clip_relative_pos, vec2(0.0)));
+    float d = -1.0; // clip distance; stays negative when no clip below changes it
+
+    switch (clip_mode) {
+        case CLIP_DOT: {
+            // Set clip distance based on dot position and radius.
+            d = distance(vClipParams1.xy, vPos) - vClipParams1.z;
+            break;
+        }
+        case CLIP_DASH_EDGE: {
+            bool is_vertical = vClipParams1.x == 0.; // a zero x component selects the y axis
+            float half_dash = is_vertical ? vClipParams1.y : vClipParams1.x;
+            // We want to draw something like:
+            // +---+---+---+---+
+            // |xxx|   |   |xxx|
+            // +---+---+---+---+
+            float pos = is_vertical ? vPos.y : vPos.x;
+            bool in_dash = pos < half_dash || pos > 3.0 * half_dash;
+            if (!in_dash) {
+                d = 1.;
+            }
+            break;
+        }
+        case CLIP_DASH_CORNER: {
+            // Get SDF for the two line/tangent clip lines,
+            // do SDF subtract to get clip distance.
+            float d0 = distance_to_line(vClipParams1.xy,
+                                        vClipParams1.zw,
+                                        vPos);
+            float d1 = distance_to_line(vClipParams2.xy,
+                                        vClipParams2.zw,
+                                        vPos);
+            d = max(d0, -d1);
+            break;
+        }
+        case CLIP_NONE:
+        default:
+            break;
+    }
+
+    if (in_clip_region) {
+        float d_radii_a = distance_to_ellipse(clip_relative_pos, vClipRadii.xy);
+        float d_radii_b = distance_to_ellipse(clip_relative_pos, vClipRadii.zw);
+        float d_radii = max(d_radii_a, -d_radii_b); // SDF subtraction of the zw ellipse from the xy ellipse
+        d = max(d, d_radii); // combine with the dot/dash clip distance
+
+        color0 = evaluate_color_for_style_in_corner(
+            clip_relative_pos,
+            style.x,
+            vColor00,
+            vColor01,
+            vClipRadii,
+            mix_factor,
+            segment,
+            aa_range
+        );
+        color1 = evaluate_color_for_style_in_corner(
+            clip_relative_pos,
+            style.y,
+            vColor10,
+            vColor11,
+            vClipRadii,
+            mix_factor,
+            segment,
+            aa_range
+        );
+    } else {
+        color0 = evaluate_color_for_style_in_edge(
+            vPos,
+            style.x,
+            vColor00,
+            vColor01,
+            aa_range,
+            edge_axis.x
+        );
+        color1 = evaluate_color_for_style_in_edge(
+            vPos,
+            style.y,
+            vColor10,
+            vColor11,
+            aa_range,
+            edge_axis.y
+        );
+    }
+
+    float alpha = distance_aa(aa_range, d);
+    vec4 color = mix(color0, color1, mix_factor); // blend the two sides' colors
+    oFragColor = color * alpha;
+}
+#endif
diff --git a/gfx/wr/webrender/res/cs_border_solid.glsl b/gfx/wr/webrender/res/cs_border_solid.glsl
new file mode 100644
index 0000000000..460646e21b
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_border_solid.glsl
@@ -0,0 +1,178 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,rect,ellipse
+
+#define DONT_MIX 0
+#define MIX_AA 1
+#define MIX_NO_AA 2
+
+// For edges, the colors are the same. For corners, these
+// are the colors of each edge making up the corner.
+flat varying mediump vec4 vColor0;
+flat varying mediump vec4 vColor1;
+
+// A point + tangent defining the line where the edge
+// transition occurs. Used for corners only.
+flat varying highp vec4 vColorLine;
+
+// Color mixing mode in .x: one of DONT_MIX, MIX_AA or MIX_NO_AA.
+// Packed in to a vector to work around bug 1630356.
+flat varying mediump ivec2 vMixColors;
+
+// xy = Local space position of the clip center.
+// zw = Per-axis sign (1.0 - 2.0 * outer corner scale) used to test
+// whether a fragment lies inside the corner clip region.
+flat varying highp vec4 vClipCenter_Sign;
+
+// xy = outer and zw = inner elliptical radii for border
+// corner clipping.
+flat varying highp vec4 vClipRadii;
+
+// Position, scale, and radii of horizontally and vertically adjacent corner clips.
+flat varying highp vec4 vHorizontalClipCenter_Sign;
+flat varying highp vec2 vHorizontalClipRadii;
+flat varying highp vec4 vVerticalClipCenter_Sign;
+flat varying highp vec2 vVerticalClipRadii;
+
+// Local space position
+varying highp vec2 vPos;
+
+#define SEGMENT_TOP_LEFT        0
+#define SEGMENT_TOP_RIGHT       1
+#define SEGMENT_BOTTOM_RIGHT    2
+#define SEGMENT_BOTTOM_LEFT     3
+
+#ifdef WR_VERTEX_SHADER
+
+PER_INSTANCE in vec2 aTaskOrigin; // added to aRect.xy when positioning the vertex
+PER_INSTANCE in vec4 aRect; // xy = origin, zw = far corner (size is zw - xy)
+PER_INSTANCE in vec4 aColor0;
+PER_INSTANCE in vec4 aColor1;
+PER_INSTANCE in int aFlags; // low 8 bits = segment id; any of bits 28-31 set = do AA
+PER_INSTANCE in vec2 aWidths; // subtracted from aRadii to get the inner radii
+PER_INSTANCE in vec2 aRadii; // outer corner radii
+PER_INSTANCE in vec4 aClipParams1; // horizontally adjacent corner clip: xy = position, zw = radii
+PER_INSTANCE in vec4 aClipParams2; // vertically adjacent corner clip: xy = position, zw = radii
+
+// Maps a corner segment id to the position of its outer corner, given as a
+// 0/1 factor per axis of the segment rect size. Ids that are not corners
+// yield (0, 0); the result is only used for corner segments.
+vec2 get_outer_corner_scale(int segment) {
+    // Right-hand corners sit at x = 1, bottom corners at y = 1.
+    bool on_right = false;
+    bool on_bottom = false;
+
+    if (segment == SEGMENT_TOP_RIGHT) {
+        on_right = true;
+    } else if (segment == SEGMENT_BOTTOM_RIGHT) {
+        on_right = true;
+        on_bottom = true;
+    } else if (segment == SEGMENT_BOTTOM_LEFT) {
+        on_bottom = true;
+    }
+    // SEGMENT_TOP_LEFT and all non-corner ids leave both flags unset.
+
+    vec2 scale = vec2(
+        on_right ? 1.0 : 0.0,
+        on_bottom ? 1.0 : 0.0
+    );
+    return scale;
+}
+
+void main(void) {
+    int segment = aFlags & 0xff; // low byte holds the segment id
+    bool do_aa = ((aFlags >> 24) & 0xf0) != 0; // any of bits 28-31 set enables anti-aliasing
+
+    vec2 outer_scale = get_outer_corner_scale(segment);
+    vec2 size = aRect.zw - aRect.xy;
+    vec2 outer = outer_scale * size; // outer corner relative to the rect origin
+    vec2 clip_sign = 1.0 - 2.0 * outer_scale; // +1.0 where the scale is 0.0, -1.0 where it is 1.0
+
+    int mix_colors; // only the four corner segments mix the two colors
+    switch (segment) {
+        case SEGMENT_TOP_LEFT:
+        case SEGMENT_TOP_RIGHT:
+        case SEGMENT_BOTTOM_RIGHT:
+        case SEGMENT_BOTTOM_LEFT: {
+            mix_colors = do_aa ? MIX_AA : MIX_NO_AA;
+            break;
+        }
+        default:
+            mix_colors = DONT_MIX;
+            break;
+    }
+
+    vMixColors.x = mix_colors; // only .x of the ivec2 varying is written
+    vPos = size * aPosition.xy;
+
+    vColor0 = aColor0;
+    vColor1 = aColor1;
+    vClipCenter_Sign = vec4(outer + clip_sign * aRadii, clip_sign);
+    vClipRadii = vec4(aRadii, max(aRadii - aWidths, 0.0)); // inner radii are clamped so they never go negative
+    vColorLine = vec4(outer, aWidths.y * -clip_sign.y, aWidths.x * clip_sign.x);
+
+    vec2 horizontal_clip_sign = vec2(-clip_sign.x, clip_sign.y); // x sign flipped for the horizontally adjacent corner
+    vHorizontalClipCenter_Sign = vec4(aClipParams1.xy +
+                                      horizontal_clip_sign * aClipParams1.zw,
+                                      horizontal_clip_sign);
+    vHorizontalClipRadii = aClipParams1.zw;
+
+    vec2 vertical_clip_sign = vec2(clip_sign.x, -clip_sign.y); // y sign flipped for the vertically adjacent corner
+    vVerticalClipCenter_Sign = vec4(aClipParams2.xy +
+                                    vertical_clip_sign * aClipParams2.zw,
+                                    vertical_clip_sign);
+    vVerticalClipRadii = aClipParams2.zw;
+
+    gl_Position = uTransform * vec4(aTaskOrigin + aRect.xy + vPos, 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+void main(void) {
+    float aa_range = compute_aa_range(vPos);
+    bool do_aa = vMixColors.x != MIX_NO_AA; // AA stays enabled for both MIX_AA and DONT_MIX
+
+    float mix_factor = 0.0;
+    if (vMixColors.x != DONT_MIX) {
+        float d_line = distance_to_line(vColorLine.xy, vColorLine.zw, vPos);
+        if (do_aa) {
+            mix_factor = distance_aa(aa_range, -d_line);
+        } else {
+            mix_factor = d_line + EPSILON >= 0. ? 1.0 : 0.0; // hard step between the colors when AA is off
+        }
+    }
+
+    // Check if inside main corner clip-region
+    vec2 clip_relative_pos = vPos - vClipCenter_Sign.xy;
+    bool in_clip_region = all(lessThan(vClipCenter_Sign.zw * clip_relative_pos, vec2(0.0)));
+
+    float d = -1.0; // clip distance; stays negative when no clip region applies
+    if (in_clip_region) {
+        float d_radii_a = distance_to_ellipse(clip_relative_pos, vClipRadii.xy);
+        float d_radii_b = distance_to_ellipse(clip_relative_pos, vClipRadii.zw);
+        d = max(d_radii_a, -d_radii_b); // SDF subtraction of the inner ellipse from the outer one
+    }
+
+    // And again for horizontally-adjacent corner
+    clip_relative_pos = vPos - vHorizontalClipCenter_Sign.xy;
+    in_clip_region = all(lessThan(vHorizontalClipCenter_Sign.zw * clip_relative_pos, vec2(0.0)));
+    if (in_clip_region) {
+        float d_radii = distance_to_ellipse(clip_relative_pos, vHorizontalClipRadii.xy);
+        d = max(d_radii, d);
+    }
+
+    // And finally for vertically-adjacent corner
+    clip_relative_pos = vPos - vVerticalClipCenter_Sign.xy;
+    in_clip_region = all(lessThan(vVerticalClipCenter_Sign.zw * clip_relative_pos, vec2(0.0)));
+    if (in_clip_region) {
+        float d_radii = distance_to_ellipse(clip_relative_pos, vVerticalClipRadii.xy);
+        d = max(d_radii, d);
+    }
+
+    float alpha = do_aa ? distance_aa(aa_range, d) : 1.0; // without AA the clip distance does not affect alpha
+    vec4 color = mix(vColor0, vColor1, mix_factor);
+    oFragColor = color * alpha;
+}
+#endif
diff --git a/gfx/wr/webrender/res/cs_clip_box_shadow.glsl b/gfx/wr/webrender/res/cs_clip_box_shadow.glsl
new file mode 100644
index 0000000000..37a983c759
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_clip_box_shadow.glsl
@@ -0,0 +1,327 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,clip_shared
+
+varying highp vec4 vLocalPos; // homogeneous local position; the fragment code divides xy by w
+varying highp vec2 vUv; // shadow UV, pre-multiplied by the local position w
+flat varying highp vec4 vUvBounds; // source UV rect inset by half a texel, divided by the texture size
+flat varying mediump vec4 vEdge; // xy / zw = lower / upper UV thresholds used by the fragment clamp
+flat varying highp vec4 vUvBounds_NoClamp; // source UV rect divided by the texture size, without the inset
+// Clip mode, stored in .x. Packed in to a vector to avoid bug 1630356.
+flat varying mediump vec2 vClipMode;
+
+#define MODE_STRETCH        0
+#define MODE_SIMPLE         1
+
+#ifdef WR_VERTEX_SHADER
+
+PER_INSTANCE in ivec2 aClipDataResourceAddress; // handed to fetch_image_source_direct
+PER_INSTANCE in vec2 aClipSrcRectSize;
+PER_INSTANCE in int aClipMode;
+PER_INSTANCE in ivec2 aStretchMode; // x / y = MODE_* for the horizontal / vertical axis
+PER_INSTANCE in vec4 aClipDestRect; // xy = p0, zw = p1 of the destination rect
+
+struct ClipMaskInstanceBoxShadow {
+    ClipMaskInstanceCommon base; // filled from fetch_clip_item_common()
+    ivec2 resource_address; // copy of aClipDataResourceAddress
+};
+
+// Gathers the box-shadow clip instance: the common clip-mask fields plus
+// the aClipDataResourceAddress instance attribute.
+ClipMaskInstanceBoxShadow fetch_clip_item() {
+    ClipMaskInstanceCommon base_fields = fetch_clip_item_common();
+    ivec2 address = aClipDataResourceAddress;
+
+    return ClipMaskInstanceBoxShadow(base_fields, address);
+}
+
+struct BoxShadowData {
+    vec2 src_rect_size; // from aClipSrcRectSize
+    int clip_mode; // from aClipMode
+    int stretch_mode_x; // MODE_STRETCH or MODE_SIMPLE, from aStretchMode.x
+    int stretch_mode_y; // MODE_STRETCH or MODE_SIMPLE, from aStretchMode.y
+    RectWithEndpoint dest_rect; // built from aClipDestRect.xy / .zw
+};
+
+// Packs the per-instance box-shadow attributes into a BoxShadowData.
+BoxShadowData fetch_data() {
+    RectWithEndpoint dest = RectWithEndpoint(aClipDestRect.xy, aClipDestRect.zw);
+    return BoxShadowData(
+        aClipSrcRectSize,
+        aClipMode,
+        aStretchMode.x,
+        aStretchMode.y,
+        dest);
+}
+
+void main(void) {
+    ClipMaskInstanceBoxShadow cmi = fetch_clip_item();
+    Transform clip_transform = fetch_transform(cmi.base.clip_transform_id);
+    Transform prim_transform = fetch_transform(cmi.base.prim_transform_id);
+    BoxShadowData bs_data = fetch_data();
+    ImageSource res = fetch_image_source_direct(cmi.resource_address);
+
+    RectWithEndpoint dest_rect = bs_data.dest_rect;
+
+    ClipVertexInfo vi = write_clip_tile_vertex(
+        dest_rect,
+        prim_transform,
+        clip_transform,
+        cmi.base.sub_rect,
+        cmi.base.task_origin,
+        cmi.base.screen_origin,
+        cmi.base.device_pixel_scale
+    );
+    vClipMode.x = float(bs_data.clip_mode); // only .x of the vec2 varying is written
+
+    vec2 texture_size = vec2(TEX_SIZE(sColor0));
+    vec2 local_pos = vi.local_pos.xy / vi.local_pos.w; // perspective divide of the local position
+    vLocalPos = vi.local_pos;
+    vec2 dest_rect_size = rect_size(dest_rect);
+
+    switch (bs_data.stretch_mode_x) {
+        case MODE_STRETCH: {
+            vEdge.x = 0.5;
+            vEdge.z = (dest_rect_size.x / bs_data.src_rect_size.x) - 0.5;
+            vUv.x = (local_pos.x - dest_rect.p0.x) / bs_data.src_rect_size.x; // UV in units of the source rect size
+            break;
+        }
+        case MODE_SIMPLE:
+        default: {
+            vEdge.xz = vec2(1.0);
+            vUv.x = (local_pos.x - dest_rect.p0.x) / dest_rect_size.x; // UV in units of the destination rect size
+            break;
+        }
+    }
+
+    switch (bs_data.stretch_mode_y) {
+        case MODE_STRETCH: {
+            vEdge.y = 0.5;
+            vEdge.w = (dest_rect_size.y / bs_data.src_rect_size.y) - 0.5;
+            vUv.y = (local_pos.y - dest_rect.p0.y) / bs_data.src_rect_size.y;
+            break;
+        }
+        case MODE_SIMPLE:
+        default: {
+            vEdge.yw = vec2(1.0);
+            vUv.y = (local_pos.y - dest_rect.p0.y) / dest_rect_size.y;
+            break;
+        }
+    }
+
+    vUv *= vi.local_pos.w; // pre-multiply by w; the fragment shader divides by vLocalPos.w
+    vec2 uv0 = res.uv_rect.p0;
+    vec2 uv1 = res.uv_rect.p1;
+    vUvBounds = vec4(uv0 + vec2(0.5), uv1 - vec2(0.5)) / texture_size.xyxy; // half-texel inset
+    vUvBounds_NoClamp = vec4(uv0, uv1) / texture_size.xyxy;
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+void main(void) {
+    vec2 uv_linear = vUv / vLocalPos.w; // undo the w pre-multiplication done in the vertex shader
+    vec2 uv = clamp(uv_linear, vec2(0.0), vEdge.xy);
+    uv += max(vec2(0.0), uv_linear - vEdge.zw); // add whatever lies beyond the upper threshold
+    uv = mix(vUvBounds_NoClamp.xy, vUvBounds_NoClamp.zw, uv);
+    uv = clamp(uv, vUvBounds.xy, vUvBounds.zw);
+
+    float in_shadow_rect = init_transform_rough_fs(vLocalPos.xy / vLocalPos.w);
+
+    float texel = TEX_SAMPLE(sColor0, uv).r;
+
+    float alpha = mix(texel, 1.0 - texel, vClipMode.x); // texel is inverted when vClipMode.x is 1.0
+    float result = vLocalPos.w > 0.0 ? mix(vClipMode.x, alpha, in_shadow_rect) : 0.0; // non-positive w outputs 0.0
+
+    oFragColor = vec4(result);
+}
+
+#ifdef SWGL_DRAW_SPAN
+// As with cs_clip_rectangle, this shader spends a lot of time doing clipping and
+// combining for every fragment, even if outside of the primitive to initialize
+// the clip tile, or inside the inner bounds of the primitive, where the shadow
+// is unnecessary. To alleviate this, the span shader attempts to first intersect
+// the local clip bounds, outside of which we can just use a solid fill
+// to initialize those clip tile fragments. Once inside the primitive bounds,
+// we further intersect with the inner region where no shadow is necessary either
+// so that we can commit entire spans of texture within this nine-patch region
+// instead of having to do the work of mapping per fragment.
+void swgl_drawSpanR8() {
+    // Perspective is not supported.
+    if (swgl_interpStep(vLocalPos).w != 0.0) {
+        return;
+    }
+
+    // If the span is completely outside the Z-range and clipped out, just
+    // output clear so we don't need to consider invalid W in the rest of the
+    // shader.
+    float w = swgl_forceScalar(vLocalPos.w);
+    if (w <= 0.0) {
+        swgl_commitSolidR8(0.0);
+        return;
+    }
+
+    // To start, we evaluate the box shadow in both UV and local space relative
+    // to the local-space position. This will be interpolated across the span to
+    // track whether we intersect the nine-patch.
+    w = 1.0 / w; // from here on w holds the reciprocal
+    vec2 uv_linear = vUv * w;
+    vec2 uv_linear0 = swgl_forceScalar(uv_linear);
+    vec2 uv_linear_step = swgl_interpStep(vUv).xy * w;
+    vec2 local_pos = vLocalPos.xy * w;
+    vec2 local_pos0 = swgl_forceScalar(local_pos);
+    vec2 local_step = swgl_interpStep(vLocalPos).xy * w;
+
+    // We need to compute the local-space distance to the bounding box and then
+    // figure out how many processing steps that maps to. If we are stepping in
+    // a negative direction on an axis, we need to swap the sides of the box
+    // which we consider as the start or end. If there is no local-space step
+    // on an axis (i.e. constant Y), we need to take care to force the steps to
+    // either the start or end of the span depending on if we are inside or
+    // outside of the bounding box.
+    vec4 clip_dist =
+        mix(vTransformBounds, vTransformBounds.zwxy, lessThan(local_step, vec2(0.0)).xyxy)
+            - local_pos0.xyxy;
+    clip_dist =
+        mix(1.0e6 * step(0.0, clip_dist),
+            clip_dist * recip(local_step).xyxy,
+            notEqual(local_step, vec2(0.0)).xyxy);
+
+    // Find the start and end of the shadowed region on this span.
+    float shadow_start = max(clip_dist.x, clip_dist.y);
+    float shadow_end = min(clip_dist.z, clip_dist.w);
+
+    // Flip the offsets from the start of the span so we can compare against the
+    // remaining span length which automatically deducts as we commit fragments.
+    ivec2 shadow_steps = ivec2(clamp(
+        swgl_SpanLength - swgl_StepSize * vec2(floor(shadow_start), ceil(shadow_end)),
+        0.0, swgl_SpanLength));
+    int shadow_start_len = shadow_steps.x;
+    int shadow_end_len = shadow_steps.y;
+
+    // Likewise, once inside the primitive bounds, we also need to track which
+    // sector of the nine-patch we are in which requires intersecting against
+    // the inner box instead of the outer box.
+    vec4 opaque_dist =
+        mix(vEdge, vEdge.zwxy, lessThan(uv_linear_step, vec2(0.0)).xyxy)
+            - uv_linear0.xyxy;
+    opaque_dist =
+        mix(1.0e6 * step(0.0, opaque_dist),
+            opaque_dist * recip(uv_linear_step).xyxy,
+            notEqual(uv_linear_step, vec2(0.0)).xyxy);
+
+    // Unlike for the shadow clipping bounds, here we need to rather find the floor of all
+    // the offsets so that we don't accidentally process any chunks in the transitional areas
+    // between sectors of the nine-patch.
+    ivec4 opaque_steps = ivec4(clamp(
+        swgl_SpanLength -
+            swgl_StepSize *
+                vec4(floor(opaque_dist.x), floor(opaque_dist.y), floor(opaque_dist.z), floor(opaque_dist.w)),
+        shadow_end_len, swgl_SpanLength));
+
+    // Fill any initial sections of the span that are clipped out based on clip mode.
+    if (swgl_SpanLength > shadow_start_len) {
+        int num_before = swgl_SpanLength - shadow_start_len;
+        swgl_commitPartialSolidR8(num_before, vClipMode.x);
+        float steps_before = float(num_before / swgl_StepSize); // integer division: whole steps only
+        uv_linear += steps_before * uv_linear_step;
+        local_pos += steps_before * local_step;
+    }
+
+    // This loop tries to repeatedly process entire spans of the nine-patch that map
+    // to contiguous spans of texture in the source box shadow. First, we process
+    // a chunk with per-fragment clipping and mapping in case we're starting on a
+    // transitional region between sectors of the nine-patch which may need to map
+    // to different spans of texture per-fragment. After, we find the largest span
+    // within the current sector before we hit the next transitional region, and
+    // attempt to commit an entire span of texture therein.
+    while (swgl_SpanLength > 0) {
+        // Here we might be in a transitional chunk, so do everything per-fragment.
+        {
+            vec2 uv = clamp(uv_linear, vec2(0.0), vEdge.xy);
+            uv += max(vec2(0.0), uv_linear - vEdge.zw);
+            uv = mix(vUvBounds_NoClamp.xy, vUvBounds_NoClamp.zw, uv);
+            uv = clamp(uv, vUvBounds.xy, vUvBounds.zw);
+
+            float in_shadow_rect = init_transform_rough_fs(local_pos);
+
+            float texel = TEX_SAMPLE(sColor0, uv).r;
+
+            float alpha = mix(texel, 1.0 - texel, vClipMode.x);
+            float result = mix(vClipMode.x, alpha, in_shadow_rect);
+            swgl_commitColorR8(result);
+
+            uv_linear += uv_linear_step;
+            local_pos += local_step;
+        }
+        // If we now hit the end of the clip bounds, just bail out since there is
+        // no more shadow to map.
+        if (swgl_SpanLength <= shadow_end_len) {
+            break;
+        }
+        // By here we've determined to be still inside the nine-patch. We need to
+        // compare against the inner rectangle thresholds to see which sector of
+        // the nine-patch to use and thus how to map the box shadow texture. Stop
+        // at least one step before the end of the shadow region to properly clip
+        // on the boundary.
+        int num_inside = swgl_SpanLength - swgl_StepSize - shadow_end_len;
+        vec4 uv_bounds = vUvBounds;
+        if (swgl_SpanLength >= opaque_steps.y) {
+            // We're in the top Y band of the nine-patch.
+            num_inside = min(num_inside, swgl_SpanLength - opaque_steps.y);
+        } else if (swgl_SpanLength >= opaque_steps.w) {
+            // We're in the middle Y band of the nine-patch. Set the UV clamp bounds
+            // to the vertical center texel of the box shadow.
+            num_inside = min(num_inside, swgl_SpanLength - opaque_steps.w);
+            uv_bounds.yw = vec2(clamp(mix(vUvBounds_NoClamp.y, vUvBounds_NoClamp.w, vEdge.y),
+                                      vUvBounds.y, vUvBounds.w));
+        }
+        if (swgl_SpanLength >= opaque_steps.x) {
+            // We're in the left X column of the nine-patch.
+            num_inside = min(num_inside, swgl_SpanLength - opaque_steps.x);
+        } else if (swgl_SpanLength >= opaque_steps.z) {
+            // We're in the middle X band of the nine-patch. Set the UV clamp bounds
+            // to the horizontal center texel of the box shadow.
+            num_inside = min(num_inside, swgl_SpanLength - opaque_steps.z);
+            uv_bounds.xz = vec2(clamp(mix(vUvBounds_NoClamp.x, vUvBounds_NoClamp.z, vEdge.x),
+                                      vUvBounds.x, vUvBounds.z));
+        }
+        if (num_inside > 0) {
+            // We have a non-zero span of fragments within the sector. Map to the UV
+            // start offset of the sector and the UV offset within the sector.
+            vec2 uv = clamp(uv_linear, vec2(0.0), vEdge.xy);
+            uv += max(vec2(0.0), uv_linear - vEdge.zw);
+            uv = mix(vUvBounds_NoClamp.xy, vUvBounds_NoClamp.zw, uv);
+            // If we're in the center sector of the nine-patch, then we only need to
+            // sample from a single texel of the box shadow. Just sample that single
+            // texel once and output it for the entire span. Otherwise, we just need
+            // to commit an actual span of texture from the box shadow. Depending on
+            // if we are in clip-out mode, we may need to invert the source texture.
+            if (uv_bounds.xy == uv_bounds.zw) { // both axes were collapsed to the center texel above
+                uv = clamp(uv, uv_bounds.xy, uv_bounds.zw);
+                float texel = TEX_SAMPLE(sColor0, uv).r;
+                float alpha = mix(texel, 1.0 - texel, vClipMode.x);
+                swgl_commitPartialSolidR8(num_inside, alpha);
+            } else if (vClipMode.x != 0.0) {
+                swgl_commitPartialTextureLinearInvertR8(num_inside, sColor0, uv, uv_bounds);
+            } else {
+                swgl_commitPartialTextureLinearR8(num_inside, sColor0, uv, uv_bounds);
+            }
+            float steps_inside = float(num_inside / swgl_StepSize); // integer division: whole steps only
+            uv_linear += steps_inside * uv_linear_step;
+            local_pos += steps_inside * local_step;
+        }
+        // By here we're probably in a transitional chunk of the nine-patch that
+        // requires per-fragment processing, so loop around again to the handler
+        // for that case.
+    }
+
+    // Fill any remaining sections of the span that are clipped out.
+    if (swgl_SpanLength > 0) {
+        swgl_commitPartialSolidR8(swgl_SpanLength, vClipMode.x);
+    }
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_clip_image.glsl b/gfx/wr/webrender/res/cs_clip_image.glsl
new file mode 100644
index 0000000000..24ba1dab8e
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_clip_image.glsl
@@ -0,0 +1,117 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,clip_shared
+
+varying highp vec2 vLocalPos; // local position handed to init_transform_rough_fs
+varying highp vec2 vClipMaskImageUv; // mask UV, already divided by the texture size
+
+flat varying highp vec4 vClipMaskUvInnerRect; // mask UV rect inset by half a texel; sampling is clamped to it
+
+#ifdef WR_VERTEX_SHADER
+
+PER_INSTANCE in vec4 aClipTileRect; // xy = p0, zw = p1 of the tile rect
+PER_INSTANCE in ivec2 aClipDataResourceAddress; // handed to fetch_image_source_direct
+PER_INSTANCE in vec4 aClipLocalRect; // xy = p0, zw = p1 of the local clip rect
+
+struct ClipMaskInstanceImage {
+    ClipMaskInstanceCommon base; // filled from fetch_clip_item_common()
+    RectWithEndpoint tile_rect; // built from aClipTileRect
+    ivec2 resource_address; // copy of aClipDataResourceAddress
+    RectWithEndpoint local_rect; // built from aClipLocalRect
+};
+
+// Gathers the image clip instance: the common clip-mask fields plus the
+// tile rect, resource address and local rect instance attributes.
+ClipMaskInstanceImage fetch_clip_item() {
+    RectWithEndpoint tile = RectWithEndpoint(aClipTileRect.xy, aClipTileRect.zw);
+    RectWithEndpoint local = RectWithEndpoint(aClipLocalRect.xy, aClipLocalRect.zw);
+    return ClipMaskInstanceImage(
+        fetch_clip_item_common(),
+        tile,
+        aClipDataResourceAddress,
+        local);
+}
+
+struct ClipImageVertexInfo {
+    vec2 local_pos; // vertex position in local space, clamped to the local clip rect
+    vec4 world_pos; // local_pos transformed by the primitive transform
+};
+
+// This differs from write_clip_tile_vertex in that we forward transform the
+// primitive's local-space tile rect into the target space. We use scissoring
+// to ensure that the primitive does not draw outside the target bounds.
+ClipImageVertexInfo write_clip_image_vertex(RectWithEndpoint tile_rect,
+                                            RectWithEndpoint local_clip_rect,
+                                            Transform prim_transform,
+                                            Transform clip_transform,
+                                            RectWithEndpoint sub_rect, // not read by this function
+                                            vec2 task_origin,
+                                            vec2 screen_origin,
+                                            float device_pixel_scale) {
+    vec2 local_pos = rect_clamp(local_clip_rect, mix(tile_rect.p0, tile_rect.p1, aPosition.xy)); // tile corner clamped to the clip rect
+    vec4 world_pos = prim_transform.m * vec4(local_pos, 0.0, 1.0);
+    vec4 final_pos = vec4(
+        world_pos.xy * device_pixel_scale + (task_origin - screen_origin) * world_pos.w, // offset scaled by w to stay homogeneous
+        0.0,
+        world_pos.w
+    );
+    gl_Position = uTransform * final_pos;
+
+    init_transform_vs(
+        clip_transform.is_axis_aligned
+            ? vec4(vec2(-1.0e16), vec2(1.0e16)) // very large bounds when the clip transform is axis-aligned
+            : vec4(local_clip_rect.p0, local_clip_rect.p1));
+
+    ClipImageVertexInfo vi = ClipImageVertexInfo(local_pos, world_pos);
+    return vi;
+}
+
+void main(void) {
+    ClipMaskInstanceImage cmi = fetch_clip_item();
+    Transform clip_transform = fetch_transform(cmi.base.clip_transform_id);
+    Transform prim_transform = fetch_transform(cmi.base.prim_transform_id);
+    ImageSource res = fetch_image_source_direct(cmi.resource_address);
+
+    ClipImageVertexInfo vi = write_clip_image_vertex(
+        cmi.tile_rect,
+        cmi.local_rect,
+        prim_transform,
+        clip_transform,
+        cmi.base.sub_rect,
+        cmi.base.task_origin,
+        cmi.base.screen_origin,
+        cmi.base.device_pixel_scale
+    );
+    vLocalPos = vi.local_pos;
+    vec2 uv = (vi.local_pos - cmi.tile_rect.p0) / rect_size(cmi.tile_rect); // position as a fraction of the tile rect
+
+    vec2 texture_size = vec2(TEX_SIZE(sColor0));
+    vec4 uv_rect = vec4(res.uv_rect.p0, res.uv_rect.p1);
+    vClipMaskImageUv = mix(uv_rect.xy, uv_rect.zw, uv) / texture_size;
+
+    // Apply a half-texel inset to the UV boundaries to prevent linear samples from the outside.
+    vClipMaskUvInnerRect = (uv_rect + vec4(0.5, 0.5, -0.5, -0.5)) / texture_size.xyxy;
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+void main(void) {
+    float alpha = init_transform_rough_fs(vLocalPos);
+    vec2 source_uv = clamp(vClipMaskImageUv, vClipMaskUvInnerRect.xy, vClipMaskUvInnerRect.zw); // keep samples inside the inset rect
+    float clip_alpha = texture(sColor0, source_uv).r; // careful: texture has type A8
+    oFragColor = vec4(mix(1.0, clip_alpha, alpha), 0.0, 0.0, 1.0); // outputs 1.0 where alpha is 0.0
+}
+
+#ifdef SWGL_DRAW_SPAN
+void swgl_drawSpanR8() {
+    if (has_valid_transform_bounds()) { // NOTE(review): presumably falls back to the per-fragment main() - confirm
+        return;
+    }
+
+    swgl_commitTextureLinearR8(sColor0, vClipMaskImageUv, vClipMaskUvInnerRect); // whole span from the mask texture, limited to the inner rect
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_clip_rectangle.glsl b/gfx/wr/webrender/res/cs_clip_rectangle.glsl
new file mode 100644
index 0000000000..8a6aabc710
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_clip_rectangle.glsl
@@ -0,0 +1,498 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,clip_shared,ellipse
+
+varying highp vec4 vLocalPos; // homogeneous local position; the fragment shader divides xy by w
+#ifdef WR_FEATURE_FAST_PATH
+flat varying mediump vec3 vClipParams;      // xy = half box size minus radius, z = radius
+#else
+flat varying highp vec4 vClipCenter_Radius_TL; // xy = corner ellipse center, zw = inverse_radii_squared(radii)
+flat varying highp vec4 vClipCenter_Radius_TR; // same layout, top-right corner
+flat varying highp vec4 vClipCenter_Radius_BL; // same layout, bottom-left corner
+flat varying highp vec4 vClipCenter_Radius_BR; // same layout, bottom-right corner
+flat varying highp vec3 vClipPlane_TL; // xy = unnormalized half-space normal, z = plane offset
+flat varying highp vec3 vClipPlane_TR; // same layout, top-right corner
+flat varying highp vec3 vClipPlane_BL; // same layout, bottom-left corner
+flat varying highp vec3 vClipPlane_BR; // same layout, bottom-right corner
+#endif
+// Clip mode. Packed in to a vector to work around bug 1630356. Only .x is used.
+flat varying mediump vec2 vClipMode;
+
+#ifdef WR_VERTEX_SHADER
+
+PER_INSTANCE in vec2 aClipLocalPos; // position the clip rect's p0 is moved to in main()
+PER_INSTANCE in vec4 aClipLocalRect; // clip rect, xy and zw feed the two RectWithEndpoint arguments
+PER_INSTANCE in float aClipMode; // forwarded to vClipMode.x; blends alpha toward 1.0 - alpha
+PER_INSTANCE in vec4 aClipRect_TL; // top-left corner rect (xy, zw)
+PER_INSTANCE in vec4 aClipRadii_TL; // top-left radii; main() reads only .x (fast path) or .xy
+PER_INSTANCE in vec4 aClipRect_TR; // top-right corner rect (xy, zw)
+PER_INSTANCE in vec4 aClipRadii_TR; // top-right radii; main() reads .xy
+PER_INSTANCE in vec4 aClipRect_BL; // bottom-left corner rect (xy, zw)
+PER_INSTANCE in vec4 aClipRadii_BL; // bottom-left radii; main() reads .xy
+PER_INSTANCE in vec4 aClipRect_BR; // bottom-right corner rect (xy, zw)
+PER_INSTANCE in vec4 aClipRadii_BR; // bottom-right radii; main() reads .xy
+
+struct ClipMaskInstanceRect { // per-instance data of the rectangle clip mask shader
+    ClipMaskInstanceCommon base; // filled in by fetch_clip_item_common()
+    vec2 local_pos; // copied from the aClipLocalPos attribute
+};
+
+ClipMaskInstanceRect fetch_clip_item() {
+    // Assemble the instance with the struct constructor (arguments are in
+    // field order): the common part comes from fetch_clip_item_common(),
+    // the local position from the per-instance attribute.
+    return ClipMaskInstanceRect(
+        fetch_clip_item_common(),
+        aClipLocalPos);
+}
+
+struct ClipRect { // the overall clip rectangle plus its clip mode
+    RectWithEndpoint rect; // built from aClipLocalRect (xy, zw)
+    float mode; // aClipMode; written to vClipMode.x by main()
+};
+
+struct ClipCorner { // one corner of the rounded clip rectangle
+    RectWithEndpoint rect; // built from the corner's aClipRect_* attribute (xy, zw)
+    vec4 outer_inner_radius; // the corner's aClipRadii_* attribute; main() reads only .x or .xy
+};
+
+struct ClipData { // everything fetch_clip() gathers from the instance attributes
+    ClipRect rect; // overall clip rect and mode
+    ClipCorner top_left; // from aClipRect_TL / aClipRadii_TL
+    ClipCorner top_right; // from aClipRect_TR / aClipRadii_TR
+    ClipCorner bottom_left; // from aClipRect_BL / aClipRadii_BL
+    ClipCorner bottom_right; // from aClipRect_BR / aClipRadii_BR
+};
+
+ClipData fetch_clip() {
+    // Every rect attribute is split as xy / zw into the two arguments of
+    // RectWithEndpoint; radii and clip mode are passed through unchanged.
+    RectWithEndpoint bounds = RectWithEndpoint(aClipLocalRect.xy, aClipLocalRect.zw);
+    RectWithEndpoint tl = RectWithEndpoint(aClipRect_TL.xy, aClipRect_TL.zw);
+    RectWithEndpoint tr = RectWithEndpoint(aClipRect_TR.xy, aClipRect_TR.zw);
+    RectWithEndpoint bl = RectWithEndpoint(aClipRect_BL.xy, aClipRect_BL.zw);
+    RectWithEndpoint br = RectWithEndpoint(aClipRect_BR.xy, aClipRect_BR.zw);
+    return ClipData(ClipRect(bounds, aClipMode), ClipCorner(tl, aClipRadii_TL), ClipCorner(tr, aClipRadii_TR),
+                    ClipCorner(bl, aClipRadii_BL), ClipCorner(br, aClipRadii_BR));
+}
+
+void main(void) {
+    ClipMaskInstanceRect instance = fetch_clip_item();
+    Transform clip_xf = fetch_transform(instance.base.clip_transform_id);
+    Transform prim_xf = fetch_transform(instance.base.prim_transform_id);
+    ClipData data = fetch_clip();
+
+    // Move the clip rect so that p0 lands on the instance's local position;
+    // p1 is shifted by the same amount.
+    RectWithEndpoint rect = data.rect.rect;
+    rect.p1 += instance.local_pos - rect.p0;
+    rect.p0 = instance.local_pos;
+
+    ClipVertexInfo vertex = write_clip_tile_vertex(
+        rect,
+        prim_xf,
+        clip_xf,
+        instance.base.sub_rect,
+        instance.base.task_origin,
+        instance.base.screen_origin,
+        instance.base.device_pixel_scale
+    );
+
+    vLocalPos = vertex.local_pos;
+    vClipMode.x = data.rect.mode;
+
+#ifdef WR_FEATURE_FAST_PATH
+    // Uniform radii: a much simpler 2d signed distance function is enough.
+    // Re-center vLocalPos on the middle of the rect (scaled by w, as xy is homogeneous).
+    float corner_radius = data.top_left.outer_inner_radius.x;
+    vec2 half_extent = 0.5 * rect_size(rect);
+    vLocalPos.xy -= (half_extent + instance.local_pos) * vertex.local_pos.w;
+    vClipParams = vec3(half_extent - vec2(corner_radius), corner_radius);
+#else
+    // Only the .xy of each outer_inner_radius is used here, as the radii of
+    // that corner's ellipse.
+    vec2 rad_tl = data.top_left.outer_inner_radius.xy;
+    vec2 rad_tr = data.top_right.outer_inner_radius.xy;
+    vec2 rad_br = data.bottom_right.outer_inner_radius.xy;
+    vec2 rad_bl = data.bottom_left.outer_inner_radius.xy;
+
+    // Ellipse centers: each corner of the rect moved inward by its radii.
+    vec2 center_tl = rect.p0 + rad_tl;
+    vec2 center_tr = vec2(rect.p1.x - rad_tr.x, rect.p0.y + rad_tr.y);
+    vec2 center_br = rect.p1 - rad_br;
+    vec2 center_bl = vec2(rect.p0.x + rad_bl.x, rect.p1.y - rad_bl.y);
+
+    // Pack each center with inverse_radii_squared() of the matching radii.
+    vClipCenter_Radius_TL = vec4(center_tl, inverse_radii_squared(rad_tl));
+    vClipCenter_Radius_TR = vec4(center_tr, inverse_radii_squared(rad_tr));
+    vClipCenter_Radius_BR = vec4(center_br, inverse_radii_squared(rad_br));
+    vClipCenter_Radius_BL = vec4(center_bl, inverse_radii_squared(rad_bl));
+
+    // The half-space of each corner is needed separately from its center and
+    // radius. Its direction vector is the perpendicular (-y, x) of the
+    // corner's diagonal; the scale of the vector is left unchanged (it is
+    // not normalized).
+    vec2 normal_tl = -rad_tl.yx;
+    vec2 normal_tr = vec2(rad_tr.y, -rad_tr.x);
+    vec2 normal_br = rad_br.yx;
+    vec2 normal_bl = vec2(-rad_bl.y, rad_bl.x);
+
+    // The plane offset is the normal dotted with a point on the diagonal: an
+    // inner vertex pushed out along one axis, but not along both.
+    vec2 edge_tl = vec2(rect.p0.x, rect.p0.y + rad_tl.y);
+    vec2 edge_tr = vec2(rect.p1.x - rad_tr.x, rect.p0.y);
+    vec2 edge_br = vec2(rect.p1.x, rect.p1.y - rad_br.y);
+    vec2 edge_bl = vec2(rect.p0.x + rad_bl.x, rect.p1.y);
+
+    vClipPlane_TL = vec3(normal_tl, dot(normal_tl, edge_tl));
+    vClipPlane_TR = vec3(normal_tr, dot(normal_tr, edge_tr));
+    vClipPlane_BR = vec3(normal_br, dot(normal_br, edge_br));
+    vClipPlane_BL = vec3(normal_bl, dot(normal_bl, edge_bl));
+#endif
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+#ifdef WR_FEATURE_FAST_PATH
+// See http://www.iquilezles.org/www/articles/distfunctions2d/distfunctions2d.htm
+float sd_box(in vec2 pos, in vec2 box_size) {
+    vec2 past_edge = abs(pos) - box_size; // per axis: how far |pos| lies beyond box_size (negative inside)
+    return length(max(past_edge, vec2(0.0))) + min(max(past_edge.x, past_edge.y), 0.0);
+}
+
+float sd_rounded_box(in vec2 pos, in vec2 box_size, in float radius) {
+    return sd_box(pos, box_size) - radius; // the box distance reduced by radius: grows the box outward with rounded corners
+}
+#endif
+
+void main(void) {
+    // Undo the homogeneous scale to get the 2d local-space position.
+    vec2 pos = vLocalPos.xy / vLocalPos.w;
+    float range = compute_aa_range(pos);
+
+    // Distance from the fragment to the rounded rectangle.
+    float signed_dist;
+#ifdef WR_FEATURE_FAST_PATH
+    signed_dist = sd_rounded_box(pos, vClipParams.xy, vClipParams.z);
+#else
+    signed_dist = distance_to_rounded_rect(
+        pos,
+        vClipPlane_TL, vClipCenter_Radius_TL,
+        vClipPlane_TR, vClipCenter_Radius_TR,
+        vClipPlane_BR, vClipCenter_Radius_BR,
+        vClipPlane_BL, vClipCenter_Radius_BL,
+        vTransformBounds);
+#endif
+
+    // AA coverage for that distance; vClipMode.x blends it toward 1.0 - coverage (clip in/out).
+    float coverage = distance_aa(range, signed_dist);
+    float blended = mix(coverage, 1.0 - coverage, vClipMode.x);
+
+    // Fragments whose w is not positive always write 0.0.
+    float result = 0.0;
+    if (vLocalPos.w > 0.0) {
+        result = blended;
+    }
+    oFragColor = vec4(result, 0.0, 0.0, 1.0);
+}
+
+#ifdef SWGL_DRAW_SPAN
+// Currently the cs_clip_rectangle shader is slow because it always evaluates
+// the corner ellipse segments and the rectangle AA for every fragment the
+// shader is run on. To alleviate this for now with SWGL, this essentially
+// implements a rounded-rectangle span rasterizer inside the span shader. The
+// motivation is that we can separate out the parts of the span which are fully
+// opaque and fully transparent, outputting runs of fixed color in those areas,
+// while only evaluating the ellipse segments and AA in the smaller outlying
+// parts of the span that actually need it.
+// The shader conceptually represents a rounded rectangle as an inner octagon
+// (8 half-spaces) bounding the opaque region and an outer octagon bounding the
+// curve and AA parts. Everything outside is transparent. The line of the span
+// is intersected with half-spaces, looking for interior spans that minimally
+// intersect the half-spaces (start max, end min). In the ideal case we hit a
+// start corner ellipse segment and an end corner ellipse segment, rendering
+// the two curves on the ends with an opaque run in between, outputting clear
+// for any transparent runs before and after the start and end curves.
+// This is slightly complicated by the fact that the results here must agree
+// with the main results of the fragment shader, in case SWGL has to fall back
+// to the main fragment shader for any reason. So, we make an effort to handle
+// both ways of operating - the uniform radius fast-path and the varying radius
+// slow-path.
+void swgl_drawSpanR8() {
+    // Perspective is not supported: bail out when w varies along the span.
+    if (swgl_interpStep(vLocalPos).w != 0.0) {
+        return;
+    }
+
+    // If the span is completely outside the Z-range and clipped out, just
+    // output clear so we don't need to consider invalid W in the rest of the
+    // shader.
+    float w = swgl_forceScalar(vLocalPos.w);
+    if (w <= 0.0) {
+        swgl_commitSolidR8(0.0);
+        return;
+    }
+
+    // To start, we evaluate the rounded-rectangle in local space relative to
+    // the local-space position. This will be interpolated across the span to
+    // track whether we intersect any half-spaces.
+    w = 1.0 / w;
+    vec2 local_pos = vLocalPos.xy * w;
+    vec2 local_pos0 = swgl_forceScalar(local_pos);
+    vec2 local_step = swgl_interpStep(vLocalPos).xy * w;
+    float step_scale = max(dot(local_step, local_step), 1.0e-6);
+
+    // Get the local-space AA range. This range represents 1/fwidth(local_pos),
+    // essentially the scale of how much local-space maps to an AA pixel. We
+    // need to know the inverse, how much local-space we traverse per AA pixel
+    // step. We then scale this to represent the amount of span steps
+    // traversed per AA pixel step.
+    float aa_range = compute_aa_range(local_pos);
+    float aa_margin = inversesqrt(aa_range * aa_range * step_scale);
+
+    // We need to know the bounds of the aligned rectangle portion of the rrect
+    // in local-space. If we're using the fast-path, this is specified as the
+    // inner bounding-box half-width of the rrect and the uniform outer radius
+    // of the corners in vClipParams, which we map to the outer bounding-box.
+    // For the general case, we have already stored the outer bounding box in
+    // vTransformBounds.
+    #ifdef WR_FEATURE_FAST_PATH
+        vec4 clip_rect = vec4(-vClipParams.xy - vClipParams.z, vClipParams.xy + vClipParams.z);
+    #else
+        vec4 clip_rect = vTransformBounds;
+    #endif
+
+    // We need to compute the local-space distance to the bounding box and then
+    // figure out how many processing steps that maps to. If we are stepping in
+    // a negative direction on an axis, we need to swap the sides of the box
+    // which we consider as the start or end. If there is no local-space step
+    // on an axis (i.e. constant Y), we need to take care to force the steps to
+    // either the start or end of the span depending on if we are inside or
+    // outside of the bounding box.
+    vec4 clip_dist =
+        mix(clip_rect, clip_rect.zwxy, lessThan(local_step, vec2(0.0)).xyxy)
+            - local_pos0.xyxy;
+    clip_dist =
+        mix(1.0e6 * step(0.0, clip_dist),
+            clip_dist * recip(local_step).xyxy,
+            notEqual(local_step, vec2(0.0)).xyxy);
+
+    // Initially, the opaque region is bounded by the further start intersect
+    // with the bounding box and the nearest end intersect with the bounding
+    // box.
+    float opaque_start = max(clip_dist.x, clip_dist.y);
+    float opaque_end = min(clip_dist.z, clip_dist.w);
+    float aa_start = opaque_start;
+    float aa_end = opaque_end;
+
+    // Here we actually intersect with the half-space of the corner. We get the
+    // plane distance of the local-space position from the diagonal bounding
+    // ellipse segment from the opaque region. The half-space is defined by the
+    // direction vector of the plane and an offset point that falls on the
+    // dividing line (which is a vertex on the corner box, which is actually on
+    // the outer radius of the bounding box, but not a corner vertex). This
+    // distance is positive if on the curve side and negative if on the inner
+    // opaque region. If we are on the curve side, we need to verify we are
+    // traveling in direction towards the opaque region so that we will
+    // eventually intersect the diagonal so we can calculate when the start
+    // corner segment will end, otherwise we are going away from the rrect.
+    // If we are inside the opaque interior, we need to verify we are traveling
+    // in direction towards the curve, so that we can calculate when the end
+    // corner segment will start. Further, if we intersect, we calculate the
+    // offset of the outer octagon where AA starts from the inner octagon of
+    // where the opaque region starts using the apex vector (which is transpose
+    // of the half-space's direction).
+    //
+    // We need to intersect the corner ellipse segments. Significantly, we need
+    // to know where the apex of the ellipse segment is and how far to push the
+    // outer diagonal of the octagon from the inner diagonal. The position of
+    // the inner diagonal simply runs diagonal across the corner box and has a
+    // constant offset from vertex on the inner bounding box. The apex also has
+    // a constant offset along the opposite diagonal relative to the diagonal
+    // intersect which is 1/sqrt(2) - 0.5 assuming unit length for the diagonal.
+    // We then need to project the vector to the apex onto the local-space step
+    // scale, but we do this with reference to the normal vector of the diagonal
+    // using dot(normal, apex) / dot(normal, local_step), where the apex vector
+    // is (0.7071 - 0.5) * abs(normal).yx * sign(normal).
+    vec3 start_plane = vec3(1.0e6);
+    vec3 end_plane = vec3(1.0e6);
+
+    // plane is assumed to be a vec3 with normal in (X, Y) and offset in Z.
+    #define CLIP_CORNER(plane, info) do {                                     \
+        float dist = dot(local_pos0, plane.xy) - plane.z;                     \
+        float scale = -dot(local_step, plane.xy);                             \
+        if (scale >= 0.0) {                                                   \
+            if (dist > opaque_start * scale) {                                \
+                SET_CORNER(start_corner, info);                               \
+                start_plane = plane;                                          \
+                float inv_scale = recip(max(scale, 1.0e-6));                  \
+                opaque_start = dist * inv_scale;                              \
+                float apex = (0.7071 - 0.5) * 2.0 * abs(plane.x * plane.y);   \
+                aa_start = opaque_start - apex * inv_scale;                   \
+            }                                                                 \
+        } else if (dist > opaque_end * scale) {                               \
+            SET_CORNER(end_corner, info);                                     \
+            end_plane = plane;                                                \
+            float inv_scale = recip(min(scale, -1.0e-6));                     \
+            opaque_end = dist * inv_scale;                                    \
+            float apex = (0.7071 - 0.5) * 2.0 * abs(plane.x * plane.y);       \
+            aa_end = opaque_end - apex * inv_scale;                           \
+        }                                                                     \
+    } while (false)
+
+    #ifdef WR_FEATURE_FAST_PATH
+        // For the fast-path, we only have the half-width of the inner bounding
+        // box. We need to map this to points that fall on the diagonal of the
+        // half-space for each corner. To do this we just need to push out the
+        // vertex in the right direction on a single axis, leaving the other
+        // unchanged.
+        // However, since the corner radii are all the same, and since the local
+        // origin of each ellipse is assumed to be at (0, 0), the plane offset
+        // of the half-space is the same for each case. So given a corner offset
+        // of (x+z, y) and a vector of (z, z), the dot product becomes:
+        //   (x+z)*z + y*z == x*z + y*z + z*z
+        // The direction vector of the corner half-space has constant length,
+        // but just needs an appropriate direction set.
+        float offset = (vClipParams.x + vClipParams.y + vClipParams.z) * vClipParams.z;
+        vec3 plane_tl = vec3(-vClipParams.zz, offset);
+        vec3 plane_tr = vec3(vClipParams.z, -vClipParams.z, offset);
+        vec3 plane_br = vec3(vClipParams.zz, offset);
+        vec3 plane_bl = vec3(-vClipParams.z, vClipParams.z, offset);
+
+        #define SET_CORNER(corner, info)
+
+        // Clip against the corner half-spaces.
+        CLIP_CORNER(plane_tl, );
+        CLIP_CORNER(plane_tr, );
+        CLIP_CORNER(plane_br, );
+        CLIP_CORNER(plane_bl, );
+
+        // Later we need to calculate distance AA for both corners and the
+        // outer bounding rect. For the fast-path, this is all done inside
+        // sd_rounded_box.
+        #define AA_RECT(local_pos) \
+            sd_rounded_box(local_pos, vClipParams.xy, vClipParams.z)
+    #else
+        // For the general case, we need to remember which of the actual start
+        // and end corners we intersect, so that we can evaluate the curve AA
+        // against only those corners rather than having to try against all 4
+        // corners for both sides of the span. Initialize these values so that
+        // if no corner is intersected, they will just zero the AA.
+        vec4 start_corner = vec4(vec2(1.0e6), vec2(1.0));
+        vec4 end_corner = vec4(vec2(1.0e6), vec2(1.0));
+
+        #define SET_CORNER(corner, info) corner = info
+
+        // Clip against the corner half-spaces. We have already computed the
+        // corner half-spaces in the vertex shader.
+        CLIP_CORNER(vClipPlane_TL, vClipCenter_Radius_TL);
+        CLIP_CORNER(vClipPlane_TR, vClipCenter_Radius_TR);
+        CLIP_CORNER(vClipPlane_BR, vClipCenter_Radius_BR);
+        CLIP_CORNER(vClipPlane_BL, vClipCenter_Radius_BL);
+
+        // Later we need to calculate distance AA for both corners and the
+        // outer bounding rect. For the general case, we need to explicitly
+        // evaluate either the ellipse segment distance or the rect distance.
+        #define AA_RECT(local_pos) \
+            signed_distance_rect(local_pos, vTransformBounds.xy, vTransformBounds.zw)
+        #define AA_CORNER(local_pos, corner) \
+            distance_to_ellipse_approx(local_pos - corner.xy, corner.zw, 1.0)
+    #endif
+
+    // Pad the AA region by a margin, as the intersections take place assuming
+    // pixel centers, but AA actually starts half a pixel away from the center.
+    // If the AA region narrows to nothing, be careful not to inflate so much
+    // that we start processing AA for fragments that don't need it.
+    aa_margin = max(aa_margin - max(aa_start - aa_end, 0.0), 0.0);
+    aa_start -= aa_margin;
+    aa_end += aa_margin;
+
+    // Compute the thresholds at which we need to transition between various
+    // segments of the span, from fully transparent outside to the start of
+    // the outer octagon where AA starts, from there to where the inner opaque
+    // octagon starts, from there to where the opaque inner octagon ends and
+    // AA starts again, to finally where the outer octagon/AA ends and we're
+    // back to fully transparent. These thresholds are just flipped offsets
+    // from the start of the span so we can compare against the remaining
+    // span length which automatically deducts as we commit fragments.
+    ivec4 steps = ivec4(clamp(
+        swgl_SpanLength -
+            swgl_StepSize *
+                vec4(floor(aa_start), ceil(opaque_start), floor(opaque_end), ceil(aa_end)),
+        0.0, swgl_SpanLength));
+    int aa_start_len = steps.x;
+    int opaque_start_len = steps.y;
+    int opaque_end_len = steps.z;
+    int aa_end_len = steps.w;
+
+    // Output fully clear while we're outside the AA region.
+    if (swgl_SpanLength > aa_start_len) {
+        int num_aa = swgl_SpanLength - aa_start_len;
+        swgl_commitPartialSolidR8(num_aa, vClipMode.x);
+        local_pos += float(num_aa / swgl_StepSize) * local_step;
+    }
+    #ifdef AA_CORNER
+    if (start_plane.x < 1.0e5) {
+        // We're now in the outer octagon which requires AA. Evaluate the corner
+        // distance of the start corner here and output AA for it. Before we hit
+        // the actual opaque inner octagon, we have a transitional step where the
+        // diagonal might intersect mid-way through the step. We have to consider
+        // either the corner or rect distance depending on which side we're on.
+        while (swgl_SpanLength > opaque_start_len) {
+            float alpha = distance_aa(aa_range,
+                dot(local_pos, start_plane.xy) > start_plane.z
+                    ? AA_CORNER(local_pos, start_corner)
+                    : AA_RECT(local_pos));
+            swgl_commitColorR8(mix(alpha, 1.0 - alpha, vClipMode.x));
+            local_pos += local_step;
+        }
+    }
+    #endif
+    // If there's no start corner, just do rect AA until opaque.
+    while (swgl_SpanLength > opaque_start_len) {
+        float alpha = distance_aa(aa_range, AA_RECT(local_pos));
+        swgl_commitColorR8(mix(alpha, 1.0 - alpha, vClipMode.x));
+        local_pos += local_step;
+    }
+    // Now we're finally in the opaque inner octagon part of the span. Just
+    // output a solid run.
+    if (swgl_SpanLength > opaque_end_len) {
+        int num_opaque = swgl_SpanLength - opaque_end_len;
+        swgl_commitPartialSolidR8(num_opaque, 1.0 - vClipMode.x);
+        local_pos += float(num_opaque / swgl_StepSize) * local_step;
+    }
+    #ifdef AA_CORNER
+    if (end_plane.x < 1.0e5) {
+        // Finally we're in the AA region on the other side, inside the outer
+        // octagon again. Just evaluate the distance to the end corner and
+        // compute AA for it. We're leaving the opaque inner octagon, but like
+        // before, we have to be careful we're not dealing with a step partially
+        // intersected by the end corner's diagonal. Check which side we are on
+        // and use either the corner or rect distance as appropriate.
+        while (swgl_SpanLength > aa_end_len) {
+            float alpha = distance_aa(aa_range,
+                dot(local_pos, end_plane.xy) > end_plane.z
+                    ? AA_CORNER(local_pos, end_corner)
+                    : AA_RECT(local_pos));
+            swgl_commitColorR8(mix(alpha, 1.0 - alpha, vClipMode.x));
+            local_pos += local_step;
+        }
+    }
+    #endif
+    // If there's no end corner, just do rect AA until clear.
+    while (swgl_SpanLength > aa_end_len) {
+        float alpha = distance_aa(aa_range, AA_RECT(local_pos));
+        swgl_commitColorR8(mix(alpha, 1.0 - alpha, vClipMode.x));
+        local_pos += local_step;
+    }
+    // We're now outside the outer AA octagon on the other side. Just output
+    // fully clear.
+    if (swgl_SpanLength > 0) {
+        swgl_commitPartialSolidR8(swgl_SpanLength, vClipMode.x);
+    }
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_conic_gradient.glsl b/gfx/wr/webrender/res/cs_conic_gradient.glsl
new file mode 100644
index 0000000000..551fc8a90b
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_conic_gradient.glsl
@@ -0,0 +1,67 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,rect,render_task,gpu_cache,gpu_buffer,gradient
+
+#define PI                  3.141592653589793
+
+varying highp vec2 v_pos; // position relative to the task rect, pre-multiplied by the offset scale
+
+flat varying highp vec2 v_center; // gradient center, pre-multiplied by the offset scale
+
+// x: start offset, y: offset scale, z: angle
+// Packed in to a vector to work around bug 1630356.
+flat varying highp vec3 v_start_offset_offset_scale_angle_vec;
+#define v_start_offset v_start_offset_offset_scale_angle_vec.x
+#define v_offset_scale v_start_offset_offset_scale_angle_vec.y
+#define v_angle v_start_offset_offset_scale_angle_vec.z
+
+#ifdef WR_VERTEX_SHADER
+
+#define EXTEND_MODE_REPEAT 1 // aExtendMode value that turns gradient repeat on
+
+PER_INSTANCE in vec4 aTaskRect; // xy and zw are the two corners aPosition interpolates between
+PER_INSTANCE in vec2 aCenter; // gradient center; scaled into v_center
+PER_INSTANCE in vec2 aScale; // extra factor applied to v_pos
+PER_INSTANCE in float aStartOffset; // start of the offset range
+PER_INSTANCE in float aEndOffset; // end of the offset range; 1 / (end - start) is the offset scale
+PER_INSTANCE in float aAngle; // v_angle is PI / 2 minus this value
+PER_INSTANCE in int aExtendMode; // compared against EXTEND_MODE_REPEAT
+PER_INSTANCE in int aGradientStopsAddress; // forwarded to v_gradient_address.x
+
+void main(void) {
+    // Place the vertex: aPosition selects a point inside the task rect.
+    vec2 task_size = aTaskRect.zw - aTaskRect.xy;
+    gl_Position = uTransform * vec4(mix(aTaskRect.xy, aTaskRect.zw, aPosition.xy), 0.0, 1.0);
+
+    // The scale is the reciprocal of the offset range (end - start). A zero
+    // range has no reciprocal, so the scale falls back to zero in that case.
+    float offset_range = aEndOffset - aStartOffset;
+    float inv_range = offset_range != 0.0 ? 1.0 / offset_range : 0.0;
+    v_offset_scale = inv_range;
+    v_start_offset = aStartOffset * inv_range;
+    v_angle = PI / 2.0 - aAngle;
+
+    // Both are relative to the task rect, hence independent of the task origin.
+    v_center = aCenter * inv_range;
+    v_pos = task_size * aPosition.xy * inv_range * aScale;
+
+    v_gradient_repeat.x = float(aExtendMode == EXTEND_MODE_REPEAT);
+    v_gradient_address.x = aGradientStopsAddress;
+}
+#endif
+
+
+#ifdef WR_FRAGMENT_SHADER
+
+void main(void) {
+    // Angle of the fragment around the center (offset by v_angle), as a fraction of a full turn.
+    vec2 to_frag = v_pos - v_center;
+    float theta = atan(to_frag.y, to_frag.x) + v_angle;
+    float turn = fract(theta / (2.0 * PI));
+    float gradient_offset = turn * v_offset_scale - v_start_offset;
+    oFragColor = sample_gradient(gradient_offset);
+}
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_fast_linear_gradient.glsl b/gfx/wr/webrender/res/cs_fast_linear_gradient.glsl
new file mode 100644
index 0000000000..36b3368dfe
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_fast_linear_gradient.glsl
@@ -0,0 +1,32 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared
+
+varying highp float vPos; // mix factor between the two colors
+flat varying mediump vec4 vColor0; // color at vPos = 0.0
+flat varying mediump vec4 vColor1; // color at vPos = 1.0
+
+#ifdef WR_VERTEX_SHADER
+
+PER_INSTANCE in vec4 aTaskRect; // xy and zw are the two corners aPosition interpolates between
+PER_INSTANCE in vec4 aColor0; // copied to vColor0
+PER_INSTANCE in vec4 aColor1; // copied to vColor1
+PER_INSTANCE in float aAxisSelect; // 0.0 takes aPosition.x for vPos, 1.0 takes aPosition.y
+
+void main(void) {
+    // aAxisSelect picks which quad coordinate drives the gradient (0.0 = x, 1.0 = y).
+    float along_axis = mix(aPosition.x, aPosition.y, aAxisSelect);
+    vPos = mix(0.0, 1.0, along_axis);
+    vColor0 = aColor0;
+    vColor1 = aColor1;
+    gl_Position = uTransform * vec4(mix(aTaskRect.xy, aTaskRect.zw, aPosition.xy), 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+void main(void) {
+    oFragColor = mix(vColor0, vColor1, vPos); // vColor0 at vPos = 0.0, vColor1 at vPos = 1.0
+}
+#endif
diff --git a/gfx/wr/webrender/res/cs_line_decoration.glsl b/gfx/wr/webrender/res/cs_line_decoration.glsl
new file mode 100644
index 0000000000..00ed2e249a
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_line_decoration.glsl
@@ -0,0 +1,165 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared
+
+#define LINE_STYLE_SOLID        0 // no parameters are written; full coverage
+#define LINE_STYLE_DOTTED       1 // vParams: x = period, y = dot radius, z = center line
+#define LINE_STYLE_DASHED       2 // vParams: x = period, y = dash length
+#define LINE_STYLE_WAVY         3 // vParams: x = half thickness, y = slope length, z = flat length, w = height
+
+// Fragment position in the coordinate system used for positioning decorations.
+// To keep the code independent of whether the line is horizontal or vertical,
+// vLocalPos.x is always parallel, and .y always perpendicular, to the line
+// being decorated.
+varying highp vec2 vLocalPos;
+
+// Line style. Packed in to a vector to work around bug 1630356.
+flat varying mediump ivec2 vStyle;
+
+flat varying mediump vec4 vParams; // per-style parameters; the layout depends on the LINE_STYLE_* value
+
+#ifdef WR_VERTEX_SHADER
+
+// The size of the mask tile we're rendering, in pixels.
+PER_INSTANCE in vec4 aTaskRect;
+
+// The size of the mask tile. aLocalSize.x is always horizontal and .y vertical,
+// regardless of the line's orientation. The size is chosen by
+// prim_store::line_dec::get_line_decoration_sizes.
+PER_INSTANCE in vec2 aLocalSize;
+
+// A LINE_STYLE_* value, indicating what sort of line to draw.
+PER_INSTANCE in int aStyle;
+
+// 0.0 for a horizontal line, 1.0 for a vertical line.
+PER_INSTANCE in float aAxisSelect;
+
+// The thickness of the wavy line itself, not the amplitude of the waves (i.e.,
+// the thickness of the final decorated line).
+PER_INSTANCE in float aWavyLineThickness;
+
+void main(void) {
+    // For vertical lines the size components are swapped, so .x always runs
+    // along the line and .y across it.
+    vec2 line_size = mix(aLocalSize, aLocalSize.yx, aAxisSelect);
+    vLocalPos = mix(aPosition.xy, aPosition.yx, aAxisSelect) * line_size;
+
+    gl_Position = uTransform * vec4(mix(aTaskRect.xy, aTaskRect.zw, aPosition.xy), 0.0, 1.0);
+
+    // vParams carries the per-style values read by the fragment shader; what
+    // each component means depends on the style.
+    vStyle.x = aStyle;
+
+    if (vStyle.x == LINE_STYLE_SOLID) {
+        // Nothing to pass along: vParams is left unwritten for solid lines.
+    } else if (vStyle.x == LINE_STYLE_DASHED) {
+        // x = period, y = dash length (half of the period).
+        float dash_period = line_size.x;
+        vParams = vec4(dash_period,
+                       0.5 * dash_period,
+                       0.0,
+                       0.0);
+    } else if (vStyle.x == LINE_STYLE_DOTTED) {
+        // x = period (twice the diameter), y = dot radius, z = center line.
+        float dot_diameter = line_size.y;
+        vParams = vec4(dot_diameter * 2.0,
+                       dot_diameter / 2.0,
+                       0.5 * line_size.y,
+                       0.0);
+    } else if (vStyle.x == LINE_STYLE_WAVY) {
+        // x = half the line thickness, y = slope length, z = flat length,
+        // w = full height. The logic is copied from gecko to get the same
+        // results.
+        float thickness = max(aWavyLineThickness, 1.0);
+
+        // Height difference between peaks and troughs. The slopes are at 45
+        // degrees, so this is the length of each slope as well.
+        float slope = line_size.y - thickness;
+
+        // Length of the flat runs.
+        float flat_run = max((thickness - 1.0) * 2.0, 1.0);
+
+        vParams = vec4(thickness / 2.0, slope,
+                       flat_run, line_size.y);
+    } else {
+        // Any other style value gets zeroed parameters.
+        vParams = vec4(0.0);
+    }
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+#define MAGIC_WAVY_LINE_AA_SNAP         0.5
+
+void main(void) {
+    // The AA range is derived from the interpolated position up front, ahead
+    // of the per-style branches.
+    vec2 local = vLocalPos;
+    float aa_range = compute_aa_range(local);
+
+    // Solid lines and unrecognized styles keep full coverage.
+    float coverage = 1.0;
+
+    if (vStyle.x == LINE_STYLE_DASHED) {
+        // Dashes use a hard on/off edge: on while the position along the
+        // line, rounded to the nearest integer, does not exceed the dash
+        // length in vParams.y.
+        float rounded_x = floor(local.x + 0.5);
+        coverage = step(rounded_x, vParams.y);
+    } else if (vStyle.x == LINE_STYLE_DOTTED) {
+        // Distance to the edge of a dot with radius vParams.y centered at
+        // vParams.yz, turned into anti-aliased coverage.
+        vec2 from_center = local - vParams.yz;
+        float edge_dist = length(from_center) - vParams.y;
+        coverage = distance_aa(aa_range, edge_dist);
+    } else if (vStyle.x == LINE_STYLE_WAVY) {
+        // Unpack the parameters written by the vertex shader.
+        float half_thickness = vParams.x;
+        float slope_len = vParams.y;
+        float flat_len = vParams.z;
+        float height = vParams.w;
+
+        // The pattern is two slopes and two flats, so half a period is one
+        // slope plus one flat.
+        float half_period = slope_len + flat_len;
+        float mid = height / 2.0;
+
+        // flip is -1.0 or +1.0 and changes every half period; it mirrors
+        // the peak around the mid height.
+        float wrapped = mod(local.x, 2.0 * half_period);
+        float flip = -2.0 * (step(wrapped, half_period) - 0.5);
+        float peak_offset = (mid - half_thickness) * flip;
+        float peak = mid + peak_offset;
+
+        // Fold the position into a single half period.
+        vec2 folded = vec2(mod(local.x, half_period), local.y);
+
+        // Signed distances to the three lines that make up one arc.
+        vec2 arc_origin = vec2(0.0, peak);
+        float dist_a = distance_to_line(arc_origin,
+                                        vec2(1.0, -flip),
+                                        folded);
+        float dist_b = distance_to_line(arc_origin,
+                                        vec2(0.0, -flip),
+                                        folded);
+        float dist_c = distance_to_line(vec2(flat_len, peak),
+                                        vec2(-1.0, -flip),
+                                        folded);
+        float arc_dist = abs(max(max(dist_a, dist_b), dist_c));
+
+        // Anti-alias against the half thickness of the stroke.
+        coverage = distance_aa(aa_range, arc_dist - half_thickness);
+
+        // Thin lines get no AA: the coverage is snapped to either 0.0 or
+        // 1.0 around MAGIC_WAVY_LINE_AA_SNAP.
+        if (half_thickness <= 1.0) {
+            coverage = 1.0 - step(coverage, MAGIC_WAVY_LINE_AA_SNAP);
+        }
+    }
+
+    oFragColor = vec4(coverage);
+}
+#endif
diff --git a/gfx/wr/webrender/res/cs_linear_gradient.glsl b/gfx/wr/webrender/res/cs_linear_gradient.glsl
new file mode 100644
index 0000000000..b1aff899a6
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_linear_gradient.glsl
@@ -0,0 +1,68 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,rect,render_task,gpu_cache,gpu_buffer,gradient
+
+// Unit-quad vertex position multiplied by aScale, interpolated per fragment.
+varying highp vec2 v_pos;
+
+// Gradient direction divided by its squared length, then multiplied by the
+// task rect size (see the vertex stage).
+flat varying mediump vec2 v_scale_dir;
+
+// Start offset. Packed in to a vector to work around bug 1630356.
+// Only the .x component is written and read.
+flat varying highp vec2 v_start_offset;
+
+#ifdef WR_VERTEX_SHADER
+
+// Value of aExtendMode for which v_gradient_repeat.x is set to 1.0.
+#define EXTEND_MODE_REPEAT 1
+
+// Corners (.xy and .zw) the instanced quad is interpolated between.
+PER_INSTANCE in vec4 aTaskRect;
+// End points of the gradient line.
+PER_INSTANCE in vec2 aStartPoint;
+PER_INSTANCE in vec2 aEndPoint;
+// Multiplier applied to the unit-quad position to produce v_pos.
+PER_INSTANCE in vec2 aScale;
+// Compared against EXTEND_MODE_REPEAT.
+PER_INSTANCE in int aExtendMode;
+// Forwarded unchanged to v_gradient_address.x.
+PER_INSTANCE in int aGradientStopsAddress;
+
+// Vertex stage: covers the task rect with the instanced quad and precomputes
+// the flat values the fragment stage needs to project onto the gradient line.
+void main(void) {
+    vec2 task_pos = mix(aTaskRect.xy, aTaskRect.zw, aPosition.xy);
+    gl_Position = uTransform * vec4(task_pos, 0.0, 1.0);
+
+    v_pos = aPosition.xy * aScale;
+
+    // Dividing the start-to-end vector by its squared length makes a dot
+    // product with it yield offsets on a 0..1 scale along the gradient line.
+    vec2 gradient_line = aEndPoint - aStartPoint;
+    v_scale_dir = gradient_line / dot(gradient_line, gradient_line);
+    v_start_offset.x = dot(aStartPoint, v_scale_dir);
+
+    // Fold the task rect size into the direction after the start offset has
+    // been derived from the unscaled value.
+    vec2 task_size = aTaskRect.zw - aTaskRect.xy;
+    v_scale_dir = v_scale_dir * task_size;
+
+    v_gradient_repeat.x = float(aExtendMode == EXTEND_MODE_REPEAT);
+    v_gradient_address.x = aGradientStopsAddress;
+}
+#endif
+
+
+#ifdef WR_FRAGMENT_SHADER
+
+// Fragment stage: evaluates the gradient at this fragment's position.
+void main(void) {
+    // Project the position onto the scaled direction vector, then measure
+    // the result relative to the start point's offset.
+    float projected = dot(v_pos, v_scale_dir);
+    float gradient_offset = projected - v_start_offset.x;
+
+    oFragColor = sample_gradient(gradient_offset);
+}
+
+
+#ifdef SWGL_DRAW_SPAN
+// Span-drawing entry point, compiled only when SWGL_DRAW_SPAN is defined.
+// Validates the gradient stops addressed by v_gradient_address.x in
+// sGpuBuffer and, if a usable address comes back, passes the linear gradient
+// parameters to swgl_commitLinearGradientRGBA8.
+// NOTE(review): presumably an RGBA8 fast path used by SWGL instead of the
+// per-fragment main() above -- confirm against the SWGL sources.
+void swgl_drawSpanRGBA8() {
+    int address = swgl_validateGradient(sGpuBuffer, get_gpu_buffer_uv(v_gradient_address.x), int(GRADIENT_ENTRIES + 2.0));
+    // A negative result means nothing is committed by this function.
+    if (address < 0) {
+        return;
+    }
+
+    swgl_commitLinearGradientRGBA8(sGpuBuffer, address, GRADIENT_ENTRIES, false, v_gradient_repeat.x != 0.0,
+                                   v_pos, v_scale_dir, v_start_offset.x);
+}
+#endif
+
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_radial_gradient.glsl b/gfx/wr/webrender/res/cs_radial_gradient.glsl
new file mode 100644
index 0000000000..16ffe06376
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_radial_gradient.glsl
@@ -0,0 +1,71 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,rect,render_task,gpu_cache,gpu_buffer,gradient
+
+// Position relative to the gradient center, pre-multiplied by
+// 1 / (end_radius - start_radius) and with .y scaled by aXYRatio.
+varying highp vec2 v_pos;
+
+// Start radius. Packed in to a vector to work around bug 1630356.
+// Only the .x component is written and read.
+flat varying highp vec2 v_start_radius;
+
+#ifdef WR_VERTEX_SHADER
+
+// Value of aExtendMode for which v_gradient_repeat.x is set to 1.0.
+#define EXTEND_MODE_REPEAT 1
+
+// Corners (.xy and .zw) the instanced quad is interpolated between.
+PER_INSTANCE in vec4 aTaskRect;
+// Subtracted from the scaled task-relative position to form v_pos.
+PER_INSTANCE in vec2 aCenter;
+// Multiplier applied to the task-relative position.
+PER_INSTANCE in vec2 aScale;
+PER_INSTANCE in float aStartRadius;
+PER_INSTANCE in float aEndRadius;
+// Multiplier applied to v_pos.y.
+PER_INSTANCE in float aXYRatio;
+// Compared against EXTEND_MODE_REPEAT.
+PER_INSTANCE in int aExtendMode;
+// Forwarded unchanged to v_gradient_address.x.
+PER_INSTANCE in int aGradientStopsAddress;
+
+// Vertex stage: covers the task rect with the instanced quad and expresses
+// the position and start radius in units of (end radius - start radius).
+void main(void) {
+    // The reciprocal of the radius difference is the common scale factor.
+    // A zero difference has no reciprocal, so a zero scale is used instead.
+    float radius_delta = aEndRadius - aStartRadius;
+    float inv_radius_delta = 0.0;
+    if (radius_delta != 0.0) {
+        inv_radius_delta = 1.0 / radius_delta;
+    }
+
+    vec2 task_pos = mix(aTaskRect.xy, aTaskRect.zw, aPosition.xy);
+    gl_Position = uTransform * vec4(task_pos, 0.0, 1.0);
+
+    v_start_radius.x = aStartRadius * inv_radius_delta;
+
+    // The position is built relative to the task rect, so it does not depend
+    // on the task origin. Scaling .y by aXYRatio lets the fragment stage
+    // treat the gradient as circular.
+    vec2 task_size = aTaskRect.zw - aTaskRect.xy;
+    vec2 from_center = task_size * aPosition.xy * aScale - aCenter;
+    v_pos = from_center * inv_radius_delta;
+    v_pos.y *= aXYRatio;
+
+    v_gradient_repeat.x = float(aExtendMode == EXTEND_MODE_REPEAT);
+    v_gradient_address.x = aGradientStopsAddress;
+}
+#endif
+
+
+#ifdef WR_FRAGMENT_SHADER
+
+// Fragment stage: evaluates the gradient at this fragment's distance from
+// the center.
+void main(void) {
+    // v_pos and v_start_radius were both multiplied by 1/rd in the vertex
+    // stage, so solving length(pd) = start_radius + t * rd for t reduces to
+    // a subtraction.
+    float dist_from_center = length(v_pos);
+    float gradient_offset = dist_from_center - v_start_radius.x;
+
+    oFragColor = sample_gradient(gradient_offset);
+}
+
+#ifdef SWGL_DRAW_SPAN
+// Span-drawing entry point, compiled only when SWGL_DRAW_SPAN is defined.
+// Validates the gradient stops addressed by v_gradient_address.x in
+// sGpuBuffer and, if a usable address comes back, passes the radial gradient
+// parameters to swgl_commitRadialGradientRGBA8.
+// NOTE(review): presumably an RGBA8 fast path used by SWGL instead of the
+// per-fragment main() above -- confirm against the SWGL sources.
+void swgl_drawSpanRGBA8() {
+    int address = swgl_validateGradient(sGpuBuffer, get_gpu_buffer_uv(v_gradient_address.x),
+                                        int(GRADIENT_ENTRIES + 2.0));
+    // A negative result means nothing is committed by this function.
+    if (address < 0) {
+        return;
+    }
+    swgl_commitRadialGradientRGBA8(sGpuBuffer, address, GRADIENT_ENTRIES, v_gradient_repeat.x != 0.0,
+                                   v_pos, v_start_radius.x);
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_scale.glsl b/gfx/wr/webrender/res/cs_scale.glsl
new file mode 100644
index 0000000000..0b4f5d744b
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_scale.glsl
@@ -0,0 +1,62 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This shader must remain compatible with ESSL 1, at least for the
+// WR_FEATURE_TEXTURE_EXTERNAL_ESSL1 feature, so that it can be used to render
+// video on GLES devices without GL_OES_EGL_image_external_essl3 support.
+// This means we cannot use textureSize(), int inputs/outputs, etc.
+
+#include shared
+
+// Source texture coordinate, divided by the texture size in the vertex stage.
+varying highp vec2 vUv;
+// Bounds (.xy = min, .zw = max) that vUv is clamped to before sampling.
+flat varying highp vec4 vUvRect;
+#ifdef WR_FEATURE_TEXTURE_EXTERNAL_ESSL1
+// Texture size supplied as a uniform for this feature, where the vertex
+// stage uses it in place of TEX_SIZE(sColor0).
+uniform vec2 uTextureSize;
+#endif
+
+#ifdef WR_VERTEX_SHADER
+
+// Corners (.xy and .zw) of the destination rect the quad covers.
+PER_INSTANCE attribute vec4 aScaleTargetRect;
+// Corners (.xy and .zw) of the source rect; may be inverted.
+PER_INSTANCE attribute vec4 aScaleSourceRect;
+
+// Vertex stage: maps the instanced quad onto the target rect and derives the
+// source UV plus the half-texel-inset bounds it will be clamped to.
+void main(void) {
+    // Divisor that converts source coordinates into sampling coordinates.
+    // In WR_FEATURE_TEXTURE_RECT mode the rect and size already use
+    // non-normalized texture coordinates, so the divisor is one.
+#ifdef WR_FEATURE_TEXTURE_RECT
+    vec2 tex_dims = vec2(1, 1);
+#elif defined(WR_FEATURE_TEXTURE_EXTERNAL_ESSL1)
+    vec2 tex_dims = uTextureSize;
+#else
+    vec2 tex_dims = vec2(TEX_SIZE(sColor0));
+#endif
+
+    vec2 src_p0 = aScaleSourceRect.xy;
+    vec2 src_p1 = aScaleSourceRect.zw;
+
+    // The source corners may be inverted, so take component-wise min and max
+    // before insetting each bound by half a texel.
+    vec2 bounds_min = min(src_p0, src_p1) + vec2(0.5);
+    vec2 bounds_max = max(src_p0, src_p1) - vec2(0.5);
+    vUvRect = vec4(bounds_min, bounds_max) / tex_dims.xyxy;
+
+    vec2 src_extent = src_p1 - src_p0;
+    vUv = (src_p0 + src_extent * aPosition.xy) / tex_dims;
+
+    vec2 target_pos = mix(aScaleTargetRect.xy, aScaleTargetRect.zw, aPosition.xy);
+    gl_Position = uTransform * vec4(target_pos, 0.0, 1.0);
+}
+
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+// Fragment stage: samples the source, keeping the coordinate inside the
+// bounds computed by the vertex stage.
+void main(void) {
+    vec2 clamped_uv = clamp(vUv, vUvRect.xy, vUvRect.zw);
+    oFragColor = TEX_SAMPLE(sColor0, clamped_uv);
+}
+
+#ifdef SWGL_DRAW_SPAN
+// Span-drawing entry point, compiled only when SWGL_DRAW_SPAN is defined.
+// Passes the source sampler, the interpolated UV and the UV bounds to
+// swgl_commitTextureLinearRGBA8.
+// NOTE(review): presumably equivalent to the clamped sample in main() --
+// confirm against the SWGL sources.
+void swgl_drawSpanRGBA8() {
+    swgl_commitTextureLinearRGBA8(sColor0, vUv, vUvRect);
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/cs_svg_filter.glsl b/gfx/wr/webrender/res/cs_svg_filter.glsl
new file mode 100644
index 0000000000..6ccdda90d6
--- /dev/null
+++ b/gfx/wr/webrender/res/cs_svg_filter.glsl
@@ -0,0 +1,594 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define WR_FEATURE_TEXTURE_2D
+
+#include shared,prim_shared
+
+// Interpolated sampling coordinates for the first and second filter input.
+varying highp vec2 vInput1Uv;
+varying highp vec2 vInput2Uv;
+// Bounds (.xy = min, .zw = max) each input's coordinate is clamped to.
+flat varying highp vec4 vInput1UvRect;
+flat varying highp vec4 vInput2UvRect;
+// Integer filter parameters: the blend or composite mode in .x, or the GPU
+// cache address of the component transfer data in .xy.
+flat varying mediump ivec4 vData;
+// Float filter parameters whose meaning depends on the filter kind.
+flat varying mediump vec4 vFilterData0;
+flat varying mediump vec4 vFilterData1;
+
+// x: Filter input count, y: Filter kind.
+// Packed in to a vector to work around bug 1630356.
+flat varying mediump ivec2 vFilterInputCountFilterKindVec;
+#define vFilterInputCount vFilterInputCountFilterKindVec.x
+#define vFilterKind vFilterInputCountFilterKindVec.y
+// Packed in to a vector to work around bug 1630356.
+// .x carries the opacity factor for FILTER_OPACITY.
+flat varying mediump vec2 vFloat0;
+
+// Matrix applied to the input color by FILTER_COLOR_MATRIX.
+flat varying mediump mat4 vColorMat;
+// Per-channel (r, g, b, a) COMPONENT_TRANSFER_* function selectors.
+flat varying mediump ivec4 vFuncs;
+
+// Filter kinds, compared against aFilterKind / vFilterKind.
+#define FILTER_BLEND                0
+#define FILTER_FLOOD                1
+#define FILTER_LINEAR_TO_SRGB       2
+#define FILTER_SRGB_TO_LINEAR       3
+#define FILTER_OPACITY              4
+#define FILTER_COLOR_MATRIX         5
+#define FILTER_DROP_SHADOW          6
+#define FILTER_OFFSET               7
+#define FILTER_COMPONENT_TRANSFER   8
+#define FILTER_IDENTITY             9
+#define FILTER_COMPOSITE            10
+
+// Composite operators, passed as the mode argument of composite().
+#define COMPOSITE_OVER       0
+#define COMPOSITE_IN         1
+#define COMPOSITE_OUT        2
+#define COMPOSITE_ATOP       3
+#define COMPOSITE_XOR        4
+#define COMPOSITE_LIGHTER    5
+#define COMPOSITE_ARITHMETIC 6
+
+#ifdef WR_VERTEX_SHADER
+
+// Render task address of the filter output; its rect positions the quad.
+PER_INSTANCE in int aFilterRenderTaskAddress;
+// Render task addresses of the first and second input.
+PER_INSTANCE in int aFilterInput1TaskAddress;
+PER_INSTANCE in int aFilterInput2TaskAddress;
+// One of the FILTER_* values.
+PER_INSTANCE in int aFilterKind;
+// Number of inputs that are fetched and sampled (checked with > 0 and > 1).
+PER_INSTANCE in int aFilterInputCount;
+// Kind-specific integer: blend mode, composite operator, or the four packed
+// 4-bit component transfer function selectors.
+PER_INSTANCE in int aFilterGenericInt;
+// GPU cache address of kind-specific extra data.
+PER_INSTANCE in ivec2 aFilterExtraDataAddress;
+
+// Subset of a render task's data used by this shader.
+struct FilterTask {
+    // Rect of the task, as stored in RenderTaskData.task_rect.
+    RectWithEndpoint task_rect;
+    // First three components of RenderTaskData.user_data.
+    vec3 user_data;
+};
+
+// Fetches the render task at `address` and repackages its rect and the first
+// three user data components as a FilterTask.
+FilterTask fetch_filter_task(int address) {
+    RenderTaskData data = fetch_render_task_data(address);
+    return FilterTask(data.task_rect, data.user_data.xyz);
+}
+
+// Returns the task rect inset by half a texel on every side, divided by the
+// texture size. The result packs the minimum corner in .xy and the maximum
+// corner in .zw.
+vec4 compute_uv_rect(RectWithEndpoint task_rect, vec2 texture_size) {
+    vec2 inset_p0 = task_rect.p0 + vec2(0.5);
+    vec2 inset_p1 = task_rect.p1 - vec2(0.5);
+    return vec4(inset_p0, inset_p1) / texture_size.xyxy;
+}
+
+// Returns this vertex's sampling coordinate inside the task rect: the rect
+// corners (with the end corner floored) are divided by the texture size and
+// interpolated by the unit-quad position.
+vec2 compute_uv(RectWithEndpoint task_rect, vec2 texture_size) {
+    vec2 uv_begin = task_rect.p0 / texture_size;
+    vec2 uv_end = floor(task_rect.p1) / texture_size;
+    return mix(uv_begin, uv_end, aPosition.xy);
+}
+
+// Vertex stage: positions the quad over the filter's render task rect,
+// computes sampling coordinates for up to two inputs, and forwards the
+// kind-specific parameters to the fragment stage through flat varyings.
+void main(void) {
+    FilterTask filter_task = fetch_filter_task(aFilterRenderTaskAddress);
+    RectWithEndpoint target_rect = filter_task.task_rect;
+
+    vec2 pos = mix(target_rect.p0, target_rect.p1, aPosition.xy);
+
+    // Input varyings are only written when the corresponding input exists.
+    RectWithEndpoint input_1_task;
+    if (aFilterInputCount > 0) {
+        vec2 texture_size = vec2(TEX_SIZE(sColor0).xy);
+        input_1_task = fetch_render_task_rect(aFilterInput1TaskAddress);
+        vInput1UvRect = compute_uv_rect(input_1_task, texture_size);
+        vInput1Uv = compute_uv(input_1_task, texture_size);
+    }
+
+    RectWithEndpoint input_2_task;
+    if (aFilterInputCount > 1) {
+        vec2 texture_size = vec2(TEX_SIZE(sColor1).xy);
+        input_2_task = fetch_render_task_rect(aFilterInput2TaskAddress);
+        vInput2UvRect = compute_uv_rect(input_2_task, texture_size);
+        vInput2Uv = compute_uv(input_2_task, texture_size);
+    }
+
+    vFilterInputCount = aFilterInputCount;
+    vFilterKind = aFilterKind;
+
+    // This assignment is only used for component transfer filters but this
+    // assignment has to be done here and not in the component transfer case
+    // below because it doesn't get executed on Windows because of a suspected
+    // miscompile of this shader on Windows. See
+    // https://github.com/servo/webrender/wiki/Driver-issues#bug-1505871---assignment-to-varying-flat-arrays-inside-switch-statement-of-vertex-shader-suspected-miscompile-on-windows
+    // default: just to satisfy angle_shader_validation.rs which needs one
+    // default: for every switch, even in comments.
+    // Each selector is a 4-bit field of aFilterGenericInt, R in the highest
+    // of the four nibbles and A in the lowest.
+    vFuncs.r = (aFilterGenericInt >> 12) & 0xf; // R
+    vFuncs.g = (aFilterGenericInt >> 8)  & 0xf; // G
+    vFuncs.b = (aFilterGenericInt >> 4)  & 0xf; // B
+    vFuncs.a = (aFilterGenericInt)       & 0xf; // A
+
+    // Kind-specific parameter setup.
+    switch (aFilterKind) {
+        case FILTER_BLEND:
+            // The generic int is the blend mode.
+            vData = ivec4(aFilterGenericInt, 0, 0, 0);
+            break;
+        case FILTER_FLOOD:
+            // The fragment stage outputs this value directly.
+            vFilterData0 = fetch_from_gpu_cache_1_direct(aFilterExtraDataAddress);
+            break;
+        case FILTER_OPACITY:
+            vFloat0.x = filter_task.user_data.x;
+            break;
+        case FILTER_COLOR_MATRIX:
+            // Four blocks form the matrix; the block after them is the
+            // vector the fragment stage adds to the transformed color.
+            vec4 mat_data[4] = fetch_from_gpu_cache_4_direct(aFilterExtraDataAddress);
+            vColorMat = mat4(mat_data[0], mat_data[1], mat_data[2], mat_data[3]);
+            vFilterData0 = fetch_from_gpu_cache_1_direct(aFilterExtraDataAddress + ivec2(4, 0));
+            break;
+        case FILTER_DROP_SHADOW:
+            // Shadow color; the fragment stage scales its alpha by the
+            // second input's alpha.
+            vFilterData0 = fetch_from_gpu_cache_1_direct(aFilterExtraDataAddress);
+            break;
+        case FILTER_OFFSET:
+            // Negated offset converted to the input's sampling units.
+            vec2 texture_size = vec2(TEX_SIZE(sColor0).xy);
+            vFilterData0 = vec4(-filter_task.user_data.xy / texture_size, vec2(0.0));
+
+            // First input's rect in sampling units; the fragment stage zeroes
+            // samples whose offset coordinate falls outside it.
+            RectWithEndpoint task_rect = input_1_task;
+            vec4 clipRect = vec4(task_rect.p0, task_rect.p1);
+            clipRect /= texture_size.xyxy;
+            vFilterData1 = clipRect;
+            break;
+        case FILTER_COMPONENT_TRANSFER:
+            // GPU cache address of the transfer function data.
+            vData = ivec4(aFilterExtraDataAddress, 0, 0);
+            break;
+        case FILTER_COMPOSITE:
+            // The generic int is the composite operator; only the arithmetic
+            // operator needs extra coefficients.
+            vData = ivec4(aFilterGenericInt, 0, 0, 0);
+            if (aFilterGenericInt == COMPOSITE_ARITHMETIC) {
+              vFilterData0 = fetch_from_gpu_cache_1_direct(aFilterExtraDataAddress);
+            }
+            break;
+        default:
+            break;
+    }
+
+    gl_Position = uTransform * vec4(pos, 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+// Per-channel transfer function selectors, matched against the entries of
+// vFuncs in ComponentTransfer().
+#define COMPONENT_TRANSFER_IDENTITY 0
+#define COMPONENT_TRANSFER_TABLE 1
+#define COMPONENT_TRANSFER_DISCRETE 2
+#define COMPONENT_TRANSFER_LINEAR 3
+#define COMPONENT_TRANSFER_GAMMA 4
+
+// Multiply blend: component-wise product of backdrop and source.
+vec3 Multiply(vec3 Cb, vec3 Cs) {
+    vec3 product = Cb * Cs;
+    return product;
+}
+
+// Screen blend: sum of backdrop and source minus their product.
+vec3 Screen(vec3 Cb, vec3 Cs) {
+    vec3 sum = Cb + Cs;
+    vec3 product = Cb * Cs;
+    return sum - product;
+}
+
+// Hard-light blend: per component, multiplies with the doubled source where
+// the source is below 0.5 and screens with (2 * source - 1) otherwise.
+vec3 HardLight(vec3 Cb, vec3 Cs) {
+    vec3 doubled_source = 2.0 * Cs;
+    vec3 multiplied = Multiply(Cb, doubled_source);
+    vec3 screened = Screen(Cb, doubled_source - 1.0);
+    // step() yields 0.0 below the 0.5 threshold and 1.0 at or above it.
+    vec3 use_screen = step(vec3(0.5), Cs);
+    return mix(multiplied, screened, use_screen);
+}
+
+// TODO: Worth doing with mix/step? Check GLSL output.
+float ColorDodge(float Cb, float Cs) {
+    // A zero backdrop stays zero whatever the source is.
+    if (Cb == 0.0) {
+        return 0.0;
+    }
+    // A source of one would make the divisor below zero; the result is one.
+    if (Cs == 1.0) {
+        return 1.0;
+    }
+    float inverse_source = 1.0 - Cs;
+    return min(1.0, Cb / inverse_source);
+}
+
+// TODO: Worth doing with mix/step? Check GLSL output.
+float ColorBurn(float Cb, float Cs) {
+    // A backdrop of one stays one whatever the source is.
+    if (Cb == 1.0) {
+        return 1.0;
+    }
+    // A zero source would be the divisor below; the result is zero.
+    if (Cs == 0.0) {
+        return 0.0;
+    }
+    float inverse_backdrop = 1.0 - Cb;
+    return 1.0 - min(1.0, inverse_backdrop / Cs);
+}
+
+// Soft-light blend of one component: darkens the backdrop for sources up to
+// 0.5 and lightens it towards D(Cb) for brighter sources.
+float SoftLight(float Cb, float Cs) {
+    if (Cs <= 0.5) {
+        return Cb - (1.0 - 2.0 * Cs) * Cb * (1.0 - Cb);
+    }
+
+    // D is a cubic polynomial for backdrops up to 0.25 and a square root
+    // above that.
+    float D = (Cb <= 0.25)
+        ? ((16.0 * Cb - 12.0) * Cb + 4.0) * Cb
+        : sqrt(Cb);
+
+    return Cb + (2.0 * Cs - 1.0) * (D - Cb);
+}
+
+// Difference blend: absolute component-wise difference.
+vec3 Difference(vec3 Cb, vec3 Cs) {
+    vec3 delta = Cb - Cs;
+    return abs(delta);
+}
+
+// Exclusion blend: sum of backdrop and source minus twice their product.
+vec3 Exclusion(vec3 Cb, vec3 Cs) {
+    vec3 sum = Cb + Cs;
+    vec3 twice_product = 2.0 * Cb * Cs;
+    return sum - twice_product;
+}
+
+// These functions below are taken from the spec.
+// There's probably a much quicker way to implement
+// them in GLSL...
+// Saturation of a color: largest component minus smallest component.
+float Sat(vec3 c) {
+    float largest = max(c.r, max(c.g, c.b));
+    float smallest = min(c.r, min(c.g, c.b));
+    return largest - smallest;
+}
+
+// Luminosity of a color: weighted sum of its r, g and b components.
+float Lum(vec3 c) {
+    return dot(c, vec3(0.3, 0.59, 0.11));
+}
+
+// Pulls a color whose components fall below 0 or above 1 back towards its
+// own luminosity. The luminosity and both extremes are measured once, on
+// the incoming color.
+vec3 ClipColor(vec3 C) {
+    float lum = Lum(C);
+    float lowest = min(C.r, min(C.g, C.b));
+    float highest = max(C.r, max(C.g, C.b));
+
+    if (lowest < 0.0) {
+        C = lum + (((C - lum) * lum) / (lum - lowest));
+    }
+
+    if (highest > 1.0) {
+        C = lum + (((C - lum) * (1.0 - lum)) / (highest - lum));
+    }
+
+    return C;
+}
+
+// Shifts every component of C by the same amount so that its luminosity
+// becomes l, then clips the result with ClipColor.
+vec3 SetLum(vec3 C, float l) {
+    float shift = l - Lum(C);
+    vec3 shifted = C + shift;
+    return ClipColor(shifted);
+}
+
+// Rescales three components, given in ascending order, so that the smallest
+// becomes 0 and the largest becomes s. When the largest does not exceed the
+// smallest, all three become 0.
+void SetSatInner(inout float Cmin, inout float Cmid, inout float Cmax, float s) {
+    bool has_range = Cmax > Cmin;
+    // Cmid must be rescaled before Cmax is overwritten.
+    Cmid = has_range ? (((Cmid - Cmin) * s) / (Cmax - Cmin)) : 0.0;
+    Cmax = has_range ? s : 0.0;
+    Cmin = 0.0;
+}
+
+// Sets the saturation of C to s by finding which components are the
+// smallest, middle and largest and passing them to SetSatInner in that order.
+vec3 SetSat(vec3 C, float s) {
+    if (C.r <= C.g) {
+        if (C.g <= C.b) {
+            // r <= g <= b
+            SetSatInner(C.r, C.g, C.b, s);
+        } else if (C.r <= C.b) {
+            // r <= b < g
+            SetSatInner(C.r, C.b, C.g, s);
+        } else {
+            // b < r <= g
+            SetSatInner(C.b, C.r, C.g, s);
+        }
+    } else if (C.r <= C.b) {
+        // g < r <= b
+        SetSatInner(C.g, C.r, C.b, s);
+    } else if (C.g <= C.b) {
+        // g <= b < r
+        SetSatInner(C.g, C.b, C.r, s);
+    } else {
+        // b < g < r
+        SetSatInner(C.b, C.g, C.r, s);
+    }
+    return C;
+}
+
+// Hue blend: the source color given the backdrop's saturation and luminosity.
+vec3 Hue(vec3 Cb, vec3 Cs) {
+    vec3 resaturated = SetSat(Cs, Sat(Cb));
+    return SetLum(resaturated, Lum(Cb));
+}
+
+// Saturation blend: the backdrop color given the source's saturation, with
+// the backdrop's luminosity restored afterwards.
+vec3 Saturation(vec3 Cb, vec3 Cs) {
+    vec3 resaturated = SetSat(Cb, Sat(Cs));
+    return SetLum(resaturated, Lum(Cb));
+}
+
+// Color blend: the source color given the backdrop's luminosity.
+vec3 Color(vec3 Cb, vec3 Cs) {
+    float backdrop_lum = Lum(Cb);
+    return SetLum(Cs, backdrop_lum);
+}
+
+// Luminosity blend: the backdrop color given the source's luminosity.
+vec3 Luminosity(vec3 Cb, vec3 Cs) {
+    float source_lum = Lum(Cs);
+    return SetLum(Cb, source_lum);
+}
+
+// Blend modes accepted as the mode argument of blend().
+const int BlendMode_Normal      = 0;
+const int BlendMode_Multiply    = 1;
+const int BlendMode_Screen      = 2;
+const int BlendMode_Overlay     = 3;
+const int BlendMode_Darken      = 4;
+const int BlendMode_Lighten     = 5;
+const int BlendMode_ColorDodge  = 6;
+const int BlendMode_ColorBurn   = 7;
+const int BlendMode_HardLight   = 8;
+const int BlendMode_SoftLight   = 9;
+const int BlendMode_Difference  = 10;
+const int BlendMode_Exclusion   = 11;
+const int BlendMode_Hue         = 12;
+const int BlendMode_Saturation  = 13;
+const int BlendMode_Color       = 14;
+const int BlendMode_Luminosity  = 15;
+
+// Blends source Cs with backdrop Cb using one of the BlendMode_* modes and
+// returns the source-over composite of the blended color with the backdrop.
+// The returned rgb is weighted by the returned alpha.
+vec4 blend(vec4 Cs, vec4 Cb, int mode) {
+    // Blended color before compositing. Stays red for an unrecognised mode.
+    vec3 blended = vec3(1.0, 0.0, 0.0);
+
+    switch (mode) {
+        case BlendMode_Normal:
+            blended = Cs.rgb;
+            break;
+        case BlendMode_Multiply:
+            blended = Multiply(Cb.rgb, Cs.rgb);
+            break;
+        case BlendMode_Screen:
+            blended = Screen(Cb.rgb, Cs.rgb);
+            break;
+        case BlendMode_Overlay:
+            // Overlay is hard-light with backdrop and source exchanged.
+            blended = HardLight(Cs.rgb, Cb.rgb);
+            break;
+        case BlendMode_Darken:
+            blended = min(Cs.rgb, Cb.rgb);
+            break;
+        case BlendMode_Lighten:
+            blended = max(Cs.rgb, Cb.rgb);
+            break;
+        case BlendMode_ColorDodge:
+            blended = vec3(ColorDodge(Cb.r, Cs.r),
+                           ColorDodge(Cb.g, Cs.g),
+                           ColorDodge(Cb.b, Cs.b));
+            break;
+        case BlendMode_ColorBurn:
+            blended = vec3(ColorBurn(Cb.r, Cs.r),
+                           ColorBurn(Cb.g, Cs.g),
+                           ColorBurn(Cb.b, Cs.b));
+            break;
+        case BlendMode_HardLight:
+            blended = HardLight(Cb.rgb, Cs.rgb);
+            break;
+        case BlendMode_SoftLight:
+            blended = vec3(SoftLight(Cb.r, Cs.r),
+                           SoftLight(Cb.g, Cs.g),
+                           SoftLight(Cb.b, Cs.b));
+            break;
+        case BlendMode_Difference:
+            blended = Difference(Cb.rgb, Cs.rgb);
+            break;
+        case BlendMode_Exclusion:
+            blended = Exclusion(Cb.rgb, Cs.rgb);
+            break;
+        case BlendMode_Hue:
+            blended = Hue(Cb.rgb, Cs.rgb);
+            break;
+        case BlendMode_Saturation:
+            blended = Saturation(Cb.rgb, Cs.rgb);
+            break;
+        case BlendMode_Color:
+            blended = Color(Cb.rgb, Cs.rgb);
+            break;
+        case BlendMode_Luminosity:
+            blended = Luminosity(Cb.rgb, Cs.rgb);
+            break;
+        default: break;
+    }
+
+    // Where the backdrop is transparent the plain source color shows; where
+    // it is opaque the blended color does.
+    vec3 source_color = (1.0 - Cb.a) * Cs.rgb + Cb.a * blended;
+    // Source-over: interpolate from the alpha-weighted backdrop to the
+    // opaque source color by the source alpha.
+    vec4 backdrop_only = vec4(Cb.rgb * Cb.a, Cb.a);
+    return mix(backdrop_only, vec4(source_color, 1.0), Cs.a);
+}
+
+// Based on the Gecko's implementation in
+// https://hg.mozilla.org/mozilla-central/file/91b4c3687d75/gfx/src/FilterSupport.cpp#l24
+// These could be made faster by sampling a lookup table stored in a float texture
+// with linear interpolation.
+
+// Converts each component from sRGB to linear: a straight division for
+// values up to 0.04045, a 2.4 power curve above that.
+vec3 SrgbToLinear(vec3 color) {
+    vec3 linear_part = color / 12.92;
+    vec3 curve_part = pow(color / 1.055 + vec3(0.055 / 1.055), vec3(2.4));
+    bvec3 in_linear_part = lessThanEqual(color, vec3(0.04045));
+    return if_then_else(in_linear_part, linear_part, curve_part);
+}
+
+// Converts each component from linear to sRGB: a straight multiplication for
+// values up to 0.0031308, a 1/2.4 power curve above that.
+vec3 LinearToSrgb(vec3 color) {
+    vec3 linear_part = color * 12.92;
+    vec3 curve_part = vec3(1.055) * pow(color, vec3(1.0 / 2.4)) - vec3(0.055);
+    bvec3 in_linear_part = lessThanEqual(color, vec3(0.0031308));
+    return if_then_else(in_linear_part, linear_part, curve_part);
+}
+
+// This function has to be factored out due to the following issue:
+// https://github.com/servo/webrender/wiki/Driver-issues#bug-1532245---switch-statement-inside-control-flow-inside-switch-statement-fails-to-compile-on-some-android-phones
+// (and now the words "default: default:" so angle_shader_validation.rs passes)
+// Applies the per-channel transfer functions selected by vFuncs to colora
+// and returns the remapped color. Function parameters are read from the GPU
+// cache starting at vData.xy.
+vec4 ComponentTransfer(vec4 colora) {
+    // We push a different amount of data to the gpu cache depending on the
+    // function type.
+    // Identity => 0 blocks
+    // Table/Discrete => 64 blocks (256 values)
+    // Linear => 1 block (2 values)
+    // Gamma => 1 block (3 values)
+    // We loop through the color components and increment the offset (for the
+    // next color component) into the gpu cache based on how many blocks that
+    // function type put into the gpu cache.
+    // Table/Discrete use a 256 entry look up table.
+    // Linear/Gamma are a simple calculation.
+    int offset = 0;
+    vec4 texel;
+    int k;
+
+    // Dynamically indexing a vector is buggy on some devices, so use a temporary array.
+    int[4] funcs = int[4](vFuncs.r, vFuncs.g, vFuncs.b, vFuncs.a);
+    for (int i = 0; i < 4; i++) {
+        switch (funcs[i]) {
+            case COMPONENT_TRANSFER_IDENTITY:
+                break;
+            case COMPONENT_TRANSFER_TABLE:
+            case COMPONENT_TRANSFER_DISCRETE:
+                // fetch value from lookup table
+                // k is the channel value scaled by 255 and rounded to the
+                // nearest integer; k/4 selects the block and k % 4 the
+                // component inside that block.
+                // NOTE(review): k is not clamped to 0..255 here -- confirm
+                // that colora is always within 0..1 at this point.
+                k = int(floor(colora[i]*255.0 + 0.5));
+                texel = fetch_from_gpu_cache_1_direct(vData.xy + ivec2(offset + k/4, 0));
+                colora[i] = clamp(texel[k % 4], 0.0, 1.0);
+                // offset plus 256/4 blocks
+                offset = offset + 64;
+                break;
+            case COMPONENT_TRANSFER_LINEAR:
+                // fetch the two values for use in the linear equation
+                // texel[0] multiplies the channel and texel[1] is added.
+                texel = fetch_from_gpu_cache_1_direct(vData.xy + ivec2(offset, 0));
+                colora[i] = clamp(texel[0] * colora[i] + texel[1], 0.0, 1.0);
+                // offset plus 1 block
+                offset = offset + 1;
+                break;
+            case COMPONENT_TRANSFER_GAMMA:
+                // fetch the three values for use in the gamma equation
+                // texel[0] scales the power term, texel[1] is the exponent
+                // and texel[2] is added.
+                texel = fetch_from_gpu_cache_1_direct(vData.xy + ivec2(offset, 0));
+                colora[i] = clamp(texel[0] * pow(colora[i], texel[1]) + texel[2], 0.0, 1.0);
+                // offset plus 1 block
+                offset = offset + 1;
+                break;
+            default:
+                // shouldn't happen
+                break;
+        }
+    }
+    return colora;
+}
+
+// Composite Filter
+
+// Combines source Cs and backdrop Cb with one of the COMPOSITE_* operators.
+// Returns opaque green when the operator is not recognised.
+vec4 composite(vec4 Cs, vec4 Cb, int mode) {
+    vec4 combined = vec4(0.0, 1.0, 0.0, 1.0);
+
+    // Alpha-weighted colors and complementary alphas shared by the operators.
+    vec3 source_weighted = Cs.a * Cs.rgb;
+    vec3 backdrop_weighted = Cb.a * Cb.rgb;
+    float source_gap = 1.0 - Cs.a;
+    float backdrop_gap = 1.0 - Cb.a;
+
+    switch (mode) {
+        case COMPOSITE_OVER:
+            combined.rgb = source_weighted + backdrop_weighted * source_gap;
+            combined.a = Cs.a + Cb.a * source_gap;
+            break;
+        case COMPOSITE_IN:
+            combined.rgb = source_weighted * Cb.a;
+            combined.a = Cs.a * Cb.a;
+            break;
+        case COMPOSITE_OUT:
+            combined.rgb = source_weighted * backdrop_gap;
+            combined.a = Cs.a * backdrop_gap;
+            break;
+        case COMPOSITE_ATOP:
+            combined.rgb = source_weighted * Cb.a + backdrop_weighted * source_gap;
+            combined.a = Cs.a * Cb.a + Cb.a * source_gap;
+            break;
+        case COMPOSITE_XOR:
+            combined.rgb = source_weighted * backdrop_gap + backdrop_weighted * source_gap;
+            combined.a = Cs.a * backdrop_gap + Cb.a * source_gap;
+            break;
+        case COMPOSITE_LIGHTER:
+            combined.rgb = source_weighted + backdrop_weighted;
+            combined.a = Cs.a + Cb.a;
+            combined = clamp(combined, vec4(0.0), vec4(1.0));
+            break;
+        case COMPOSITE_ARITHMETIC:
+            // k1 * Cs * Cb + k2 * Cs + k3 * Cb + k4, with the four
+            // coefficients taken from vFilterData0.
+            combined = vec4(vFilterData0.x) * Cs * Cb + vec4(vFilterData0.y) * Cs + vec4(vFilterData0.z) * Cb + vec4(vFilterData0.w);
+            combined = clamp(combined, vec4(0.0), vec4(1.0));
+            break;
+        default:
+            break;
+    }
+    return combined;
+}
+
+// Samples `sampler` at uv after clamping uv to the rect whose minimum corner
+// is uvRect.xy and whose maximum corner is uvRect.zw.
+vec4 sampleInUvRect(sampler2D sampler, vec2 uv, vec4 uvRect) {
+    return texture(sampler, clamp(uv.xy, uvRect.xy, uvRect.zw));
+}
+
+// Fragment stage: samples up to two inputs, un-premultiplies them, applies
+// the filter selected by vFilterKind and writes the result. Filter kinds
+// that leave needsPremul set produce a non-premultiplied color that is
+// multiplied by its alpha at the end; the others clear the flag and their
+// result is written as-is.
+void main(void) {
+    vec4 Ca = vec4(0.0, 0.0, 0.0, 0.0);
+    vec4 Cb = vec4(0.0, 0.0, 0.0, 0.0);
+    if (vFilterInputCount > 0) {
+        Ca = sampleInUvRect(sColor0, vInput1Uv, vInput1UvRect);
+        // Un-premultiply; texels with zero alpha are left alone so that
+        // there is no division by zero.
+        if (Ca.a != 0.0) {
+            Ca.rgb /= Ca.a;
+        }
+    }
+    if (vFilterInputCount > 1) {
+        Cb = sampleInUvRect(sColor1, vInput2Uv, vInput2UvRect);
+        if (Cb.a != 0.0) {
+            Cb.rgb /= Cb.a;
+        }
+    }
+
+    // Opaque red unless one of the cases below overwrites it.
+    vec4 result = vec4(1.0, 0.0, 0.0, 1.0);
+
+    bool needsPremul = true;
+
+    switch (vFilterKind) {
+        case FILTER_BLEND:
+            result = blend(Ca, Cb, vData.x);
+            needsPremul = false;
+            break;
+        case FILTER_FLOOD:
+            result = vFilterData0;
+            needsPremul = false;
+            break;
+        case FILTER_LINEAR_TO_SRGB:
+            result.rgb = LinearToSrgb(Ca.rgb);
+            result.a = Ca.a;
+            break;
+        case FILTER_SRGB_TO_LINEAR:
+            result.rgb = SrgbToLinear(Ca.rgb);
+            result.a = Ca.a;
+            break;
+        case FILTER_OPACITY:
+            result.rgb = Ca.rgb;
+            result.a = Ca.a * vFloat0.x;
+            break;
+        case FILTER_COLOR_MATRIX:
+            result = vColorMat * Ca + vFilterData0;
+            result = clamp(result, vec4(0.0), vec4(1.0));
+            break;
+        case FILTER_DROP_SHADOW:
+            // The shadow takes its color from vFilterData0 and its coverage
+            // from the second input's alpha.
+            vec4 shadow = vec4(vFilterData0.rgb, Cb.a * vFilterData0.a);
+            // Normal blend + source-over composite
+            result = blend(Ca, shadow, BlendMode_Normal);
+            needsPremul = false;
+            break;
+        case FILTER_OFFSET:
+            // Sample the first input at the shifted coordinate and zero the
+            // result when that coordinate lies outside the input rect.
+            vec2 offsetUv = vInput1Uv + vFilterData0.xy;
+            result = sampleInUvRect(sColor0, offsetUv, vInput1UvRect);
+            result *= point_inside_rect(offsetUv, vFilterData1.xy, vFilterData1.zw);
+            needsPremul = false;
+            break;
+        case FILTER_COMPONENT_TRANSFER:
+            result = ComponentTransfer(Ca);
+            break;
+        case FILTER_IDENTITY:
+            result = Ca;
+            break;
+        case FILTER_COMPOSITE:
+            result = composite(Ca, Cb, vData.x);
+            needsPremul = false;
+            // Terminate this case explicitly instead of running on into the
+            // fallback label, matching every other case above.
+            break;
+        default:
+            break;
+    }
+
+    if (needsPremul) {
+        result.rgb *= result.a;
+    }
+
+    oFragColor = result;
+}
+#endif
diff --git a/gfx/wr/webrender/res/debug_color.glsl b/gfx/wr/webrender/res/debug_color.glsl
new file mode 100644
index 0000000000..12b530cda0
--- /dev/null
+++ b/gfx/wr/webrender/res/debug_color.glsl
@@ -0,0 +1,24 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,shared_other
+
+// Vertex color with rgb multiplied by alpha, interpolated per fragment.
+varying mediump vec4 vColor;
+
+#ifdef WR_VERTEX_SHADER
+// Per-vertex color; the vertex stage multiplies its rgb by its alpha.
+in vec4 aColor;
+
+// Vertex stage: outputs the alpha-multiplied color and places the vertex at
+// its position rounded to the nearest integer coordinate.
+void main(void) {
+    vec3 weighted_rgb = aColor.rgb * aColor.a;
+    vColor = vec4(weighted_rgb, aColor.a);
+
+    // floor(x + 0.5) rounds each coordinate to the nearest integer.
+    vec2 snapped = floor(aPosition + 0.5);
+    gl_Position = uTransform * vec4(snapped, 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+// Fragment stage: writes the interpolated vertex color unchanged.
+void main(void) {
+    vec4 interpolated = vColor;
+    oFragColor = interpolated;
+}
+#endif
diff --git a/gfx/wr/webrender/res/debug_font.glsl b/gfx/wr/webrender/res/debug_font.glsl
new file mode 100644
index 0000000000..3b08f1b2fe
--- /dev/null
+++ b/gfx/wr/webrender/res/debug_font.glsl
@@ -0,0 +1,30 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define WR_FEATURE_TEXTURE_2D
+
+#include shared,shared_other
+
+// Texture coordinate used to sample sColor0 in the fragment stage.
+varying highp vec2 vColorTexCoord;
+// Vertex color, forwarded unchanged from aColor.
+varying mediump vec4 vColor;
+
+#ifdef WR_VERTEX_SHADER
+// Per-vertex color.
+in vec4 aColor;
+// Per-vertex texture coordinate.
+in vec2 aColorTexCoord;
+
+// Vertex stage: forwards color and texture coordinate and places the vertex
+// at its position rounded to the nearest integer coordinate.
+void main(void) {
+    vColorTexCoord = aColorTexCoord;
+    vColor = aColor;
+
+    // floor(x + 0.5) rounds each coordinate to the nearest integer.
+    vec2 snapped = floor(aPosition + 0.5);
+    gl_Position = uTransform * vec4(snapped, 0.0, 1.0);
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+// Fragment stage: scales the vertex color by the red channel sampled from
+// sColor0.
+void main(void) {
+    oFragColor = vColor * texture(sColor0, vColorTexCoord).r;
+}
+#endif
diff --git a/gfx/wr/webrender/res/ellipse.glsl b/gfx/wr/webrender/res/ellipse.glsl
new file mode 100644
index 0000000000..a378c199ef
--- /dev/null
+++ b/gfx/wr/webrender/res/ellipse.glsl
@@ -0,0 +1,93 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Preprocess the radii for computing the distance approximation. This should
+// be used in the vertex shader if possible to avoid doing expensive division
+// in the fragment shader. When dealing with a point (zero radii), approximate
+// it as an ellipse with very small radii so that we don't need to branch.
+vec2 inverse_radii_squared(vec2 radii) {
+    // The lower bound keeps zero radii from producing a division by zero.
+    vec2 radii_sq = max(radii * radii, 1.0e-6);
+    return 1.0 / radii_sq;
+}
+
+#ifdef WR_FRAGMENT_SHADER
+
+// One iteration of Newton's method on the 2D equation of an ellipse:
+//
+//     E(x, y) = x^2/a^2 + y^2/b^2 - 1
+//
+// The Jacobian of this equation is:
+//
+//     J(E(x, y)) = [ 2*x/a^2 2*y/b^2 ]
+//
+// We approximate the distance with:
+//
+//     E(x, y) / ||J(E(x, y))||
+//
+// See G. Taubin, "Distance Approximations for Rasterizing Implicit
+// Curves", section 3.
+//
+// A scale relative to the unit scale of the ellipse may be passed in to cause
+// the math to degenerate to length(p) when scale is 0, or otherwise give the
+// normal distance approximation if scale is 1.
+float distance_to_ellipse_approx(vec2 p, vec2 inv_radii_sq, float scale) {
+    // p with each component divided by the matching squared radius.
+    vec2 scaled_p = p * inv_radii_sq;
+    // Value of the implicit function at p.
+    float implicit_value = dot(p, scaled_p) - scale;
+    // Gradient of the implicit function at p.
+    vec2 gradient = (1.0 + scale) * scaled_p;
+    float inv_gradient_length = inversesqrt(dot(gradient, gradient));
+    return implicit_value * inv_gradient_length;
+}
+
+// Slower but more accurate version that uses the exact distance when dealing
+// with a 0-radius point distance and otherwise uses the faster approximation
+// when dealing with non-zero radii.
+float distance_to_ellipse(vec2 p, vec2 radii) {
+    // The scale is 1.0 only when both radii are positive, and 0.0 otherwise.
+    bool both_positive = all(greaterThan(radii, vec2(0.0)));
+    vec2 inv_radii_sq = inverse_radii_squared(radii);
+    return distance_to_ellipse_approx(p, inv_radii_sq, float(both_positive));
+}
+
+// Distance from pos to a rounded rect: the larger of the distance to the
+// selected corner ellipse and the signed distance to the rect bounds.
+// Each plane_* holds a half-space normal in .xy and its offset in .z. Each
+// center_radius_* holds a corner ellipse center in .xy; its .zw is passed on
+// to distance_to_ellipse_approx as the inverse squared radii.
+float distance_to_rounded_rect(
+    vec2 pos,
+    vec3 plane_tl,
+    vec4 center_radius_tl,
+    vec3 plane_tr,
+    vec4 center_radius_tr,
+    vec3 plane_br,
+    vec4 center_radius_br,
+    vec3 plane_bl,
+    vec4 center_radius_bl,
+    vec4 rect_bounds
+) {
+    // Used when the fragment lies in no corner region: a small offset that
+    // makes the ellipse term return a negative distance.
+    vec4 selected = vec4(vec2(1.0e-6), vec2(1.0));
+
+    // Test each corner's half-space plane in turn; a later match replaces an
+    // earlier one. The offset between pos and the ellipse center is mirrored
+    // per corner.
+    if (dot(pos, plane_tl.xy) > plane_tl.z) {
+        selected = vec4(center_radius_tl.xy - pos, center_radius_tl.zw);
+    }
+    if (dot(pos, plane_tr.xy) > plane_tr.z) {
+        selected = vec4((center_radius_tr.xy - pos) * vec2(-1.0, 1.0), center_radius_tr.zw);
+    }
+    if (dot(pos, plane_br.xy) > plane_br.z) {
+        selected = vec4(pos - center_radius_br.xy, center_radius_br.zw);
+    }
+    if (dot(pos, plane_bl.xy) > plane_bl.z) {
+        selected = vec4((center_radius_bl.xy - pos) * vec2(1.0, -1.0), center_radius_bl.zw);
+    }
+
+    float corner_distance = distance_to_ellipse_approx(selected.xy, selected.zw, 1.0);
+    float bounds_distance = signed_distance_rect(pos, rect_bounds.xy, rect_bounds.zw);
+    return max(corner_distance, bounds_distance);
+}
+#endif
diff --git a/gfx/wr/webrender/res/gpu_buffer.glsl b/gfx/wr/webrender/res/gpu_buffer.glsl
new file mode 100644
index 0000000000..25f4622db2
--- /dev/null
+++ b/gfx/wr/webrender/res/gpu_buffer.glsl
@@ -0,0 +1,42 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+uniform HIGHP_SAMPLER_FLOAT sampler2D sGpuBuffer;
+
+ivec2 get_gpu_buffer_uv(HIGHP_FS_ADDRESS int address) { // converts a linear GPU-buffer address to a 2D texel coordinate
+    return ivec2(uint(address) % WR_MAX_VERTEX_TEXTURE_WIDTH, // x: column within the row
+                 uint(address) / WR_MAX_VERTEX_TEXTURE_WIDTH); // y: row index
+}
+
+vec4 fetch_from_gpu_buffer_1(HIGHP_FS_ADDRESS int address) { // reads the single vec4 stored at `address` in sGpuBuffer
+    ivec2 uv = get_gpu_buffer_uv(address); // linear address -> texel coordinate
+    return texelFetch(sGpuBuffer, uv, 0); // LOD 0
+}
+
+vec4[2] fetch_from_gpu_buffer_2(HIGHP_FS_ADDRESS int address) { // reads two horizontally adjacent vec4 texels from sGpuBuffer
+    ivec2 uv = get_gpu_buffer_uv(address); // texel coordinate of the first vec4
+    return vec4[2](
+        TEXEL_FETCH(sGpuBuffer, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuBuffer, uv, 0, ivec2(1, 0)) // x + 1, same row; presumably data never straddles a row - TODO confirm
+    );
+}
+
+vec4[3] fetch_from_gpu_buffer_3(HIGHP_FS_ADDRESS int address) { // reads three horizontally adjacent vec4 texels from sGpuBuffer
+    ivec2 uv = get_gpu_buffer_uv(address); // texel coordinate of the first vec4
+    return vec4[3](
+        TEXEL_FETCH(sGpuBuffer, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuBuffer, uv, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuBuffer, uv, 0, ivec2(2, 0)) // offsets only advance x; presumably data never straddles a row - TODO confirm
+    );
+}
+
+vec4[4] fetch_from_gpu_buffer_4(HIGHP_FS_ADDRESS int address) { // reads four horizontally adjacent vec4 texels from sGpuBuffer
+    ivec2 uv = get_gpu_buffer_uv(address); // texel coordinate of the first vec4
+    return vec4[4](
+        TEXEL_FETCH(sGpuBuffer, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuBuffer, uv, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuBuffer, uv, 0, ivec2(2, 0)),
+        TEXEL_FETCH(sGpuBuffer, uv, 0, ivec2(3, 0)) // offsets only advance x; presumably data never straddles a row - TODO confirm
+    );
+}
diff --git a/gfx/wr/webrender/res/gpu_cache.glsl b/gfx/wr/webrender/res/gpu_cache.glsl
new file mode 100644
index 0000000000..cd5e41fec4
--- /dev/null
+++ b/gfx/wr/webrender/res/gpu_cache.glsl
@@ -0,0 +1,137 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+uniform HIGHP_SAMPLER_FLOAT sampler2D sGpuCache;
+
+#define VECS_PER_IMAGE_RESOURCE     2
+
+// TODO(gw): This is here temporarily while we have
+//           both GPU store and cache. When the GPU
+//           store code is removed, we can change the
+//           PrimitiveInstance instance structure to
+//           use 2x unsigned shorts as vertex attributes
+//           instead of an int, and encode the UV directly
+//           in the vertices.
+ivec2 get_gpu_cache_uv(HIGHP_FS_ADDRESS int address) { // converts a linear GPU-cache address to a 2D texel coordinate
+    return ivec2(uint(address) % WR_MAX_VERTEX_TEXTURE_WIDTH, // x: column within the row
+                 uint(address) / WR_MAX_VERTEX_TEXTURE_WIDTH); // y: row index
+}
+
+vec4[2] fetch_from_gpu_cache_2_direct(ivec2 address) { // "direct": `address` is already a texel coordinate, no conversion is done
+    return vec4[2](
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0)) // next texel in the same row
+    );
+}
+
+vec4[2] fetch_from_gpu_cache_2(HIGHP_FS_ADDRESS int address) { // reads two adjacent vec4 texels starting at a linear cache address
+    ivec2 uv = get_gpu_cache_uv(address); // linear address -> texel coordinate
+    return vec4[2](
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)) // next texel in the same row
+    );
+}
+
+vec4 fetch_from_gpu_cache_1_direct(ivec2 address) { // "direct": `address` is already a texel coordinate
+    return texelFetch(sGpuCache, address, 0); // single vec4 at LOD 0
+}
+
+vec4 fetch_from_gpu_cache_1(HIGHP_FS_ADDRESS int address) { // reads the single vec4 stored at a linear cache address
+    ivec2 uv = get_gpu_cache_uv(address); // linear address -> texel coordinate
+    return texelFetch(sGpuCache, uv, 0); // LOD 0
+}
+
+#ifdef WR_VERTEX_SHADER
+
+vec4[8] fetch_from_gpu_cache_8(int address) { // vertex-shader only (inside WR_VERTEX_SHADER), so no HIGHP_FS_ADDRESS qualifier is used
+    ivec2 uv = get_gpu_cache_uv(address); // texel coordinate of the first vec4
+    return vec4[8](
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)), // eight consecutive texels in the same row
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(3, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(4, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(5, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(6, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(7, 0))
+    );
+}
+
+vec4[3] fetch_from_gpu_cache_3(int address) { // reads three adjacent vec4 texels starting at a linear cache address
+    ivec2 uv = get_gpu_cache_uv(address); // texel coordinate of the first vec4
+    return vec4[3](
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0)) // offsets only advance x
+    );
+}
+
+vec4[3] fetch_from_gpu_cache_3_direct(ivec2 address) { // "direct": `address` is already a texel coordinate
+    return vec4[3](
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(2, 0)) // offsets only advance x
+    );
+}
+
+vec4[4] fetch_from_gpu_cache_4_direct(ivec2 address) { // "direct": `address` is already a texel coordinate
+    return vec4[4](
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(2, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(3, 0)) // offsets only advance x
+    );
+}
+
+vec4[4] fetch_from_gpu_cache_4(int address) { // reads four adjacent vec4 texels starting at a linear cache address
+    ivec2 uv = get_gpu_cache_uv(address); // texel coordinate of the first vec4
+    return vec4[4](
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(3, 0)) // offsets only advance x
+    );
+}
+
+//TODO: image resource is too specific for this module
+
+struct ImageSource { // decoded form of the two GPU-cache vec4s read by fetch_image_source*
+    RectWithEndpoint uv_rect; // p0 = first vec4 .xy, p1 = first vec4 .zw
+    vec4 user_data; // second vec4, passed through uninterpreted
+};
+
+ImageSource fetch_image_source(int address) { // loads an ImageSource from a linear GPU-cache address
+    //Note: number of blocks has to match `renderer::BLOCKS_PER_UV_RECT`
+    vec4 data[2] = fetch_from_gpu_cache_2(address);
+    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw); // block 0: rect endpoints
+    return ImageSource(uv_rect, data[1]); // block 1: user data
+}
+
+ImageSource fetch_image_source_direct(ivec2 address) { // as fetch_image_source, but `address` is already a texel coordinate
+    vec4 data[2] = fetch_from_gpu_cache_2_direct(address);
+    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw); // block 0: rect endpoints
+    return ImageSource(uv_rect, data[1]); // block 1: user data
+}
+
+// Fetch optional extra data for a texture cache resource. This can contain
+// a polygon defining a UV rect within the texture cache resource.
+// Note: the polygon coordinates are in homogeneous space.
+struct ImageSourceExtra { // four corner vec4s, filled in this order by fetch_image_source_extra
+    vec4 st_tl; // presumably top-left corner (per the name) - TODO confirm
+    vec4 st_tr; // presumably top-right
+    vec4 st_bl; // presumably bottom-left
+    vec4 st_br; // presumably bottom-right
+};
+
+ImageSourceExtra fetch_image_source_extra(int address) { // `address` is the same base address that fetch_image_source takes
+    vec4 data[4] = fetch_from_gpu_cache_4(address + VECS_PER_IMAGE_RESOURCE); // skip the 2 vec4s read by fetch_image_source
+    return ImageSourceExtra(
+        data[0], // st_tl
+        data[1], // st_tr
+        data[2], // st_bl
+        data[3] // st_br
+    );
+}
+
+#endif //WR_VERTEX_SHADER
diff --git a/gfx/wr/webrender/res/gpu_cache_update.glsl b/gfx/wr/webrender/res/gpu_cache_update.glsl
new file mode 100644
index 0000000000..fcabfacb4f
--- /dev/null
+++ b/gfx/wr/webrender/res/gpu_cache_update.glsl
@@ -0,0 +1,27 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include base
+
+varying highp vec4 vData;
+
+#ifdef WR_VERTEX_SHADER
+in vec4 aValue;
+in vec2 aPosition;
+
+void main() { // vertex stage: forwards aValue and positions one point per update
+    vData = aValue; // value the fragment stage writes out
+    gl_Position = vec4(aPosition * 2.0 - 1.0, 0.0, 1.0); // maps [0, 1] to [-1, 1]; assumes aPosition is normalized - TODO confirm
+    gl_PointSize = 1.0;
+}
+
+#endif //WR_VERTEX_SHADER
+
+#ifdef WR_FRAGMENT_SHADER
+out vec4 oValue;
+
+void main() { // fragment stage: writes the interpolated value unchanged
+    oValue = vData;
+}
+#endif //WR_FRAGMENT_SHADER
diff --git a/gfx/wr/webrender/res/gradient.glsl b/gfx/wr/webrender/res/gradient.glsl
new file mode 100644
index 0000000000..87c011fefc
--- /dev/null
+++ b/gfx/wr/webrender/res/gradient.glsl
@@ -0,0 +1,63 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Gradient GPU cache address.
+// Packed in to a vector to work around bug 1630356.
+flat varying highp ivec2 v_gradient_address;
+// Repetition along the gradient stops.
+// Packed in to a vector to work around bug 1630356.
+flat varying mediump vec2 v_gradient_repeat;
+
+#ifdef WR_FRAGMENT_SHADER
+
+#ifdef WR_FEATURE_DITHERING
+vec4 dither(vec4 color) { // adds per-pixel noise from sDither to the RGB channels; alpha is unchanged
+    const int matrix_mask = 7; // keeps the low 3 bits, so pos is in 0..7 on each axis
+
+    ivec2 pos = ivec2(gl_FragCoord.xy) & ivec2(matrix_mask);
+    float noise_normalized = (texelFetch(sDither, pos, 0).r * 255.0 + 0.5) / 64.0; // presumably the texture stores 0..63 in an 8-bit channel - TODO confirm
+    float noise = (noise_normalized - 0.5) / 256.0; // scale down to the unit length
+
+    return color + vec4(noise, noise, noise, 0);
+}
+#else
+vec4 dither(vec4 color) { // WR_FEATURE_DITHERING is off: identity
+    return color;
+}
+#endif //WR_FEATURE_DITHERING
+
+#define GRADIENT_ENTRIES 128.0
+
+float clamp_gradient_entry(float offset) { // maps a gradient offset to a fractional LUT entry position
+    // Calculate the color entry index to use for this offset:
+    //     offsets < 0 use the first color entry, 0
+    //     offsets from [0, 1) use the color entries in the range of [1, N-1)
+    //     offsets >= 1 use the last color entry, N-1
+    //     so transform the range [0, 1) -> [1, N-1)
+
+    // TODO(gw): In the future we might consider making the size of the
+    // LUT vary based on number / distribution of stops in the gradient.
+    // Ensure we don't fetch outside the valid range of the LUT.
+    return clamp(1.0 + offset * GRADIENT_ENTRIES, 0.0, 1.0 + GRADIENT_ENTRIES); // result is in [0, GRADIENT_ENTRIES + 1]
+}
+
+vec4 sample_gradient(float offset) { // returns the dithered gradient colour for `offset`
+    // Modulo the offset if the gradient repeats.
+    offset -= floor(offset) * v_gradient_repeat.x; // .x is written as 0.0 or 1.0 by write_gradient_vertex
+
+    // Calculate the texel to index into the gradient color entries:
+    //     floor(x) is the gradient color entry index
+    //     fract(x) is the linear filtering factor between start and end
+    float x = clamp_gradient_entry(offset);
+    float entry_index = floor(x);
+    float entry_fract = x - entry_index;
+
+    // Fetch the start and end color. There is a [start, end] color per entry.
+    vec4 texels[2] = fetch_from_gpu_buffer_2(v_gradient_address.x + 2 * int(entry_index)); // two vec4s per entry
+
+    // Finally interpolate and apply dithering
+    return dither(texels[0] + texels[1] * entry_fract); // NOTE(review): texels[1] is used as a delta here, not an absolute end colour - confirm against the LUT builder
+}
+
+#endif //WR_FRAGMENT_SHADER
diff --git a/gfx/wr/webrender/res/gradient_shared.glsl b/gfx/wr/webrender/res/gradient_shared.glsl
new file mode 100644
index 0000000000..a3cc042ca6
--- /dev/null
+++ b/gfx/wr/webrender/res/gradient_shared.glsl
@@ -0,0 +1,78 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include gradient
+
+// Size of the gradient pattern's rectangle, used to compute horizontal and vertical
+// repetitions. Not to be confused with another kind of repetition of the pattern
+// which happens along the gradient stops.
+flat varying highp vec2 v_repeated_size;
+
+varying highp vec2 v_pos;
+
+#ifdef WR_FEATURE_ALPHA_PASS
+flat varying highp vec2 v_tile_repeat;
+#endif
+
+#ifdef WR_VERTEX_SHADER
+void write_gradient_vertex( // writes the gradient varyings shared by the gradient shaders
+    VertexInfo vi, // only vi.local_pos is read
+    RectWithEndpoint local_rect,
+    RectWithEndpoint segment_rect, // only read when BRUSH_FLAG_SEGMENT_RELATIVE is set
+    ivec4 prim_user_data, // .x is the gradient address
+    int brush_flags,
+    vec4 texel_rect, // only read when BRUSH_FLAG_SEGMENT_RELATIVE is set
+    int extend_mode, // compared against EXTEND_MODE_REPEAT
+    vec2 stretch_size // size of one pattern repetition
+) {
+    if ((brush_flags & BRUSH_FLAG_SEGMENT_RELATIVE) != 0) {
+        v_pos = (vi.local_pos - segment_rect.p0) / rect_size(segment_rect); // fraction across the segment
+        v_pos = v_pos * (texel_rect.zw - texel_rect.xy) + texel_rect.xy; // remap into texel_rect
+        v_pos = v_pos * rect_size(local_rect); // scale by the primitive size
+    } else {
+        v_pos = vi.local_pos - local_rect.p0; // position relative to the primitive origin
+    }
+
+    vec2 tile_repeat = rect_size(local_rect) / stretch_size; // number of repetitions per axis; only stored under WR_FEATURE_ALPHA_PASS
+    v_repeated_size = stretch_size;
+
+    // Normalize UV to 0..1 scale.
+    v_pos /= v_repeated_size;
+
+    v_gradient_address.x = prim_user_data.x; // only .x of the packed varying is written
+
+    // Whether to repeat the gradient along the line instead of clamping.
+    v_gradient_repeat.x = float(extend_mode == EXTEND_MODE_REPEAT); // 1.0 or 0.0
+
+#ifdef WR_FEATURE_ALPHA_PASS
+    v_tile_repeat = tile_repeat;
+#endif
+}
+#endif //WR_VERTEX_SHADER
+
+#ifdef WR_FRAGMENT_SHADER
+vec2 compute_repeated_pos() { // returns v_pos wrapped into one pattern repetition
+#if defined(WR_FEATURE_ALPHA_PASS) && !defined(SWGL_ANTIALIAS)
+    // Handle top and left inflated edges (see brush_image).
+    vec2 local_pos = max(v_pos, vec2(0.0)); // negative coordinates clamp to 0
+
+    // Apply potential horizontal and vertical repetitions.
+    vec2 pos = fract(local_pos);
+
+    // Handle bottom and right inflated edges (see brush_image).
+    if (local_pos.x >= v_tile_repeat.x) { // at or past the last repetition: pin to 1.0 instead of wrapping to 0
+        pos.x = 1.0;
+    }
+    if (local_pos.y >= v_tile_repeat.y) {
+        pos.y = 1.0;
+    }
+    return pos;
+#else
+    // Apply potential horizontal and vertical repetitions.
+    return fract(v_pos);
+#endif
+}
+
+#endif //WR_FRAGMENT_SHADER
+
diff --git a/gfx/wr/webrender/res/prim_shared.glsl b/gfx/wr/webrender/res/prim_shared.glsl
new file mode 100644
index 0000000000..1a599bf980
--- /dev/null
+++ b/gfx/wr/webrender/res/prim_shared.glsl
@@ -0,0 +1,246 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include rect,render_task,gpu_cache,transform
+
+#define EXTEND_MODE_CLAMP  0
+#define EXTEND_MODE_REPEAT 1
+
+#define SUBPX_DIR_NONE        0
+#define SUBPX_DIR_HORIZONTAL  1
+#define SUBPX_DIR_VERTICAL    2
+#define SUBPX_DIR_MIXED       3
+
+#define RASTER_LOCAL            0
+#define RASTER_SCREEN           1
+
+uniform sampler2D sClipMask;
+
+#ifndef SWGL_CLIP_MASK
+// TODO: convert back to RectWithEndpoint if driver issues are resolved, if ever.
+flat varying mediump vec4 vClipMaskUvBounds;
+varying highp vec2 vClipMaskUv;
+#endif
+
+#ifdef WR_VERTEX_SHADER
+
+#define COLOR_MODE_ALPHA                0
+#define COLOR_MODE_SUBPX_DUAL_SOURCE    1
+#define COLOR_MODE_BITMAP_SHADOW        2
+#define COLOR_MODE_COLOR_BITMAP         3
+#define COLOR_MODE_IMAGE                4
+#define COLOR_MODE_MULTIPLY_DUAL_SOURCE 5
+
+uniform HIGHP_SAMPLER_FLOAT sampler2D sPrimitiveHeadersF;
+uniform HIGHP_SAMPLER_FLOAT isampler2D sPrimitiveHeadersI;
+
+// Instanced attributes
+PER_INSTANCE in ivec4 aData;
+
+#define VECS_PER_PRIM_HEADER_F 2U
+#define VECS_PER_PRIM_HEADER_I 2U
+
+struct Instance // unpacked form of the aData instance attribute, see decode_instance_attributes
+{
+    int prim_header_address; // aData.x
+    int picture_task_address; // aData.y >> 16
+    int clip_address; // aData.y & 0xffff
+    int segment_index; // aData.z & 0xffff
+    int flags; // aData.z >> 16
+    int resource_address; // aData.w & 0xffffff
+    int brush_kind; // aData.w >> 24
+};
+
+Instance decode_instance_attributes() { // unpacks the bit fields of the per-instance aData attribute
+    Instance instance;
+
+    instance.prim_header_address = aData.x;
+    instance.picture_task_address = aData.y >> 16; // signed shift without a mask: sign-extends if bit 31 is set
+    instance.clip_address = aData.y & 0xffff; // low 16 bits
+    instance.segment_index = aData.z & 0xffff; // low 16 bits
+    instance.flags = aData.z >> 16; // signed shift without a mask
+    instance.resource_address = aData.w & 0xffffff; // low 24 bits
+    instance.brush_kind = aData.w >> 24; // signed shift without a mask
+
+    return instance;
+}
+
+struct PrimitiveHeader { // filled by fetch_prim_header from sPrimitiveHeadersF / sPrimitiveHeadersI
+    RectWithEndpoint local_rect; // float texel 0: xy = p0, zw = p1
+    RectWithEndpoint local_clip_rect; // float texel 1: xy = p0, zw = p1
+    float z; // integer texel 0 .x, converted to float
+    int specific_prim_address; // integer texel 0 .y
+    int transform_id; // integer texel 0 .z
+    ivec4 user_data; // integer texel 1
+};
+
+PrimitiveHeader fetch_prim_header(int index) { // loads the float and integer halves of primitive header `index`
+    PrimitiveHeader ph;
+
+    ivec2 uv_f = get_fetch_uv(index, VECS_PER_PRIM_HEADER_F); // get_fetch_uv is defined outside this view
+    vec4 local_rect = TEXEL_FETCH(sPrimitiveHeadersF, uv_f, 0, ivec2(0, 0));
+    vec4 local_clip_rect = TEXEL_FETCH(sPrimitiveHeadersF, uv_f, 0, ivec2(1, 0));
+    ph.local_rect = RectWithEndpoint(local_rect.xy, local_rect.zw);
+    ph.local_clip_rect = RectWithEndpoint(local_clip_rect.xy, local_clip_rect.zw);
+
+    ivec2 uv_i = get_fetch_uv(index, VECS_PER_PRIM_HEADER_I);
+    ivec4 data0 = TEXEL_FETCH(sPrimitiveHeadersI, uv_i, 0, ivec2(0, 0));
+    ivec4 data1 = TEXEL_FETCH(sPrimitiveHeadersI, uv_i, 0, ivec2(1, 0));
+    ph.z = float(data0.x); // stored as an integer, used as a float depth
+    ph.specific_prim_address = data0.y;
+    ph.transform_id = data0.z; // data0.w is not read
+    ph.user_data = data1;
+
+    return ph;
+}
+
+struct VertexInfo { // result of write_vertex
+    vec2 local_pos; // local position after clamping to the local clip rect
+    vec4 world_pos; // transform.m * (local_pos, 0, 1), before any divide by w
+};
+
+VertexInfo write_vertex(vec2 local_pos,
+                        RectWithEndpoint local_clip_rect,
+                        float z,
+                        Transform transform,
+                        PictureTask task) {
+    // Clamp to the local clip rect.
+    vec2 clamped_local_pos = rect_clamp(local_clip_rect, local_pos);
+
+    // Transform the current vertex to world space.
+    vec4 world_pos = transform.m * vec4(clamped_local_pos, 0.0, 1.0);
+
+    // Convert the world positions to device pixel space.
+    vec2 device_pos = world_pos.xy * task.device_pixel_scale;
+
+    // Apply offsets for the render task to get correct screen location.
+    vec2 final_offset = -task.content_origin + task.task_rect.p0;
+
+    gl_Position = uTransform * vec4(device_pos + final_offset * world_pos.w, z * world_pos.w, world_pos.w); // offset and z are pre-multiplied by w
+
+    VertexInfo vi = VertexInfo(
+        clamped_local_pos,
+        world_pos
+    );
+
+    return vi;
+}
+
+RectWithEndpoint clip_and_init_antialiasing(RectWithEndpoint segment_rect,
+                                            RectWithEndpoint prim_rect,
+                                            RectWithEndpoint clip_rect,
+                                            int edge_flags, // bits 1, 2, 4, 8 select the edges that get AA
+                                            float z, // NOTE(review): not read in this function
+                                            Transform transform, // NOTE(review): not read in this function
+                                            PictureTask task) { // NOTE(review): not read in this function
+#ifdef SWGL_ANTIALIAS
+    // Check if the bounds are smaller than the unmodified segment rect. If so,
+    // it is safe to enable AA on those edges.
+    bvec4 clipped = bvec4(greaterThan(clip_rect.p0, segment_rect.p0),
+                          lessThan(clip_rect.p1, segment_rect.p1));
+    swgl_antiAlias(edge_flags | (clipped.x ? 1 : 0) | (clipped.y ? 2 : 0) |
+                   (clipped.z ? 4 : 0) | (clipped.w ? 8 : 0));
+#endif
+
+    segment_rect.p0 = clamp(segment_rect.p0, clip_rect.p0, clip_rect.p1); // clip the segment to clip_rect
+    segment_rect.p1 = clamp(segment_rect.p1, clip_rect.p0, clip_rect.p1);
+
+#ifndef SWGL_ANTIALIAS
+    prim_rect.p0 = clamp(prim_rect.p0, clip_rect.p0, clip_rect.p1); // NOTE(review): prim_rect is not read after these two clamps
+    prim_rect.p1 = clamp(prim_rect.p1, clip_rect.p0, clip_rect.p1);
+
+    // Select between the segment and prim edges based on edge mask.
+    // We must perform the bitwise-and for each component individually, as a
+    // vector bitwise-and followed by conversion to bvec4 causes shader
+    // compilation crashes on some Adreno devices. See bug 1715746.
+    bvec4 clip_edge_mask = bvec4(bool(edge_flags & 1), bool(edge_flags & 2), bool(edge_flags & 4), bool(edge_flags & 8));
+    init_transform_vs(mix(
+        vec4(vec2(-1e16), vec2(1e16)), // chosen where the mask is false: effectively unbounded
+        vec4(segment_rect.p0, segment_rect.p1), // chosen where the mask is true: the clipped segment edge
+        clip_edge_mask
+    ));
+
+    // As this is a transform shader, extrude by 2 (local space) pixels
+    // in each direction. This gives enough space around the edge to
+    // apply distance anti-aliasing. Technically, it:
+    // (a) slightly over-estimates the number of required pixels in the simple case.
+    // (b) might not provide enough edge in edge case perspective projections.
+    // However, it's fast and simple. If / when we ever run into issues, we
+    // can do some math on the projection matrix to work out a variable
+    // amount to extrude.
+
+    // Only extrude along edges where we are going to apply AA.
+    float extrude_amount = 2.0;
+    vec4 extrude_distance = mix(vec4(0.0), vec4(extrude_amount), clip_edge_mask); // 2.0 on flagged edges, 0.0 elsewhere
+    segment_rect.p0 -= extrude_distance.xy;
+    segment_rect.p1 += extrude_distance.zw;
+#endif
+
+    return segment_rect; // clipped, and extruded on AA edges when SWGL_ANTIALIAS is not defined
+}
+
+void write_clip(vec4 world_pos, ClipArea area, PictureTask task) { // sets up clip-mask sampling for this vertex
+#ifdef SWGL_CLIP_MASK
+    swgl_clipMask(
+        sClipMask,
+        (task.task_rect.p0 - task.content_origin) - (area.task_rect.p0 - area.screen_origin),
+        area.task_rect.p0,
+        rect_size(area.task_rect)
+    );
+#else
+    vec2 uv = world_pos.xy * area.device_pixel_scale +
+        world_pos.w * (area.task_rect.p0 - area.screen_origin); // offset is pre-multiplied by w; do_clip multiplies by gl_FragCoord.w
+    vClipMaskUvBounds = vec4(
+        area.task_rect.p0, // xy: inclusive lower bound in do_clip
+        area.task_rect.p1 // zw: exclusive upper bound in do_clip
+    );
+    vClipMaskUv = uv;
+#endif
+}
+
+// Read the extra image data containing the homogeneous screen space coordinates
+// of the corners, interpolate between them, and return real screen space UV.
+vec2 get_image_quad_uv(int address, vec2 f) {
+    ImageSourceExtra extra_data = fetch_image_source_extra(address);
+    vec4 x = mix(extra_data.st_tl, extra_data.st_tr, f.x); // interpolate along the top edge
+    vec4 y = mix(extra_data.st_bl, extra_data.st_br, f.x); // interpolate along the bottom edge
+    vec4 z = mix(x, y, f.y); // interpolate between the two edges
+    return z.xy / z.w; // divide by w to leave homogeneous space
+}
+#endif //WR_VERTEX_SHADER
+
+#ifdef WR_FRAGMENT_SHADER
+
+struct Fragment { // colour output of a fragment stage
+    vec4 color;
+#ifdef WR_FEATURE_DUAL_SOURCE_BLENDING
+    vec4 blend; // only present when dual-source blending is compiled in
+#endif
+};
+
+float do_clip() { // returns the clip-mask value for the current fragment
+#ifdef SWGL_CLIP_MASK
+    // SWGL relies on builtin clip-mask support to do this more efficiently,
+    // so no clipping is required here.
+    return 1.0;
+#else
+    // check for the dummy bounds, which are given to the opaque objects
+    if (vClipMaskUvBounds.xy == vClipMaskUvBounds.zw) { // empty bounds: no mask to sample
+        return 1.0;
+    }
+    // anything outside of the mask is considered transparent
+    //Note: we assume gl_FragCoord.w == interpolated(1 / vClipMaskUv.w)
+    vec2 mask_uv = vClipMaskUv * gl_FragCoord.w;
+    bvec2 left = lessThanEqual(vClipMaskUvBounds.xy, mask_uv); // inclusive
+    bvec2 right = greaterThan(vClipMaskUvBounds.zw, mask_uv); // non-inclusive
+    // bail out if the pixel is outside the valid bounds
+    if (!all(bvec4(left, right))) {
+        return 0.0;
+    }
+    // finally, the slow path - fetch the mask value from an image
+    return texelFetch(sClipMask, ivec2(mask_uv), 0).r; // ivec2() truncates mask_uv to a texel index
+#endif
+}
+
+#endif //WR_FRAGMENT_SHADER
diff --git a/gfx/wr/webrender/res/ps_clear.glsl b/gfx/wr/webrender/res/ps_clear.glsl
new file mode 100644
index 0000000000..567dea978d
--- /dev/null
+++ b/gfx/wr/webrender/res/ps_clear.glsl
@@ -0,0 +1,25 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared
+
+varying mediump vec4 vColor;
+
+#ifdef WR_VERTEX_SHADER
+PER_INSTANCE in vec4 aRect;
+PER_INSTANCE in vec4 aColor;
+
+void main(void) { // vertex stage: places one corner of the rect to clear
+    vec2 pos = mix(aRect.xy, aRect.zw, aPosition.xy); // aPosition picks a point between the two rect corners
+    gl_Position = uTransform * vec4(pos, 0.0, 1.0);
+    gl_Position.z = gl_Position.w; // force depth clear to 1.0
+    vColor = aColor;
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+void main(void) { // fragment stage: writes the clear colour
+    oFragColor = vColor;
+}
+#endif
diff --git a/gfx/wr/webrender/res/ps_copy.glsl b/gfx/wr/webrender/res/ps_copy.glsl
new file mode 100644
index 0000000000..b4e43f1556
--- /dev/null
+++ b/gfx/wr/webrender/res/ps_copy.glsl
@@ -0,0 +1,41 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include base
+
+#ifdef WR_VERTEX_SHADER
+
+attribute vec2 aPosition;
+
+// See CopyInstance struct.
+attribute vec4 a_src_rect;
+attribute vec4 a_dst_rect;
+attribute vec2 a_dst_texture_size;
+
+varying highp vec2 v_uv;
+
+void main(void) { // vertex stage: maps the source rect to v_uv and the destination rect to clip space
+    // We use texel fetch so v_uv is in unnormalized device space.
+    v_uv = mix(a_src_rect.xy, a_src_rect.zw, aPosition.xy);
+
+    // Transform into framebuffer [-1, 1] space.
+    vec2 pos = mix(a_dst_rect.xy, a_dst_rect.zw, aPosition.xy);
+    gl_Position = vec4(pos / (a_dst_texture_size  * 0.5) - vec2(1.0, 1.0), 0.0, 1.0); // [0, size] maps to [-1, 1]
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+
+out vec4 oFragColor;
+
+varying highp vec2 v_uv;
+
+uniform sampler2D sColor0;
+
+void main(void) { // fragment stage: copies one unfiltered texel from sColor0
+    oFragColor = texelFetch(sColor0, ivec2(v_uv), 0); // ivec2() truncates v_uv to a texel index
+}
+
+#endif
diff --git a/gfx/wr/webrender/res/ps_quad.glsl b/gfx/wr/webrender/res/ps_quad.glsl
new file mode 100644
index 0000000000..ed6b35c3b8
--- /dev/null
+++ b/gfx/wr/webrender/res/ps_quad.glsl
@@ -0,0 +1,302 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define WR_FEATURE_TEXTURE_2D
+
+#include shared,rect,transform,render_task,gpu_buffer
+
+flat varying mediump vec4 v_color;
+flat varying mediump vec4 v_uv_sample_bounds;
+flat varying lowp ivec4 v_flags;
+varying highp vec2 v_uv;
+
+#ifdef WR_VERTEX_SHADER
+
+#define EDGE_AA_LEFT    1
+#define EDGE_AA_TOP     2
+#define EDGE_AA_RIGHT   4
+#define EDGE_AA_BOTTOM  8
+
+#define PART_CENTER     0
+#define PART_LEFT       1
+#define PART_TOP        2
+#define PART_RIGHT      3
+#define PART_BOTTOM     4
+#define PART_ALL        5
+
+#define QF_IS_OPAQUE            1
+#define QF_APPLY_DEVICE_CLIP    2
+#define QF_IGNORE_DEVICE_SCALE  4
+#define QF_USE_AA_SEGMENTS      8
+#define QF_SAMPLE_AS_MASK       16
+
+#define INVALID_SEGMENT_INDEX   0xff
+
+#define AA_PIXEL_RADIUS 2.0
+
+PER_INSTANCE in ivec4 aData;
+
+struct PrimitiveInfo { // returned by ps_quad_main to the shader that includes this file
+    vec2 local_pos; // vi.local_pos as produced by write_vertex
+
+    RectWithEndpoint local_prim_rect; // prim.bounds
+    RectWithEndpoint local_clip_rect; // prim.clip
+
+    int edge_flags; // EDGE_AA_* bits from the instance
+    int quad_flags; // QF_* bits from the instance
+};
+
+struct QuadSegment { // one segment of a quad, see fetch_segment
+    RectWithEndpoint rect; // first vec4: xy = p0, zw = p1
+    vec4 uv_rect; // second vec4; xy == zw selects the solid-colour path in ps_quad_main
+};
+
+struct QuadPrimitive { // three-vec4 primitive record, see fetch_primitive
+    RectWithEndpoint bounds; // vec4 0: xy = p0, zw = p1
+    RectWithEndpoint clip; // vec4 1: xy = p0, zw = p1
+    vec4 color; // vec4 2
+};
+
+QuadSegment fetch_segment(int base, int index) { // loads segment `index` of the primitive stored at GPU-buffer address `base`
+    QuadSegment seg;
+
+    vec4 texels[2] = fetch_from_gpu_buffer_2(base + 3 + index * 2); // skip the 3 primitive vec4s; each segment takes 2
+
+    seg.rect = RectWithEndpoint(texels[0].xy, texels[0].zw);
+    seg.uv_rect = texels[1];
+
+    return seg;
+}
+
+QuadPrimitive fetch_primitive(int index) { // loads the three-vec4 primitive record at GPU-buffer address `index`
+    QuadPrimitive prim;
+
+    vec4 texels[3] = fetch_from_gpu_buffer_3(index);
+
+    prim.bounds = RectWithEndpoint(texels[0].xy, texels[0].zw);
+    prim.clip = RectWithEndpoint(texels[1].xy, texels[1].zw);
+    prim.color = texels[2];
+
+    return prim;
+}
+
+struct QuadInstance { // unpacked form of aData; the x / y / z / w notes name the source component
+    // x
+    int prim_address; // whole component
+
+    // y
+    int quad_flags; // bits 24..31
+    int edge_flags; // bits 16..23
+    int picture_task_address; // bits 0..15
+
+    // z
+    int part_index; // bits 24..31
+    int z_id; // bits 0..23
+
+    // w
+    int segment_index; // bits 24..31
+    int transform_id; // bits 0..23
+};
+
+QuadInstance decode_instance() { // unpacks aData; arguments follow the QuadInstance field order
+    QuadInstance qi = QuadInstance(
+        aData.x, // prim_address
+
+        (aData.y >> 24) & 0xff, // quad_flags
+        (aData.y >> 16) & 0xff, // edge_flags
+        aData.y & 0xffff, // picture_task_address
+
+        (aData.z >> 24) & 0xff, // part_index
+        aData.z & 0xffffff, // z_id
+
+        (aData.w >> 24) & 0xff, // segment_index
+        aData.w & 0xffffff // transform_id
+    );
+
+    return qi;
+}
+
+struct VertexInfo { // result of write_vertex in this file
+    vec2 local_pos; // input position, or the position mapped back from the device-clipped point
+};
+
+VertexInfo write_vertex(vec2 local_pos, // sets gl_Position and returns the final local position
+                        float z,
+                        Transform transform,
+                        vec2 content_origin,
+                        RectWithEndpoint task_rect,
+                        float device_pixel_scale,
+                        int quad_flags) { // only QF_APPLY_DEVICE_CLIP is tested here
+    VertexInfo vi;
+
+    // Transform the current vertex to world space.
+    vec4 world_pos = transform.m * vec4(local_pos, 0.0, 1.0);
+
+    // Convert the world positions to device pixel space.
+    vec2 device_pos = world_pos.xy * device_pixel_scale;
+
+    if ((quad_flags & QF_APPLY_DEVICE_CLIP) != 0) {
+        RectWithEndpoint device_clip_rect = RectWithEndpoint(
+            content_origin,
+            content_origin + task_rect.p1 - task_rect.p0 // origin plus the task size
+        );
+
+        // Clip to task rect
+        device_pos = rect_clamp(device_clip_rect, device_pos);
+
+        vi.local_pos = (transform.inv_m * vec4(device_pos / device_pixel_scale, 0.0, 1.0)).xy; // NOTE(review): uses z = 0, w = 1 - presumably only used with non-perspective transforms; confirm
+    } else {
+        vi.local_pos = local_pos;
+    }
+
+    // Apply offsets for the render task to get correct screen location.
+    vec2 final_offset = -content_origin + task_rect.p0;
+
+    gl_Position = uTransform * vec4(device_pos + final_offset * world_pos.w, z * world_pos.w, world_pos.w); // offset and z are pre-multiplied by w
+
+    return vi;
+}
+
+float edge_aa_offset(int edge, int flags) { // AA_PIXEL_RADIUS if any bit of `edge` is set in `flags`, otherwise 0.0
+    return ((flags & edge) != 0) ? AA_PIXEL_RADIUS : 0.0;
+}
+
+PrimitiveInfo ps_quad_main(void) { // shared vertex entry point: decodes the instance, positions the vertex, writes the quad varyings
+    QuadInstance qi = decode_instance();
+
+    Transform transform = fetch_transform(qi.transform_id);
+    PictureTask task = fetch_picture_task(qi.picture_task_address);
+    QuadPrimitive prim = fetch_primitive(qi.prim_address);
+    float z = float(qi.z_id);
+
+    QuadSegment seg;
+    if (qi.segment_index == INVALID_SEGMENT_INDEX) { // 0xff: no segment, use the whole primitive
+        seg.rect = prim.bounds;
+        seg.uv_rect = vec4(0.0); // xy == zw, so the solid-colour path below is taken
+    } else {
+        seg = fetch_segment(qi.prim_address, qi.segment_index);
+    }
+
+    // The local space rect that we will draw, which is effectively:
+    //  - The tile within the primitive we will draw
+    //  - Intersected with any local-space clip rect(s)
+    //  - Expanded for AA edges where appropriate
+    RectWithEndpoint local_coverage_rect = seg.rect;
+
+    // Apply local clip rect
+    local_coverage_rect.p0 = max(local_coverage_rect.p0, prim.clip.p0);
+    local_coverage_rect.p1 = min(local_coverage_rect.p1, prim.clip.p1);
+    local_coverage_rect.p1 = max(local_coverage_rect.p0, local_coverage_rect.p1); // keeps p1 >= p0 when the intersection is empty
+
+    switch (qi.part_index) {
+        case PART_LEFT: // strip of AA_PIXEL_RADIUS inside the left edge
+            local_coverage_rect.p1.x = local_coverage_rect.p0.x + AA_PIXEL_RADIUS;
+#ifdef SWGL_ANTIALIAS
+            swgl_antiAlias(EDGE_AA_LEFT);
+#else
+            local_coverage_rect.p0.x -= AA_PIXEL_RADIUS; // grow outwards, and along y at both ends
+            local_coverage_rect.p0.y -= AA_PIXEL_RADIUS;
+            local_coverage_rect.p1.y += AA_PIXEL_RADIUS;
+#endif
+            break;
+        case PART_TOP: // strip inside the top edge, inset in x by AA_PIXEL_RADIUS at both ends
+            local_coverage_rect.p0.x = local_coverage_rect.p0.x + AA_PIXEL_RADIUS;
+            local_coverage_rect.p1.x = local_coverage_rect.p1.x - AA_PIXEL_RADIUS;
+            local_coverage_rect.p1.y = local_coverage_rect.p0.y + AA_PIXEL_RADIUS;
+#ifdef SWGL_ANTIALIAS
+            swgl_antiAlias(EDGE_AA_TOP);
+#else
+            local_coverage_rect.p0.y -= AA_PIXEL_RADIUS; // grow outwards
+#endif
+            break;
+        case PART_RIGHT: // strip of AA_PIXEL_RADIUS inside the right edge
+            local_coverage_rect.p0.x = local_coverage_rect.p1.x - AA_PIXEL_RADIUS;
+#ifdef SWGL_ANTIALIAS
+            swgl_antiAlias(EDGE_AA_RIGHT);
+#else
+            local_coverage_rect.p1.x += AA_PIXEL_RADIUS; // grow outwards, and along y at both ends
+            local_coverage_rect.p0.y -= AA_PIXEL_RADIUS;
+            local_coverage_rect.p1.y += AA_PIXEL_RADIUS;
+#endif
+            break;
+        case PART_BOTTOM: // strip inside the bottom edge, inset in x by AA_PIXEL_RADIUS at both ends
+            local_coverage_rect.p0.x = local_coverage_rect.p0.x + AA_PIXEL_RADIUS;
+            local_coverage_rect.p1.x = local_coverage_rect.p1.x - AA_PIXEL_RADIUS;
+            local_coverage_rect.p0.y = local_coverage_rect.p1.y - AA_PIXEL_RADIUS;
+#ifdef SWGL_ANTIALIAS
+            swgl_antiAlias(EDGE_AA_BOTTOM);
+#else
+            local_coverage_rect.p1.y += AA_PIXEL_RADIUS; // grow outwards
+#endif
+            break;
+        case PART_CENTER: // interior: shrink inwards on each flagged edge
+            local_coverage_rect.p0.x += edge_aa_offset(EDGE_AA_LEFT, qi.edge_flags);
+            local_coverage_rect.p1.x -= edge_aa_offset(EDGE_AA_RIGHT, qi.edge_flags);
+            local_coverage_rect.p0.y += edge_aa_offset(EDGE_AA_TOP, qi.edge_flags);
+            local_coverage_rect.p1.y -= edge_aa_offset(EDGE_AA_BOTTOM, qi.edge_flags);
+            break;
+        case PART_ALL: // whole quad in one draw; any other part index is handled the same way
+        default:
+#ifdef SWGL_ANTIALIAS
+            swgl_antiAlias(qi.edge_flags);
+#else
+            local_coverage_rect.p0.x -= edge_aa_offset(EDGE_AA_LEFT, qi.edge_flags); // grow outwards on each flagged edge
+            local_coverage_rect.p1.x += edge_aa_offset(EDGE_AA_RIGHT, qi.edge_flags);
+            local_coverage_rect.p0.y -= edge_aa_offset(EDGE_AA_TOP, qi.edge_flags);
+            local_coverage_rect.p1.y += edge_aa_offset(EDGE_AA_BOTTOM, qi.edge_flags);
+#endif
+            break;
+    }
+
+    vec2 local_pos = mix(local_coverage_rect.p0, local_coverage_rect.p1, aPosition); // aPosition picks a point between the rect corners
+
+    float device_pixel_scale = task.device_pixel_scale;
+    if ((qi.quad_flags & QF_IGNORE_DEVICE_SCALE) != 0) {
+        device_pixel_scale = 1.0f;
+    }
+
+    VertexInfo vi = write_vertex(
+        local_pos,
+        z,
+        transform,
+        task.content_origin,
+        task.task_rect,
+        device_pixel_scale,
+        qi.quad_flags
+    );
+
+    if (seg.uv_rect.xy == seg.uv_rect.zw) { // empty uv rect: solid colour; v_uv and v_uv_sample_bounds are not written
+        v_color = prim.color;
+        v_flags.y = 0;
+    } else { // textured path (the consumer of v_flags.y is not in this file)
+        v_color = vec4(1.0);
+        v_flags.y = 1;
+
+        vec2 f = (vi.local_pos - seg.rect.p0) / (seg.rect.p1 - seg.rect.p0); // fraction across the unclipped segment rect
+
+        vec2 uv = mix(
+            seg.uv_rect.xy,
+            seg.uv_rect.zw,
+            f
+        );
+
+        vec2 texture_size = vec2(TEX_SIZE(sColor0));
+
+        v_uv = uv / texture_size; // texel units to normalized coordinates
+
+        v_uv_sample_bounds = vec4(
+            seg.uv_rect.xy + vec2(0.5), // inset by half a texel on every side
+            seg.uv_rect.zw - vec2(0.5)
+        ) / texture_size.xyxy;
+    }
+
+    return PrimitiveInfo(
+        vi.local_pos,
+        prim.bounds,
+        prim.clip,
+        qi.edge_flags,
+        qi.quad_flags
+    );
+}
+#endif
diff --git a/gfx/wr/webrender/res/ps_quad_mask.glsl b/gfx/wr/webrender/res/ps_quad_mask.glsl
new file mode 100644
index 0000000000..4b28109726
--- /dev/null
+++ b/gfx/wr/webrender/res/ps_quad_mask.glsl
@@ -0,0 +1,178 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include ps_quad,ellipse
+
+varying highp vec4 vClipLocalPos;
+
+#ifdef WR_FEATURE_FAST_PATH
+flat varying highp vec3 v_clip_params;      // xy = box size, z = radius
+#else
+flat varying highp vec4 vClipCenter_Radius_TL;
+flat varying highp vec4 vClipCenter_Radius_TR;
+flat varying highp vec4 vClipCenter_Radius_BR;
+flat varying highp vec4 vClipCenter_Radius_BL;
+flat varying highp vec3 vClipPlane_TL;
+flat varying highp vec3 vClipPlane_TR;
+flat varying highp vec3 vClipPlane_BL;
+flat varying highp vec3 vClipPlane_BR;
+#endif
+flat varying highp vec2 vClipMode;
+
+#ifdef WR_VERTEX_SHADER
+
+PER_INSTANCE in ivec4 aClipData;
+
+#define CLIP_SPACE_RASTER       0
+#define CLIP_SPACE_PRIMITIVE    1
+
+struct Clip {
+    RectWithEndpoint rect;
+#ifdef WR_FEATURE_FAST_PATH
+    vec4 radii;         // only .x is read by main() (one radius for all corners)
+#else
+    vec4 radii_top;     // xy = top-left radii, zw = top-right radii
+    vec4 radii_bottom;  // xy = bottom-left radii, zw = bottom-right radii
+#endif
+    float mode;         // forwarded to the fragment shader as vClipMode.x
+    int space;          // compared against CLIP_SPACE_RASTER in main()
+};
+
+Clip fetch_clip(int index) {
+    // Reads the clip rect, corner radii and clip mode stored at `index` in the
+    // GPU buffer; the clip space is taken from the aClipData instance attribute.
+    Clip result;
+    result.space = aClipData.z;
+#ifdef WR_FEATURE_FAST_PATH
+    vec4 data[3] = fetch_from_gpu_buffer_3(index);
+    result.rect = RectWithEndpoint(data[0].xy, data[0].zw);
+    result.radii = data[1];
+    result.mode = data[2].x;
+#else
+    vec4 data[4] = fetch_from_gpu_buffer_4(index);
+    result.rect = RectWithEndpoint(data[0].xy, data[0].zw);
+    result.radii_top = data[1];
+    result.radii_bottom = data[2];
+    result.mode = data[3].x;
+#endif
+
+    return result;
+}
+
+void main(void) {
+    PrimitiveInfo prim_info = ps_quad_main();
+    // aClipData.y selects the clip's GPU buffer data, aClipData.x its transform.
+    Clip clip = fetch_clip(aClipData.y);
+    Transform clip_transform = fetch_transform(aClipData.x);
+
+    vClipLocalPos = clip_transform.m * vec4(prim_info.local_pos, 0.0, 1.0);
+    // Slow path only: use the raw clip rect in raster space, else intersect it with the prim's local clip rect.
+#ifndef WR_FEATURE_FAST_PATH
+    if (clip.space == CLIP_SPACE_RASTER) {
+        vTransformBounds = vec4(clip.rect.p0, clip.rect.p1);
+    } else {
+        RectWithEndpoint xf_bounds = RectWithEndpoint(
+            max(clip.rect.p0, prim_info.local_clip_rect.p0),
+            min(clip.rect.p1, prim_info.local_clip_rect.p1)
+        );
+        vTransformBounds = vec4(xf_bounds.p0, xf_bounds.p1);
+    }
+#endif
+
+    vClipMode.x = clip.mode;
+
+#ifdef WR_FEATURE_FAST_PATH
+    // If the radii are all uniform, we can use a much simpler 2d
+    // signed distance function to get a rounded rect clip.
+    vec2 half_size = 0.5 * (clip.rect.p1 - clip.rect.p0);
+    float radius = clip.radii.x;
+    vClipLocalPos.xy -= (half_size + clip.rect.p0) * vClipLocalPos.w;
+    v_clip_params = vec3(half_size - vec2(radius), radius);
+#else
+    vec2 r_tl = clip.radii_top.xy;
+    vec2 r_tr = clip.radii_top.zw;
+    vec2 r_br = clip.radii_bottom.zw;
+    vec2 r_bl = clip.radii_bottom.xy;
+
+    vClipCenter_Radius_TL = vec4(clip.rect.p0 + r_tl,
+                                 inverse_radii_squared(r_tl));
+
+    vClipCenter_Radius_TR = vec4(clip.rect.p1.x - r_tr.x,
+                                 clip.rect.p0.y + r_tr.y,
+                                 inverse_radii_squared(r_tr));
+
+    vClipCenter_Radius_BR = vec4(clip.rect.p1 - r_br,
+                                 inverse_radii_squared(r_br));
+
+    vClipCenter_Radius_BL = vec4(clip.rect.p0.x + r_bl.x,
+                                 clip.rect.p1.y - r_bl.y,
+                                 inverse_radii_squared(r_bl));
+
+    // We need to know the half-spaces of the corners separate from the center
+    // and radius. We compute a point that falls on the diagonal (which is just
+    // an inner vertex pushed out along one axis, but not on both) to get the
+    // plane offset of the half-space. We also compute the direction vector of
+    // the half-space, which is a perpendicular vector (-y,x) of the vector of
+    // the diagonal. We leave the scales of the vectors unchanged.
+    vec2 n_tl = -r_tl.yx;
+    vec2 n_tr = vec2(r_tr.y, -r_tr.x);
+    vec2 n_br = r_br.yx;
+    vec2 n_bl = vec2(-r_bl.y, r_bl.x);
+    vClipPlane_TL = vec3(n_tl,
+                         dot(n_tl, vec2(clip.rect.p0.x, clip.rect.p0.y + r_tl.y)));
+    vClipPlane_TR = vec3(n_tr,
+                         dot(n_tr, vec2(clip.rect.p1.x - r_tr.x, clip.rect.p0.y)));
+    vClipPlane_BR = vec3(n_br,
+                         dot(n_br, vec2(clip.rect.p1.x, clip.rect.p1.y - r_br.y)));
+    vClipPlane_BL = vec3(n_bl,
+                         dot(n_bl, vec2(clip.rect.p0.x + r_bl.x, clip.rect.p1.y)));
+#endif
+
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+#ifdef WR_FEATURE_FAST_PATH
+// Box SDF from http://www.iquilezles.org/www/articles/distfunctions2d/distfunctions2d.htm
+float sd_box(in vec2 pos, in vec2 box_size) {
+    vec2 excess = abs(pos) - box_size;  // per-axis overshoot; negative inside
+    return length(max(excess, vec2(0.0))) + min(max(excess.x, excess.y), 0.0);
+}
+
+float sd_rounded_box(in vec2 pos, in vec2 box_size, in float radius) {
+    return sd_box(pos, box_size) - radius;  // offsetting by radius rounds the box corners
+}
+#endif
+
+void main(void) {
+    // Perspective divide to recover the position in the clip's local space.
+    vec2 pos = vClipLocalPos.xy / vClipLocalPos.w;
+    float aa_range = compute_aa_range(pos);
+
+#ifdef WR_FEATURE_FAST_PATH
+    float signed_dist = sd_rounded_box(pos, v_clip_params.xy, v_clip_params.z);
+#else
+    float signed_dist = distance_to_rounded_rect(
+        pos,
+        vClipPlane_TL,
+        vClipCenter_Radius_TL,
+        vClipPlane_TR,
+        vClipCenter_Radius_TR,
+        vClipPlane_BR,
+        vClipCenter_Radius_BR,
+        vClipPlane_BL,
+        vClipCenter_Radius_BL,
+        vTransformBounds
+    );
+#endif
+
+    // Turn the distance into anti-aliased coverage over aa_range.
+    float coverage = distance_aa(aa_range, signed_dist);
+
+    // vClipMode.x blends between the coverage (0.0) and its inverse (1.0),
+    // which is how clip-in versus clip-out is selected.
+    oFragColor = vec4(mix(coverage, 1.0 - coverage, vClipMode.x));
+}
+#endif
diff --git a/gfx/wr/webrender/res/ps_quad_textured.glsl b/gfx/wr/webrender/res/ps_quad_textured.glsl
new file mode 100644
index 0000000000..48579eb4fe
--- /dev/null
+++ b/gfx/wr/webrender/res/ps_quad_textured.glsl
@@ -0,0 +1,77 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include ps_quad
+
+#ifndef SWGL_ANTIALIAS
+varying highp vec2 vLocalPos;
+#endif
+
+#ifdef WR_VERTEX_SHADER
+void main(void) {
+    PrimitiveInfo info = ps_quad_main();
+
+#ifndef SWGL_ANTIALIAS
+    // Only needed when SWGL is not anti-aliasing the quad: the fragment
+    // shader then feeds vLocalPos to init_transform_fs to get an alpha.
+    // The bounds are the prim rect intersected with the local clip rect.
+    RectWithEndpoint visible = RectWithEndpoint(
+        max(info.local_prim_rect.p0, info.local_clip_rect.p0),
+        min(info.local_prim_rect.p1, info.local_clip_rect.p1)
+    );
+    vTransformBounds = vec4(visible.p0, visible.p1);
+
+    vLocalPos = info.local_pos;
+
+    // v_flags.x tells the fragment shader whether any edge flag is set;
+    // it is 0 only when edge_flags is exactly 0.
+    v_flags.x = (info.edge_flags == 0) ? 0 : 1;
+#endif
+
+    // v_flags.z is 1 when the quad asks to be sampled as a mask
+    // (QF_SAMPLE_AS_MASK set in quad_flags), and 0 otherwise. Unlike
+    // v_flags.x it is written in every build configuration.
+    v_flags.z = ((info.quad_flags & QF_SAMPLE_AS_MASK) != 0) ? 1 : 0;
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+void main(void) {
+    vec4 result = v_color;
+
+#ifndef SWGL_ANTIALIAS
+    // v_flags.x is set by the vertex shader when the quad has edge flags.
+    if (v_flags.x != 0) {
+        result *= init_transform_fs(vLocalPos);
+    }
+#endif
+
+    if (v_flags.y != 0) {
+        vec2 st = clamp(v_uv, v_uv_sample_bounds.xy, v_uv_sample_bounds.zw);
+        vec4 sampled = TEX_SAMPLE(sColor0, st);
+        if (v_flags.z != 0) {
+            sampled = sampled.rrrr;  // mask sampling: broadcast red to all channels
+        }
+        result *= sampled;
+    }
+
+    oFragColor = result;
+}
+
+#if defined(SWGL_DRAW_SPAN)
+void swgl_drawSpanRGBA8() {
+    if (v_flags.y == 0) {
+        // Untextured quad: fill the span with the flat color.
+        swgl_commitSolidRGBA8(v_color);
+        return;
+    }
+    // Masks are not specialized yet (an existing or new swgl commit could be
+    // used); committing nothing falls back to the fragment shader.
+    if (v_flags.z == 0) {
+        swgl_commitTextureLinearColorRGBA8(sColor0, v_uv, v_uv_sample_bounds, v_color);
+    }
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/ps_split_composite.glsl b/gfx/wr/webrender/res/ps_split_composite.glsl
new file mode 100644
index 0000000000..c07eca1371
--- /dev/null
+++ b/gfx/wr/webrender/res/ps_split_composite.glsl
@@ -0,0 +1,134 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define WR_FEATURE_TEXTURE_2D
+
+#include shared,prim_shared
+
+// interpolated UV coordinates to sample.
+varying highp vec2 vUv;
+
+// Flag to allow perspective interpolation of UV.
+// Packed into a vector to work around bug 1630356.
+flat varying mediump vec2 vPerspective;
+
+flat varying highp vec4 vUvSampleBounds;
+
+#ifdef WR_VERTEX_SHADER
+struct SplitGeometry {
+    vec2 local[4];  // four corner positions, bilinearly interpolated in main()
+};
+
+SplitGeometry fetch_split_geometry(int address) {
+    // Two neighbouring GPU cache texels hold the four corner positions,
+    // packed as two vec2s per texel.
+    ivec2 base_uv = get_gpu_cache_uv(address);
+    vec4 corners01 = TEXEL_FETCH(sGpuCache, base_uv, 0, ivec2(0, 0));
+    vec4 corners23 = TEXEL_FETCH(sGpuCache, base_uv, 0, ivec2(1, 0));
+
+    SplitGeometry result;
+    result.local = vec2[4](
+        corners01.xy,
+        corners01.zw,
+        corners23.xy,
+        corners23.zw
+    );
+    return result;
+}
+
+vec2 bilerp(vec2 a, vec2 b, vec2 c, vec2 d, float s, float t) {
+    // Interpolate along t within the (a, b) and (c, d) pairs, then blend
+    // those two results along s.
+    return mix(mix(a, b, t), mix(c, d, t), s);
+}
+
+struct SplitCompositeInstance {
+    int prim_header_index;  // aData.x; passed to fetch_prim_header
+    int polygons_address;   // aData.y; passed to fetch_split_geometry
+    float z;                // aData.z converted to float
+    int render_task_index;  // aData.w; passed to fetch_picture_task
+};
+
+SplitCompositeInstance fetch_composite_instance() {
+    // Unpack the per-instance aData attribute; arguments follow the struct's
+    // declaration order and z is the only component converted to float.
+    return SplitCompositeInstance(
+        aData.x,         // prim_header_index
+        aData.y,         // polygons_address
+        float(aData.z),  // z
+        aData.w          // render_task_index
+    );
+}
+
+void main(void) {
+    SplitCompositeInstance ci = fetch_composite_instance();
+    SplitGeometry geometry = fetch_split_geometry(ci.polygons_address);
+    PrimitiveHeader ph = fetch_prim_header(ci.prim_header_index);
+    PictureTask dest_task = fetch_picture_task(ci.render_task_index);
+    Transform transform = fetch_transform(ph.transform_id);
+    ImageSource res = fetch_image_source(ph.user_data.x);
+    ClipArea clip_area = fetch_clip_area(ph.user_data.w);
+
+    vec2 dest_origin = dest_task.task_rect.p0 -
+                       dest_task.content_origin;
+    // aPosition picks this vertex's location between the four polygon corners.
+    vec2 local_pos = bilerp(geometry.local[0], geometry.local[1],
+                            geometry.local[3], geometry.local[2],
+                            aPosition.y, aPosition.x);
+    vec4 world_pos = transform.m * vec4(local_pos, 0.0, 1.0);
+
+    vec4 final_pos = vec4(
+        dest_origin * world_pos.w + world_pos.xy * dest_task.device_pixel_scale,
+        world_pos.w * ci.z,
+        world_pos.w
+    );
+
+    write_clip(
+        world_pos,
+        clip_area,
+        dest_task
+    );
+
+    gl_Position = uTransform * final_pos;
+
+    vec2 texture_size = vec2(TEX_SIZE(sColor0));
+    vec2 uv0 = res.uv_rect.p0;
+    vec2 uv1 = res.uv_rect.p1;
+
+    vec2 min_uv = min(uv0, uv1);
+    vec2 max_uv = max(uv0, uv1);
+    // Keep samples half a texel inside the source rect on every side.
+    vUvSampleBounds = vec4(
+        min_uv + vec2(0.5),
+        max_uv - vec2(0.5)
+    ) / texture_size.xyxy;
+
+    vec2 f = (local_pos - ph.local_rect.p0) / rect_size(ph.local_rect);
+    f = get_image_quad_uv(ph.user_data.x, f);
+    vec2 uv = mix(uv0, uv1, f);
+    float perspective_interpolate = float(ph.user_data.y);
+    // When the flag is 0.0 the UV is pre-multiplied by w; the fragment shader scales it by gl_FragCoord.w.
+    vUv = uv / texture_size * mix(gl_Position.w, 1.0, perspective_interpolate);
+    vPerspective.x = perspective_interpolate;
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+void main(void) {
+    float clip_alpha = do_clip();
+    // Undo the w scale baked into vUv by the vertex shader, then clamp.
+    vec2 sample_uv = vUv * mix(gl_FragCoord.w, 1.0, vPerspective.x);
+    write_output(clip_alpha * texture(sColor0, clamp(sample_uv, vUvSampleBounds.xy, vUvSampleBounds.zw)));
+}
+
+#ifdef SWGL_DRAW_SPAN
+void swgl_drawSpanRGBA8() {
+    // Same perspective correction as the fragment shader, using a scalar w.
+    vec2 uv = vUv * mix(swgl_forceScalar(gl_FragCoord.w), 1.0, vPerspective.x);
+
+    swgl_commitTextureRGBA8(sColor0, uv, vUvSampleBounds);
+}
+#endif
+
+#endif
diff --git a/gfx/wr/webrender/res/ps_text_run.glsl b/gfx/wr/webrender/res/ps_text_run.glsl
new file mode 100644
index 0000000000..aabc1e9d8a
--- /dev/null
+++ b/gfx/wr/webrender/res/ps_text_run.glsl
@@ -0,0 +1,340 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared,prim_shared
+
+flat varying mediump vec4 v_color;
+flat varying mediump vec3 v_mask_swizzle;
+// Normalized bounds of the source image in the texture.
+flat varying highp vec4 v_uv_bounds;
+
+// Interpolated UV coordinates to sample.
+varying highp vec2 v_uv;
+
+
+#if defined(WR_FEATURE_GLYPH_TRANSFORM) && !defined(SWGL_CLIP_DIST)
+varying highp vec4 v_uv_clip;
+#endif
+
+#ifdef WR_VERTEX_SHADER
+
+#define VECS_PER_TEXT_RUN           1
+#define GLYPHS_PER_GPU_BLOCK        2U
+
+#ifdef WR_FEATURE_GLYPH_TRANSFORM
+RectWithEndpoint transform_rect(RectWithEndpoint rect, mat2 transform) {
+    vec2 half_extent = rect_size(rect) * 0.5;
+    vec2 mid = transform * (rect.p0 + half_extent);
+    vec2 reach = mat2(abs(transform[0]), abs(transform[1])) * half_extent;  // abs(matrix) bounds each axis
+    return RectWithEndpoint(mid - reach, mid + reach);
+}
+
+bool rect_inside_rect(RectWithEndpoint little, RectWithEndpoint big) {
+    return all(lessThanEqual(big.p0, little.p0)) && all(lessThanEqual(little.p1, big.p1));
+}
+#endif //WR_FEATURE_GLYPH_TRANSFORM
+
+struct Glyph {
+    vec2 offset;  // read from the GPU cache; main() adds ph.local_rect.p0 to it
+};
+
+Glyph fetch_glyph(int specific_prim_address,
+                  int glyph_index) {
+    // Glyph offsets follow the text run header, two per GPU cache texel.
+    uint index = uint(glyph_index);
+    uint block = index / GLYPHS_PER_GPU_BLOCK;
+    bool use_zw = (index % GLYPHS_PER_GPU_BLOCK) == 1U;
+
+    vec4 texel = fetch_from_gpu_cache_1(
+        specific_prim_address + VECS_PER_TEXT_RUN + int(block));
+    // The first glyph of a texel lives in xy, the second in zw.
+    vec2 offset = mix(texel.xy, texel.zw, bvec2(use_zw));
+    return Glyph(offset);
+}
+
+struct GlyphResource {
+    vec4 uv_rect;  // first texel; xy/zw are divided by the texture size in main()
+    vec2 offset;   // second texel, xy
+    float scale;   // second texel, z
+};
+
+GlyphResource fetch_glyph_resource(int address) {
+    vec4 texels[2] = fetch_from_gpu_cache_2(address);
+    return GlyphResource(texels[0], texels[1].xy, texels[1].z);
+}
+
+struct TextRun {
+    vec4 color;  // the single GPU cache texel stored at the text run's address
+};
+
+TextRun fetch_text_run(int address) {
+    vec4 texel = fetch_from_gpu_cache_1(address);
+    return TextRun(texel);
+}
+
+vec2 get_snap_bias(int subpx_dir) {
+    // In subpixel mode, the subpixel offset has already been
+    // accounted for while rasterizing the glyph. However, we
+    // must still round with a subpixel bias rather than rounding
+    // to the nearest whole pixel, depending on subpixel direction.
+    switch (subpx_dir) {
+        case SUBPX_DIR_NONE:
+        default:
+            return vec2(0.5);
+        case SUBPX_DIR_HORIZONTAL:
+            // Glyphs positioned [-0.125, 0.125] get a
+            // subpx position of zero. So include that
+            // offset in the glyph position to ensure
+            // we round to the correct whole position.
+            return vec2(0.125, 0.5);
+        case SUBPX_DIR_VERTICAL:
+            return vec2(0.5, 0.125);
+        case SUBPX_DIR_MIXED:
+            return vec2(0.125);
+    }
+}
+
+void main() {
+    Instance instance = decode_instance_attributes();
+    PrimitiveHeader ph = fetch_prim_header(instance.prim_header_address);
+    Transform transform = fetch_transform(ph.transform_id);
+    ClipArea clip_area = fetch_clip_area(instance.clip_address);
+    PictureTask task = fetch_picture_task(instance.picture_task_address);
+
+    int glyph_index = instance.segment_index;
+    int subpx_dir = (instance.flags >> 8) & 0xff;
+    int color_mode = instance.flags & 0xff;
+
+    // Note that the reference frame relative offset is stored in the prim local
+    // rect size during batching, instead of the actual size of the primitive.
+    TextRun text = fetch_text_run(ph.specific_prim_address);
+    vec2 text_offset = ph.local_rect.p1;
+
+    // Note that the unsnapped reference frame relative offset has already
+    // been subtracted from the prim local rect origin during batching.
+    // It was done this way to avoid pushing both the snapped and the
+    // unsnapped offsets to the shader.
+    Glyph glyph = fetch_glyph(ph.specific_prim_address, glyph_index);
+    glyph.offset += ph.local_rect.p0;
+
+    GlyphResource res = fetch_glyph_resource(instance.resource_address);
+
+    vec2 snap_bias = get_snap_bias(subpx_dir);
+
+    // Glyph space refers to the pixel space used by glyph rasterization during frame
+    // building. If a non-identity transform was used, WR_FEATURE_GLYPH_TRANSFORM will
+    // be set. Otherwise, regardless of whether the raster space is LOCAL or SCREEN,
+    // we ignored the transform during glyph rasterization, and need to snap just using
+    // the device pixel scale and the raster scale.
+#ifdef WR_FEATURE_GLYPH_TRANSFORM
+    // Transform from local space to glyph space.
+    mat2 glyph_transform = mat2(transform.m) * task.device_pixel_scale;
+    vec2 glyph_translation = transform.m[3].xy * task.device_pixel_scale;
+
+    // Transform from glyph space back to local space.
+    mat2 glyph_transform_inv = inverse(glyph_transform);
+
+    // Glyph raster pixels include the impact of the transform. This path can only be
+    // entered for 3d transforms that can be coerced into a 2d transform; they have no
+    // perspective, and have a 2d inverse. This is a looser condition than axis aligned
+    // transforms because it also allows 2d rotations.
+    vec2 raster_glyph_offset = floor(glyph_transform * glyph.offset + snap_bias);
+
+    // We want to eliminate any subpixel translation in device space to ensure glyph
+    // snapping is stable for equivalent glyph subpixel positions. Note that we must take
+    // into account the translation from the transform for snapping purposes.
+    vec2 raster_text_offset = floor(glyph_transform * text_offset + glyph_translation + 0.5) - glyph_translation;
+
+    vec2 glyph_origin = res.offset + raster_glyph_offset + raster_text_offset;
+    // Compute the glyph rect in glyph space.
+    RectWithEndpoint glyph_rect = RectWithEndpoint(
+        glyph_origin,
+        glyph_origin + res.uv_rect.zw - res.uv_rect.xy
+    );
+
+    // The glyph rect is in glyph space, so transform it back to local space.
+    RectWithEndpoint local_rect = transform_rect(glyph_rect, glyph_transform_inv);
+
+    // Select the corner of the glyph's local space rect that we are processing.
+    vec2 local_pos = mix(local_rect.p0, local_rect.p1, aPosition.xy);
+
+    // If the glyph's local rect would fit inside the local clip rect, then select a corner from
+    // the device space glyph rect to reduce overdraw of clipped pixels in the fragment shader.
+    // Otherwise, fall back to clamping the glyph's local rect to the local clip rect.
+    if (rect_inside_rect(local_rect, ph.local_clip_rect)) {
+        local_pos = glyph_transform_inv * mix(glyph_rect.p0, glyph_rect.p1, aPosition.xy);
+    }
+#else
+    float raster_scale = float(ph.user_data.x) / 65535.0;
+
+    // Scale in which the glyph is snapped when rasterized.
+    float glyph_raster_scale = raster_scale * task.device_pixel_scale;
+
+    // Scale from glyph space to local space.
+    float glyph_scale_inv = res.scale / glyph_raster_scale;
+
+    // Glyph raster pixels do not include the impact of the transform. Instead it was
+    // replaced with an identity transform during glyph rasterization. As such only the
+    // impact of the raster scale (if in local space) and the device pixel scale (for both
+    // local and screen space) are included.
+    //
+    // This implies one or more of the following conditions:
+    // - The transform is an identity. In that case, setting WR_FEATURE_GLYPH_TRANSFORM
+    //   should have the same output result as not. We just distinguish which path to use
+    //   based on the transform used during glyph rasterization. (Screen space).
+    // - The transform contains an animation. We will imply local raster space in such
+    //   cases to avoid constantly rerasterizing the glyphs.
+    // - The transform has perspective or does not have a 2d inverse (Screen or local space).
+    // - The transform's scale will result in very large rasterized glyphs and
+    //   we clamped the size. This will imply local raster space.
+    vec2 raster_glyph_offset = floor(glyph.offset * glyph_raster_scale + snap_bias) / res.scale;
+
+    // Compute the glyph rect in local space.
+    //
+    // The transform may be animated, so we don't want to do any snapping here for the
+    // text offset to avoid glyphs wiggling. The text offset should have been snapped
+    // already for axis aligned transforms excluding any animations during frame building.
+    vec2 glyph_origin = glyph_scale_inv * (res.offset + raster_glyph_offset) + text_offset;
+    RectWithEndpoint glyph_rect = RectWithEndpoint(
+        glyph_origin,
+        glyph_origin + glyph_scale_inv * (res.uv_rect.zw - res.uv_rect.xy)
+    );
+
+    // Select the corner of the glyph rect that we are processing.
+    vec2 local_pos = mix(glyph_rect.p0, glyph_rect.p1, aPosition.xy);
+#endif
+
+    VertexInfo vi = write_vertex(
+        local_pos,
+        ph.local_clip_rect,
+        ph.z,
+        transform,
+        task
+    );
+
+#ifdef WR_FEATURE_GLYPH_TRANSFORM
+    vec2 f = (glyph_transform * vi.local_pos - glyph_rect.p0) / rect_size(glyph_rect);
+    #ifdef SWGL_CLIP_DIST
+        gl_ClipDistance[0] = f.x;
+        gl_ClipDistance[1] = f.y;
+        gl_ClipDistance[2] = 1.0 - f.x;
+        gl_ClipDistance[3] = 1.0 - f.y;
+    #else
+        v_uv_clip = vec4(f, 1.0 - f);
+    #endif
+#else
+    vec2 f = (vi.local_pos - glyph_rect.p0) / rect_size(glyph_rect);
+#endif
+
+    write_clip(vi.world_pos, clip_area, task);
+
+    switch (color_mode) {
+        case COLOR_MODE_ALPHA:
+            v_mask_swizzle = vec3(0.0, 1.0, 1.0);
+            v_color = text.color;
+            break;
+        case COLOR_MODE_BITMAP_SHADOW:
+            #ifdef SWGL_BLEND
+                swgl_blendDropShadow(text.color);
+                v_mask_swizzle = vec3(1.0, 0.0, 0.0);
+                v_color = vec4(1.0);
+            #else
+                v_mask_swizzle = vec3(0.0, 1.0, 0.0);
+                v_color = text.color;
+            #endif
+            break;
+        case COLOR_MODE_COLOR_BITMAP:
+            v_mask_swizzle = vec3(1.0, 0.0, 0.0);
+            v_color = vec4(text.color.a);
+            break;
+        case COLOR_MODE_SUBPX_DUAL_SOURCE:
+            #ifdef SWGL_BLEND
+                swgl_blendSubpixelText(text.color);
+                v_mask_swizzle = vec3(1.0, 0.0, 0.0);
+                v_color = vec4(1.0);
+            #else
+                v_mask_swizzle = vec3(text.color.a, 0.0, 0.0);
+                v_color = text.color;
+            #endif
+            break;
+        default:
+            v_mask_swizzle = vec3(0.0, 0.0, 0.0);
+            v_color = vec4(1.0);
+    }
+
+    vec2 texture_size = vec2(TEX_SIZE(sColor0));
+    vec2 st0 = res.uv_rect.xy / texture_size;
+    vec2 st1 = res.uv_rect.zw / texture_size;
+
+    v_uv = mix(st0, st1, f);
+    v_uv_bounds = (res.uv_rect + vec4(0.5, 0.5, -0.5, -0.5)) / texture_size.xyxy;
+}
+
+#endif // WR_VERTEX_SHADER
+
+#ifdef WR_FRAGMENT_SHADER
+
+Fragment text_fs(void) {
+    Fragment frag;
+    // Sample the glyph texture, keeping the coordinate inside v_uv_bounds.
+    vec2 tc = clamp(v_uv, v_uv_bounds.xy, v_uv_bounds.zw);
+    vec4 mask = texture(sColor0, tc);
+    // v_mask_swizzle.z != 0 means we are using an R8 texture as alpha,
+    // and therefore must swizzle from the r channel to all channels.
+    mask = mix(mask, mask.rrrr, bvec4(v_mask_swizzle.z != 0.0));
+    #ifndef WR_FEATURE_DUAL_SOURCE_BLENDING
+        mask.rgb = mask.rgb * v_mask_swizzle.x + mask.aaa * v_mask_swizzle.y;
+    #endif
+    // Zero the mask wherever any component of v_uv_clip is negative.
+    #if defined(WR_FEATURE_GLYPH_TRANSFORM) && !defined(SWGL_CLIP_DIST)
+        mask *= float(all(greaterThanEqual(v_uv_clip, vec4(0.0))));
+    #endif
+
+    frag.color = v_color * mask;
+
+    #if defined(WR_FEATURE_DUAL_SOURCE_BLENDING) && !defined(SWGL_BLEND)
+        frag.blend = mask * v_mask_swizzle.x + mask.aaaa * v_mask_swizzle.y;
+    #endif
+
+    return frag;
+}
+
+
+void main() {
+    Fragment frag = text_fs();
+    // The clip mask scales the color here and the dual-source blend output below.
+    float clip_mask = do_clip();
+    frag.color *= clip_mask;
+
+    #if defined(WR_FEATURE_DEBUG_OVERDRAW)
+        oFragColor = WR_DEBUG_OVERDRAW_COLOR;
+    #elif defined(WR_FEATURE_DUAL_SOURCE_BLENDING) && !defined(SWGL_BLEND)
+        oFragColor = frag.color;
+        oFragBlend = frag.blend * clip_mask;
+    #else
+        write_output(frag.color);
+    #endif
+}
+
+#if defined(SWGL_DRAW_SPAN) && defined(SWGL_BLEND) && defined(SWGL_CLIP_DIST)
+void swgl_drawSpanRGBA8() {
+    // Only support simple swizzles for now. More complex swizzles must either
+    // be handled by blend overrides or the slow path.
+    if (v_mask_swizzle.x != 0.0 && v_mask_swizzle.x != 1.0) {
+        return;
+    }
+    // The dual-source variant commits the texels without applying v_color.
+    #ifdef WR_FEATURE_DUAL_SOURCE_BLENDING
+        swgl_commitTextureLinearRGBA8(sColor0, v_uv, v_uv_bounds);
+    #else
+        if (swgl_isTextureR8(sColor0)) {
+            swgl_commitTextureLinearColorR8ToRGBA8(sColor0, v_uv, v_uv_bounds, v_color);
+        } else {
+            swgl_commitTextureLinearColorRGBA8(sColor0, v_uv, v_uv_bounds, v_color);
+        }
+    #endif
+}
+#endif
+
+#endif // WR_FRAGMENT_SHADER
diff --git a/gfx/wr/webrender/res/rect.glsl b/gfx/wr/webrender/res/rect.glsl
new file mode 100644
index 0000000000..2a080ee393
--- /dev/null
+++ b/gfx/wr/webrender/res/rect.glsl
@@ -0,0 +1,40 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+struct RectWithSize {
+    vec2 p0;    // origin corner
+    vec2 size;  // extent measured from p0
+};
+
+struct RectWithEndpoint {
+    vec2 p0;  // lower bound used by rect_clamp; subtracted from p1 by rect_size
+    vec2 p1;  // upper bound used by rect_clamp
+};
+
+float point_inside_rect(vec2 p, vec2 p0, vec2 p1) {
+    vec2 axis_hit = step(p0, p) - step(p1, p);  // 1.0 per axis when p0 <= p < p1 (given p0 <= p1)
+    return axis_hit.x * axis_hit.y;
+}
+
+vec2 signed_distance_rect_xy(vec2 pos, vec2 p0, vec2 p1) {
+    // Not a true signed distance: each axis just reports how far pos lies
+    // beyond the nearer edge (negative inside), which avoids length() and
+    // causes only mostly imperceptible differences at corner pixels.
+    vec2 before_min = p0 - pos;
+    return max(before_min, pos - p1);
+}
+
+float signed_distance_rect(vec2 pos, vec2 p0, vec2 p1) {
+    // Approximate the distance by the larger of the two per-axis edge distances.
+    vec2 axis_dist = signed_distance_rect_xy(pos, p0, p1);
+    return max(axis_dist.x, axis_dist.y);
+}
+
+vec2 rect_clamp(RectWithEndpoint rect, vec2 pt) {
+    return min(max(pt, rect.p0), rect.p1);  // same as clamp(pt, rect.p0, rect.p1)
+}
+
+vec2 rect_size(RectWithEndpoint rect) {
+    return rect.p1 - rect.p0;  // per-axis extent; negative if p1 < p0 on that axis
+}
diff --git a/gfx/wr/webrender/res/render_task.glsl b/gfx/wr/webrender/res/render_task.glsl
new file mode 100644
index 0000000000..cd9aea402c
--- /dev/null
+++ b/gfx/wr/webrender/res/render_task.glsl
@@ -0,0 +1,102 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+
+#ifdef WR_VERTEX_SHADER
+#define VECS_PER_RENDER_TASK        2U
+
+uniform HIGHP_SAMPLER_FLOAT sampler2D sRenderTasks;
+
+struct RenderTaskData {
+    RectWithEndpoint task_rect;  // built from the task's first texel (xy = p0, zw = p1)
+    vec4 user_data;              // the task's second texel, interpreted by callers
+};
+
+// See RenderTaskData in render_task.rs
+RenderTaskData fetch_render_task_data(int index) {
+    // A task is read as two neighbouring texels of sRenderTasks: the first
+    // becomes the task rect, the second is passed through as user data.
+    ivec2 base_uv = get_fetch_uv(index, VECS_PER_RENDER_TASK);
+
+    vec4 rect_texel = TEXEL_FETCH(sRenderTasks, base_uv, 0, ivec2(0, 0));
+    vec4 user_texel = TEXEL_FETCH(sRenderTasks, base_uv, 0, ivec2(1, 0));
+
+    // xy holds the rect's p0 endpoint and zw its p1 endpoint.
+    RectWithEndpoint rect = RectWithEndpoint(
+        rect_texel.xy,
+        rect_texel.zw
+    );
+    return RenderTaskData(
+        rect,
+        user_texel
+    );
+}
+
+RectWithEndpoint fetch_render_task_rect(int index) {
+    // Returns only the rect of the render task at `index`. The rect lives
+    // entirely in the task's first texel (xy = p0, zw = p1), so the second
+    // texel holding the user data is deliberately not fetched here.
+    ivec2 uv = get_fetch_uv(index, VECS_PER_RENDER_TASK);
+    vec4 texel0 = TEXEL_FETCH(sRenderTasks, uv, 0, ivec2(0, 0));
+
+    RectWithEndpoint task_rect = RectWithEndpoint(
+        texel0.xy,
+        texel0.zw
+    );
+    return task_rect;
+}
+
+#define PIC_TYPE_IMAGE          1
+#define PIC_TYPE_TEXT_SHADOW    2
+
+/*
+ The dynamic picture that this brush exists on. Right now, it
+ contains minimal information. In the future, it will describe
+ the transform mode of primitives on this picture, among other things.
+ */
+struct PictureTask {
+    RectWithEndpoint task_rect;  // copied from RenderTaskData.task_rect
+    float device_pixel_scale;    // RenderTaskData.user_data.x
+    vec2 content_origin;         // RenderTaskData.user_data.yz
+};
+
+PictureTask fetch_picture_task(int address) {
+    // Picture tasks use the generic render task layout, with the user data
+    // read as x = device pixel scale and yz = content origin.
+    RenderTaskData raw = fetch_render_task_data(address);
+
+    return PictureTask(
+        raw.task_rect,
+        raw.user_data.x,
+        raw.user_data.yz
+    );
+}
+
+#define CLIP_TASK_EMPTY 0x7FFF
+
+struct ClipArea {
+    RectWithEndpoint task_rect;  // all zero when the index is >= CLIP_TASK_EMPTY
+    float device_pixel_scale;    // RenderTaskData.user_data.x, or 0.0 when empty
+    vec2 screen_origin;          // RenderTaskData.user_data.yz, or zero when empty
+};
+
+ClipArea fetch_clip_area(int index) {
+    // Indices at or above CLIP_TASK_EMPTY are never fetched; the area then
+    // keeps the all-zero values it is initialised with here.
+    ClipArea area = ClipArea(
+        RectWithEndpoint(vec2(0.0), vec2(0.0)),
+        0.0,
+        vec2(0.0)
+    );
+
+    if (index < CLIP_TASK_EMPTY) {
+        RenderTaskData task_data = fetch_render_task_data(index);
+        area.task_rect = task_data.task_rect;
+        area.device_pixel_scale = task_data.user_data.x;
+        area.screen_origin = task_data.user_data.yz;
+    }
+    return area;
+}
+
+#endif //WR_VERTEX_SHADER
diff --git a/gfx/wr/webrender/res/shared.glsl b/gfx/wr/webrender/res/shared.glsl
new file mode 100644
index 0000000000..dc1b9ea2e1
--- /dev/null
+++ b/gfx/wr/webrender/res/shared.glsl
@@ -0,0 +1,237 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef WR_FEATURE_TEXTURE_EXTERNAL
+// Please check https://www.khronos.org/registry/OpenGL/extensions/OES/OES_EGL_image_external_essl3.txt
+// for this extension.
+#extension GL_OES_EGL_image_external_essl3 : require
+#endif
+
+#ifdef WR_FEATURE_TEXTURE_EXTERNAL_ESSL1
+// Some GLES 3 devices do not support GL_OES_EGL_image_external_essl3, so we
+// must use GL_OES_EGL_image_external instead and make the shader ESSL1
+// compatible.
+#extension GL_OES_EGL_image_external : require
+#endif
+
+#ifdef WR_FEATURE_TEXTURE_EXTERNAL_BT709
+#extension GL_EXT_YUV_target : require
+#endif
+
+#ifdef WR_FEATURE_ADVANCED_BLEND
+#extension GL_KHR_blend_equation_advanced : require
+#endif
+
+#ifdef WR_FEATURE_DUAL_SOURCE_BLENDING
+#ifdef GL_ES
+#extension GL_EXT_blend_func_extended : require
+#else
+#extension GL_ARB_explicit_attrib_location : require
+#endif
+#endif
+
+#include base
+
+#if defined(WR_FEATURE_TEXTURE_EXTERNAL_ESSL1)
+#define TEX_SAMPLE(sampler, tex_coord) texture2D(sampler, tex_coord.xy)
+#elif defined(WR_FEATURE_TEXTURE_EXTERNAL_BT709)
+// Force conversion from yuv to rgb using BT709 colorspace
+#define TEX_SAMPLE(sampler, tex_coord) vec4(yuv_2_rgb(texture(sampler, tex_coord.xy).xyz, itu_709), 1.0)
+#else
+#define TEX_SAMPLE(sampler, tex_coord) texture(sampler, tex_coord.xy)
+#endif
+
+#if defined(WR_FEATURE_TEXTURE_EXTERNAL) && defined(PLATFORM_ANDROID)
+// On some Mali GPUs we have encountered crashes in glDrawElements when using
+// textureSize(samplerExternalOES) in a vertex shader without potentially
+// sampling from the texture. This tricks the driver into thinking the texture
+// may be sampled from, avoiding the crash. See bug 1692848.
+uniform bool u_mali_workaround_dummy;
+#define TEX_SIZE(sampler) (u_mali_workaround_dummy ? ivec2(texture(sampler, vec2(0.0, 0.0)).rr) : textureSize(sampler, 0))
+#else
+#define TEX_SIZE(sampler) textureSize(sampler, 0)
+#endif
+
+//======================================================================================
+// Vertex shader attributes and uniforms
+//======================================================================================
+#ifdef WR_VERTEX_SHADER
+    // Uniform inputs
+    uniform mat4 uTransform;       // Orthographic projection
+
+    // Attribute inputs
+    attribute vec2 aPosition;
+
+    // Texel coordinate of entry `i` when every entry is `vpi` texels wide.
+    // This is a macro to work around a macOS Intel driver parsing bug.
+    // TODO: convert back to a function once the driver issues are resolved, if ever.
+    // https://github.com/servo/webrender/pull/623 and https://github.com/servo/servo/issues/13953
+    // Do the division with unsigned ints because that's more efficient with D3D
+    #define get_fetch_uv(i, vpi)  ivec2(int((vpi) * (uint(i) % (WR_MAX_VERTEX_TEXTURE_WIDTH/(vpi)))), int(uint(i) / (WR_MAX_VERTEX_TEXTURE_WIDTH/(vpi))))
+#endif
+
+//======================================================================================
+// Fragment shader attributes and uniforms
+//======================================================================================
+#ifdef WR_FRAGMENT_SHADER
+    // Uniform inputs
+
+    // Fragment shader outputs
+    #ifdef WR_FEATURE_ADVANCED_BLEND
+        layout(blend_support_all_equations) out;
+    #endif
+
+    #if __VERSION__ == 100
+        #define oFragColor gl_FragColor
+    #elif defined(WR_FEATURE_DUAL_SOURCE_BLENDING)
+        layout(location = 0, index = 0) out vec4 oFragColor;
+        layout(location = 0, index = 1) out vec4 oFragBlend;
+    #else
+        out vec4 oFragColor;
+    #endif
+
+    // Write `color` to oFragColor, the primary fragment output declared above.
+    void write_output(vec4 color) {
+        oFragColor = color;
+    }
+
+    #define EPSILON                     0.0001
+
+    // "Show Overdraw" color. Premultiplied.
+    #define WR_DEBUG_OVERDRAW_COLOR     vec4(0.110, 0.077, 0.027, 0.125)
+
+    // Signed distance from `p` to the line through `p0` with normal `perp_dir`
+    float distance_to_line(vec2 p0, vec2 perp_dir, vec2 p) {
+        return dot(normalize(perp_dir), p0 - p);
+    }
+
+// fwidth is not defined in ESSL 1, but that's okay because we don't need
+// it for any ESSL 1 shader variants.
+#if __VERSION__ != 100
+    /// Per-axis filter width of `position`, i.e. fwidth() of each component.
+    /// distance_aa_xy() scales local signed distances by the reciprocal of this range.
+    vec2 compute_aa_range_xy(vec2 position) {
+        return fwidth(position);
+    }
+
+    float compute_aa_range(vec2 position) {
+        // Returns sqrt(2) / length(fwidth(position)). The sqrt(2) factor compensates for the
+        // fact that length(fwidth) is equal to sqrt(2) times the device pixel ratio in the typical case.
+        //
+        // This coefficient is chosen to ensure that any sample 0.5 pixels or more inside of
+        // the shape has no anti-aliasing applied to it (since pixels are sampled at their center,
+        // such a pixel (axis aligned) is fully inside the border). We need this so that antialiased
+        // curves properly connect with non-antialiased vertical or horizontal lines, among other things.
+        //
+        // Lines over a half-pixel away from the pixel center *can* intersect with the pixel square;
+        // indeed, unless they are horizontal or vertical, they are guaranteed to. However, choosing
+        // a nonzero area for such pixels causes noticeable artifacts at the junction between an anti-
+        // aliased corner and a straight edge.
+        //
+        // We may want to adjust this constant in specific scenarios (for example keep the principled
+        // value for straight edges where we want pixel-perfect equivalence with non antialiased lines
+        // when axis aligned, while selecting a larger and smoother aa range on curves).
+        //
+        // As a further optimization, we compute the reciprocal of this range, such that we
+        // can then use the cheaper inversesqrt() instead of length(). This also elides a
+        // division that would otherwise be necessary inside distance_aa.
+        #ifdef SWGL
+            // SWGL uses an approximation for fwidth() such that it returns equal x and y.
+            // Thus, sqrt(2)/length(w) = sqrt(2)/sqrt(x*x + x*x) = recip(x).
+            return recip(fwidth(position).x);
+        #else
+            // sqrt(2)/length(w) = inversesqrt(0.5 * dot(w, w))
+            vec2 w = fwidth(position);
+            return inversesqrt(0.5 * dot(w, w));
+        #endif
+    }
+#endif
+
+    /// Return the blending coefficient for distance antialiasing.
+    ///
+    /// 1.0 means inside the shape (negative distance), 0.0 means outside.
+    ///
+    /// This makes the simplifying assumption that the area of a 1x1 pixel square
+    /// under a line is reasonably similar to just the signed Euclidean distance
+    /// from the center of the square to that line. This diverges slightly from
+    /// better approximations of the exact area, but the difference between the
+    /// methods is not perceptibly noticeable, while this approximation is much
+    /// faster to compute.
+    ///
+    /// See the comments in `compute_aa_range()` for more information on the
+    /// cutoff values of -0.5 and 0.5.
+    float distance_aa_xy(vec2 aa_range, vec2 signed_distance) {
+        // The aa_range is the raw per-axis filter width, so we need to divide
+        // the local signed distance by the filter width to get an approximation
+        // of screen distance.
+        #ifdef SWGL
+            // The SWGL fwidth() approximation returns uniform X and Y ranges.
+            vec2 dist = signed_distance * recip(aa_range.x);
+        #else
+            vec2 dist = signed_distance / aa_range;
+        #endif
+        // Choose whichever axis is further outside the rectangle for AA.
+        return clamp(0.5 - max(dist.x, dist.y), 0.0, 1.0);
+    }
+
+    float distance_aa(float aa_range, float signed_distance) {
+        // `aa_range` is already a reciprocal with uniform scale (see
+        // compute_aa_range()), so a multiply rescales the distance. The
+        // result falls from 1.0 at -0.5 to 0.0 at +0.5 of that distance.
+        return clamp(0.5 - signed_distance * aa_range, 0.0, 1.0);
+    }
+
+    /// Component-wise selection.
+    ///
+    /// The idea of using this is to ensure both potential branches are executed before
+    /// selecting the result, to avoid observable timing differences based on the condition.
+    ///
+    /// Example usage: color = if_then_else(lessThanEqual(color, vec3(0.5)), vec3(0.0), vec3(1.0));
+    ///
+    /// The above example sets each component to 0.0 or 1.0 independently depending on whether
+    /// their values are below or above 0.5.
+    ///
+    /// This is written as a macro in order to work with vectors of any dimension.
+    ///
+    /// Note: Some older android devices don't support mix with bvec. If we ever run into them
+    /// the only option we have is to polyfill it with a branch per component.
+    #define if_then_else(cond, then_branch, else_branch) mix(else_branch, then_branch, cond)
+#endif
+
+//======================================================================================
+// Shared shader uniforms
+//======================================================================================
+#ifdef WR_FEATURE_TEXTURE_2D
+uniform sampler2D sColor0;
+uniform sampler2D sColor1;
+uniform sampler2D sColor2;
+#elif defined WR_FEATURE_TEXTURE_RECT
+uniform sampler2DRect sColor0;
+uniform sampler2DRect sColor1;
+uniform sampler2DRect sColor2;
+#elif defined(WR_FEATURE_TEXTURE_EXTERNAL) || defined(WR_FEATURE_TEXTURE_EXTERNAL_ESSL1)
+uniform samplerExternalOES sColor0;
+uniform samplerExternalOES sColor1;
+uniform samplerExternalOES sColor2;
+#elif defined(WR_FEATURE_TEXTURE_EXTERNAL_BT709)
+uniform __samplerExternal2DY2YEXT sColor0;
+uniform __samplerExternal2DY2YEXT sColor1;
+uniform __samplerExternal2DY2YEXT sColor2;
+#endif
+
+#ifdef WR_FEATURE_DITHERING
+uniform sampler2D sDither;
+#endif
+
+//======================================================================================
+// Interpolator definitions
+//======================================================================================
+
+//======================================================================================
+// VS only types and UBOs
+//======================================================================================
+
+//======================================================================================
+// VS only functions
+//======================================================================================
diff --git a/gfx/wr/webrender/res/shared_other.glsl b/gfx/wr/webrender/res/shared_other.glsl
new file mode 100644
index 0000000000..03cad173cd
--- /dev/null
+++ b/gfx/wr/webrender/res/shared_other.glsl
@@ -0,0 +1,33 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//======================================================================================
+// Vertex shader attributes and uniforms
+//======================================================================================
+#ifdef WR_VERTEX_SHADER
+#endif
+
+//======================================================================================
+// Fragment shader attributes and uniforms
+//======================================================================================
+#ifdef WR_FRAGMENT_SHADER
+#endif
+
+//======================================================================================
+// Interpolator definitions
+//======================================================================================
+
+//======================================================================================
+// VS only types and UBOs
+//======================================================================================
+
+//======================================================================================
+// VS only functions
+//======================================================================================
+
+//======================================================================================
+// FS only functions
+//======================================================================================
+#ifdef WR_FRAGMENT_SHADER
+#endif
diff --git a/gfx/wr/webrender/res/transform.glsl b/gfx/wr/webrender/res/transform.glsl
new file mode 100644
index 0000000000..d068b26c0e
--- /dev/null
+++ b/gfx/wr/webrender/res/transform.glsl
@@ -0,0 +1,140 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+flat varying highp vec4 vTransformBounds;
+
+#ifdef WR_VERTEX_SHADER
+
+#define VECS_PER_TRANSFORM   8U
+uniform HIGHP_SAMPLER_FLOAT sampler2D sTransformPalette;
+
+void init_transform_vs(vec4 local_bounds) {
+    vTransformBounds = local_bounds;  // flat varying, read back by the fragment-stage helpers
+}
+
+struct Transform {
+    mat4 m;                // columns read from texels 0..3 of the palette entry
+    mat4 inv_m;            // columns read from texels 4..7 of the palette entry
+    bool is_axis_aligned;  // true when no bit from 23 upwards is set in the transform id
+};
+
+Transform fetch_transform(int id) {
+    Transform xf;
+
+    // Bits 0..22 of `id` select the palette entry; the transform counts as
+    // axis-aligned only when every bit from 23 upwards is clear.
+    int palette_index = id & 0x007fffff;
+    xf.is_axis_aligned = (id >> 23) == 0;
+
+    // Every texel is fetched as an offset (0..7) from one base coordinate,
+    // because offsets of more than 8 texels don't work on some macOS versions.
+    ivec2 base = get_fetch_uv(palette_index, VECS_PER_TRANSFORM);
+    // Texels 0..3 are the columns of `m`.
+    xf.m[0] = TEXEL_FETCH(sTransformPalette, base, 0, ivec2(0, 0));
+    xf.m[1] = TEXEL_FETCH(sTransformPalette, base, 0, ivec2(1, 0));
+    xf.m[2] = TEXEL_FETCH(sTransformPalette, base, 0, ivec2(2, 0));
+    xf.m[3] = TEXEL_FETCH(sTransformPalette, base, 0, ivec2(3, 0));
+
+    // Texels 4..7 are the columns of `inv_m`.
+    xf.inv_m[0] = TEXEL_FETCH(sTransformPalette, base, 0, ivec2(4, 0));
+    xf.inv_m[1] = TEXEL_FETCH(sTransformPalette, base, 0, ivec2(5, 0));
+    xf.inv_m[2] = TEXEL_FETCH(sTransformPalette, base, 0, ivec2(6, 0));
+    xf.inv_m[3] = TEXEL_FETCH(sTransformPalette, base, 0, ivec2(7, 0));
+
+    return xf;
+}
+
+// Intersect the plane through "pt" with normal "normal" and the ray
+// "ray_origin" + t * "ray_dir". Returns true only for a hit with t >= 0.0;
+// "t" is 0.0 when the ray is (nearly) parallel to the plane.
+bool ray_plane(vec3 normal, vec3 pt, vec3 ray_origin, vec3 ray_dir, out float t) {
+    // An "out" argument that the callee never writes is undefined after the
+    // call, so give "t" a defined value before the early-out path.
+    t = 0.0;
+    float denom = dot(normal, ray_dir);
+    if (abs(denom) > 1e-6) {
+        t = dot(pt - ray_origin, normal) / denom;
+        return t >= 0.0;
+    }
+    return false;
+}
+
+// Map the CSS-space point "ref" to a local point on the plane of a
+// Transform. The plane is described by a base point "a" and a normal "n";
+// "inv_transform" is applied to the point found on that plane.
+vec4 untransform(vec2 ref, vec3 n, vec3 a, mat4 inv_transform) {
+    // Cast a ray along +Z, starting far behind "ref", at the Transform
+    // plane. Only "t" is consumed; the boolean result is not checked.
+    vec3 ray_origin = vec3(ref, -10000.0);
+    vec3 ray_dir = vec3(0, 0, 1.0);
+
+    float t = 0.0;
+    ray_plane(n, a, ray_origin, ray_dir, t);
+
+    // Z of the visible point on the Transform.
+    float z = ray_origin.z + ray_dir.z * t;
+
+    return inv_transform * vec4(ref, z, 1.0);
+}
+
+// Map the CSS space position "pos" back into the space of "transform".
+vec4 get_node_pos(vec2 pos, Transform transform) {
+    // A point on the scroll node plane: the local origin mapped through "m".
+    vec4 origin_h = transform.m * vec4(0.0, 0.0, 0.0, 1.0);
+    vec3 plane_point = origin_h.xyz / origin_h.w;
+
+    // The plane normal: +Z multiplied by the transposed 3x3 part of "inv_m".
+    vec3 plane_normal = transpose(mat3(transform.inv_m)) * vec3(0.0, 0.0, 1.0);
+    return untransform(pos, plane_normal, plane_point, transform.inv_m);
+}
+
+#endif //WR_VERTEX_SHADER
+
+#ifdef WR_FRAGMENT_SHADER
+
+// Bounds set to a very large scale are invalid: valid means w < 1.0e15.
+bool has_valid_transform_bounds() {
+    return vTransformBounds.w < 1.0e15;
+}
+
+float init_transform_fs(vec2 local_pos) {
+    // Computes the anti-aliased result of testing `local_pos` against
+    // vTransformBounds.
+    //
+    // Screen-space distances would be the ideal input here: a signed
+    // distance only remembers how far away the nearest exit from the geometry
+    // is, not the direction towards it. Perspective transforms, clipping and
+    // other concerns mean screen-space distances can't always be tracked
+    // sanely, so local space is used instead. That, in turn, is a problem
+    // when one uniform AA range is applied later under a transform that
+    // actually scales non-uniformly.
+    //
+    // As a workaround the local-space X and Y distances are tracked
+    // separately and each is scaled by its own AA range (as computed from
+    // fwidth derivatives). This can break down at certain angles (45 degrees
+    // or close to it), but for other rotations it still approximates
+    // screen-space distances better when scaling is non-uniform.
+
+    // Per-axis signed distance from the local rect bounds.
+    vec2 dist_xy = signed_distance_rect_xy(
+        local_pos,
+        vTransformBounds.xy,
+        vTransformBounds.zw
+    );
+
+    // Per-axis range over which the AA ramp is applied.
+    vec2 range_xy = compute_aa_range_xy(local_pos);
+
+    return distance_aa_xy(range_xy, dist_xy);
+}
+
+// Counterpart of init_transform_fs() that only forwards `local_pos` and the
+// xy and zw halves of vTransformBounds to point_inside_rect().
+float init_transform_rough_fs(vec2 local_pos) {
+    vec2 bounds_p0 = vTransformBounds.xy;
+    vec2 bounds_p1 = vTransformBounds.zw;
+    return point_inside_rect(local_pos, bounds_p0, bounds_p1);
+}
+
+#endif //WR_FRAGMENT_SHADER
diff --git a/gfx/wr/webrender/res/yuv.glsl b/gfx/wr/webrender/res/yuv.glsl
new file mode 100644
index 0000000000..ccbfecd086
--- /dev/null
+++ b/gfx/wr/webrender/res/yuv.glsl
@@ -0,0 +1,237 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include shared
+
+#define YUV_FORMAT_NV12             0
+#define YUV_FORMAT_P010             1
+#define YUV_FORMAT_PLANAR           2
+#define YUV_FORMAT_INTERLEAVED      3
+
+//#define YUV_PRECISION mediump
+#define YUV_PRECISION highp
+
+#ifdef WR_VERTEX_SHADER
+
+#ifdef WR_FEATURE_TEXTURE_RECT
+    #define TEX_SIZE_YUV(sampler) vec2(1.0)
+#else
+    #define TEX_SIZE_YUV(sampler) vec2(TEX_SIZE(sampler).xy)
+#endif
+
+// `YuvRangedColorSpace`
+#define YUV_COLOR_SPACE_REC601_NARROW  0
+#define YUV_COLOR_SPACE_REC601_FULL    1
+#define YUV_COLOR_SPACE_REC709_NARROW  2
+#define YUV_COLOR_SPACE_REC709_FULL    3
+#define YUV_COLOR_SPACE_REC2020_NARROW 4
+#define YUV_COLOR_SPACE_REC2020_FULL   5
+#define YUV_COLOR_SPACE_GBR_IDENTITY   6
+
+// The constants added to the Y, U and V components are applied in the fragment shader.
+
+// `rgbFromYuv` from https://jdashg.github.io/misc/colors/from-coeffs.html
+// The matrix is stored in column-major.
+const mat3 RgbFromYuv_Rec601 = mat3(
+  1.00000, 1.00000, 1.00000,
+  0.00000,-0.17207, 0.88600,
+  0.70100,-0.35707, 0.00000
+);
+const mat3 RgbFromYuv_Rec709 = mat3(
+  1.00000, 1.00000, 1.00000,
+  0.00000,-0.09366, 0.92780,
+  0.78740,-0.23406, 0.00000
+);
+const mat3 RgbFromYuv_Rec2020 = mat3(
+  1.00000, 1.00000, 1.00000,
+  0.00000,-0.08228, 0.94070,
+  0.73730,-0.28568, 0.00000
+);
+
+// The matrix is stored in column-major.
+// Identity is stored as GBR
+const mat3 RgbFromYuv_GbrIdentity = mat3(
+    0.0              ,  1.0,                0.0,
+    0.0              ,  0.0,                1.0,
+    1.0              ,  0.0,                0.0
+);
+
+// -
+
+struct YuvPrimitive {
+    int channel_bit_depth;  // bits per channel; values above 8 change channel_max
+    int color_space;        // compared against YUV_COLOR_SPACE_* in get_yuv_color_info()
+    int yuv_format;         // compared against YUV_FORMAT_* in get_yuv_color_info()
+};
+
+struct YuvColorSamplingInfo {
+    mat3 rgb_from_yuv;          // one of the column-major RgbFromYuv_* matrices
+    vec4 packed_zero_one_vals;  // (zero.xy, one.zw); x is used for Y, y for both chroma channels
+};
+
+struct YuvColorMatrixInfo {
+    vec3 ycbcr_bias;               // zero.xyy; subtracted from the sample in sample_yuv()
+    mat3 rgb_from_debiased_ycbrc;  // rgb_from_yuv with the 1/(one - zero) scale folded in
+};
+
+// -
+
+vec4 yuv_channel_zero_one_identity(int bit_depth, float channel_max) {
+    float one = float((1 << bit_depth) - 1) / channel_max; // all-ones code value, normalized
+    return vec4(0.0, 0.0, one, one);
+}
+
+vec4 yuv_channel_zero_one_narrow_range(int bit_depth, float channel_max) {
+    // 8-bit narrow-range code points (Y zero, chroma zero, Y one, chroma
+    // one) shifted up to `bit_depth`. Note: 512/1023 != 128/255
+    return vec4(ivec4(16, 128, 235, 240) << (bit_depth - 8)) / channel_max;
+}
+
+vec4 yuv_channel_zero_one_full_range(int bit_depth, float channel_max) {
+    vec4 range = yuv_channel_zero_one_identity(bit_depth, channel_max);
+    range.y = yuv_channel_zero_one_narrow_range(bit_depth, channel_max).y;
+    return range; // identity range, but chroma zero taken from the narrow range
+}
+
+YuvColorSamplingInfo get_yuv_color_info(YuvPrimitive prim) {
+    int depth = prim.channel_bit_depth;
+
+    // Divisor that normalizes integer code values for this format.
+    float channel_max = 255.0;
+    if (depth > 8) {
+        // P010 is an MSB format, so its full scale is the largest
+        // `depth`-bit value. For other >8bpc formats we get the low bits,
+        // not the high bits, e.g. 10bpc(1.0): 0b0000_0011_1111_1111
+        channel_max = (prim.yuv_format == YUV_FORMAT_P010)
+            ? float((1 << depth) - 1)
+            : 65535.0;
+    }
+
+    // The color space selects both the matrix and the zero/one code points.
+    if (prim.color_space == YUV_COLOR_SPACE_REC601_NARROW) {
+        return YuvColorSamplingInfo(RgbFromYuv_Rec601,
+                yuv_channel_zero_one_narrow_range(depth, channel_max));
+    } else if (prim.color_space == YUV_COLOR_SPACE_REC601_FULL) {
+        return YuvColorSamplingInfo(RgbFromYuv_Rec601,
+                yuv_channel_zero_one_full_range(depth, channel_max));
+    } else if (prim.color_space == YUV_COLOR_SPACE_REC709_NARROW) {
+        return YuvColorSamplingInfo(RgbFromYuv_Rec709,
+                yuv_channel_zero_one_narrow_range(depth, channel_max));
+    } else if (prim.color_space == YUV_COLOR_SPACE_REC709_FULL) {
+        return YuvColorSamplingInfo(RgbFromYuv_Rec709,
+                yuv_channel_zero_one_full_range(depth, channel_max));
+    } else if (prim.color_space == YUV_COLOR_SPACE_REC2020_NARROW) {
+        return YuvColorSamplingInfo(RgbFromYuv_Rec2020,
+                yuv_channel_zero_one_narrow_range(depth, channel_max));
+    } else if (prim.color_space == YUV_COLOR_SPACE_REC2020_FULL) {
+        return YuvColorSamplingInfo(RgbFromYuv_Rec2020,
+                yuv_channel_zero_one_full_range(depth, channel_max));
+    } else {
+        // Every other value, YUV_COLOR_SPACE_GBR_IDENTITY included: identity.
+        return YuvColorSamplingInfo(RgbFromYuv_GbrIdentity,
+                yuv_channel_zero_one_identity(depth, channel_max));
+    }
+}
+
+YuvColorMatrixInfo get_rgb_from_ycbcr_info(YuvPrimitive prim) {
+    YuvColorSamplingInfo sampling = get_yuv_color_info(prim);
+
+    // x applies to Y and y to both chroma channels, such that
+    //   yuv_value = (ycbcr_sample - zero) / (one - zero)
+    vec2 zero = sampling.packed_zero_one_vals.xy;
+    vec2 one = sampling.packed_zero_one_vals.zw;
+    vec2 scale = 1.0 / (one - zero);
+
+    // Fold the per-channel scale into the matrix as a diagonal factor.
+    mat3 scale_matrix = mat3(scale.x,     0.0,     0.0,
+                                 0.0, scale.y,     0.0,
+                                 0.0,     0.0, scale.y);
+
+    return YuvColorMatrixInfo(zero.xyy, sampling.rgb_from_yuv * scale_matrix);
+}
+
+// Writes `uv`, the point at fraction `f` between `uv0` and `uv1`, and
+// `uv_bounds`, the same rect pulled in by 0.5 on every side. Unless
+// WR_FEATURE_TEXTURE_RECT is defined, both are divided by `texture_size`.
+void write_uv_rect(
+    vec2 uv0, vec2 uv1, vec2 f, vec2 texture_size,
+    out vec2 uv, out vec4 uv_bounds
+) {
+    vec2 inset = vec2(0.5);
+    uv = mix(uv0, uv1, f);
+    uv_bounds = vec4(uv0 + inset, uv1 - inset);
+
+    #ifndef WR_FEATURE_TEXTURE_RECT
+        // Rescale both outputs by the texture size.
+        uv /= texture_size;
+        uv_bounds /= texture_size.xyxy;
+    #endif
+}
+#endif
+
+#ifdef WR_FRAGMENT_SHADER
+
+vec4 sample_yuv(
+    int format,                                  // matched against YUV_FORMAT_*; other values sample as vec3(0.0)
+    YUV_PRECISION vec3 ycbcr_bias,               // subtracted from the raw sample
+    YUV_PRECISION mat3 rgb_from_debiased_ycbrc,  // applied to the debiased sample
+    vec2 in_uv_y,                                // coordinate for sColor0
+    vec2 in_uv_u,                                // coordinate for sColor1 (planar, NV12, P010)
+    vec2 in_uv_v,                                // coordinate for sColor2 (planar only)
+    vec4 uv_bounds_y,                            // clamp range for in_uv_y: (min.xy, max.zw)
+    vec4 uv_bounds_u,                            // clamp range for in_uv_u
+    vec4 uv_bounds_v                             // clamp range for in_uv_v
+) {
+    YUV_PRECISION vec3 ycbcr_sample;
+
+    switch (format) {
+        case YUV_FORMAT_PLANAR:
+            {
+                // Planar is the only format that reads all three textures and the third coordinate.
+                vec2 uv_y = clamp(in_uv_y, uv_bounds_y.xy, uv_bounds_y.zw);
+                vec2 uv_u = clamp(in_uv_u, uv_bounds_u.xy, uv_bounds_u.zw);
+                vec2 uv_v = clamp(in_uv_v, uv_bounds_v.xy, uv_bounds_v.zw);
+                ycbcr_sample.x = TEX_SAMPLE(sColor0, uv_y).r;
+                ycbcr_sample.y = TEX_SAMPLE(sColor1, uv_u).r;
+                ycbcr_sample.z = TEX_SAMPLE(sColor2, uv_v).r;
+            }
+            break;
+
+        case YUV_FORMAT_NV12:
+        case YUV_FORMAT_P010: // both read Y from sColor0.r and the chroma pair from sColor1.rg
+            {
+                vec2 uv_y = clamp(in_uv_y, uv_bounds_y.xy, uv_bounds_y.zw);
+                vec2 uv_uv = clamp(in_uv_u, uv_bounds_u.xy, uv_bounds_u.zw);
+                ycbcr_sample.x = TEX_SAMPLE(sColor0, uv_y).r;
+                ycbcr_sample.yz = TEX_SAMPLE(sColor1, uv_uv).rg;
+            }
+            break;
+
+        case YUV_FORMAT_INTERLEAVED:
+            {
+                // "The Y, Cb and Cr color channels within the 422 data are mapped into
+                // the existing green, blue and red color channels."
+                // https://www.khronos.org/registry/OpenGL/extensions/APPLE/APPLE_rgb_422.txt
+                vec2 uv_y = clamp(in_uv_y, uv_bounds_y.xy, uv_bounds_y.zw);
+                ycbcr_sample = TEX_SAMPLE(sColor0, uv_y).gbr;
+            }
+            break;
+
+        default:
+            ycbcr_sample = vec3(0.0);
+            break;
+    }
+    //if (true) return vec4(ycbcr_sample, 1.0);
+
+    // See the YuvColorMatrix definition for an explanation of where the constants come from.
+    YUV_PRECISION vec3 rgb = rgb_from_debiased_ycbrc * (ycbcr_sample - ycbcr_bias);
+
+    #if defined(WR_FEATURE_ALPHA_PASS) && defined(SWGL_CLIP_MASK)
+        // Avoid out-of-range RGB values that can mess with blending. These occur due to invalid
+        // YUV values outside the mappable space that nevertheless can be generated.
+        rgb = clamp(rgb, 0.0, 1.0);
+    #endif
+    return vec4(rgb, 1.0);
+}
+#endif