8 files changed, 11474 insertions, 0 deletions
diff --git a/gfx/wr/webrender/src/renderer/debug.rs b/gfx/wr/webrender/src/renderer/debug.rs
new file mode 100644
index 0000000000..7e16d15d76
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/debug.rs
@@ -0,0 +1,415 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{ColorU, ImageFormat, ImageBufferKind};
+use api::units::*;
+use crate::debug_font_data;
+use crate::device::{Device, Program, Texture, TextureSlot, VertexDescriptor, ShaderError, VAO};
+use crate::device::{TextureFilter, VertexAttribute, VertexAttributeKind, VertexUsageHint};
+use euclid::{Point2D, Rect, Size2D, Transform3D, default};
+use crate::internal_types::Swizzle;
+use std::f32;
+
+#[derive(Debug, Copy, Clone)]
+enum DebugSampler { // Texture samplers used by the debug shaders.
+    Font, // The debug font bitmap; maps to `TextureSlot(0)`.
+}
+
+impl From<DebugSampler> for TextureSlot { // `From` also provides `Into<TextureSlot>` through the std blanket impl.
+    fn from(sampler: DebugSampler) -> TextureSlot {
+        match sampler {
+            DebugSampler::Font => TextureSlot(0),
+        }
+    }
+}
+
+const DESC_FONT: VertexDescriptor = VertexDescriptor { // Vertex layout for `font_vao` and the `debug_font` program.
+    vertex_attributes: &[
+        VertexAttribute {
+            name: "aPosition", // `DebugFontVertex::{x, y}`
+            count: 2,
+            kind: VertexAttributeKind::F32,
+        },
+        VertexAttribute {
+            name: "aColor", // `DebugFontVertex::color`
+            count: 4,
+            kind: VertexAttributeKind::U8Norm,
+        },
+        VertexAttribute {
+            name: "aColorTexCoord", // `DebugFontVertex::{u, v}`
+            count: 2,
+            kind: VertexAttributeKind::F32,
+        },
+    ],
+    instance_attributes: &[], // no per-instance data
+};
+
+const DESC_COLOR: VertexDescriptor = VertexDescriptor { // Vertex layout for `tri_vao` / `line_vao` and the `debug_color` program.
+    vertex_attributes: &[
+        VertexAttribute {
+            name: "aPosition", // `DebugColorVertex::{x, y}`
+            count: 2,
+            kind: VertexAttributeKind::F32,
+        },
+        VertexAttribute {
+            name: "aColor", // `DebugColorVertex::color`
+            count: 4,
+            kind: VertexAttributeKind::U8Norm,
+        },
+    ],
+    instance_attributes: &[], // no per-instance data
+};
+
+#[repr(C)] // fixed field order: instances are uploaded as vertex data for `font_vao` (`DESC_FONT`)
+pub struct DebugFontVertex {
+    pub x: f32, // position
+    pub y: f32,
+    pub color: ColorU,
+    pub u: f32, // texture coordinate into the font bitmap (see `add_text`)
+    pub v: f32,
+}
+
+impl DebugFontVertex {
+    pub fn new(x: f32, y: f32, u: f32, v: f32, color: ColorU) -> DebugFontVertex { // note: `color` is the last argument
+        Self { x, y, color, u, v }
+    }
+}
+
+#[repr(C)] // fixed field order: instances are uploaded as vertex data for `tri_vao` / `line_vao` (`DESC_COLOR`)
+pub struct DebugColorVertex {
+    pub x: f32, // position
+    pub y: f32,
+    pub color: ColorU,
+}
+
+impl DebugColorVertex {
+    pub fn new(x: f32, y: f32, color: ColorU) -> DebugColorVertex { // plain field-by-field constructor
+        Self { x, y, color }
+    }
+}
+
+pub struct DebugRenderer { // Accumulates debug text/quads/lines on the CPU and draws them in `render`.
+    font_vertices: Vec<DebugFontVertex>, // glyph quads queued by `add_text`
+    font_indices: Vec<u32>,
+    font_program: Program,
+    font_vao: VAO,
+    font_texture: Texture, // font bitmap uploaded in `new`
+
+    tri_vertices: Vec<DebugColorVertex>, // quads queued by `add_quad`
+    tri_indices: Vec<u32>,
+    tri_vao: VAO,
+    line_vertices: Vec<DebugColorVertex>, // segment endpoints queued by `add_line`
+    line_vao: VAO,
+    color_program: Program, // shared by the triangle and line passes
+}
+
+impl DebugRenderer {
+    /// Creates the debug renderer.
+    ///
+    /// Links the `debug_font` and `debug_color` programs, creates one VAO per
+    /// primitive kind and uploads the debug font bitmap. Errors from
+    /// `create_program_linked` are propagated to the caller.
+    pub fn new(device: &mut Device) -> Result<Self, ShaderError> {
+        let font_program = device.create_program_linked("debug_font", &[], &DESC_FONT)?;
+        // Point the shader's "sColor0" sampler at the font texture slot.
+        device.bind_program(&font_program);
+        device.bind_shader_samplers(&font_program, &[("sColor0", DebugSampler::Font)]);
+
+        let color_program = device.create_program_linked("debug_color", &[], &DESC_COLOR)?;
+
+        // Lines and triangles share a vertex layout but get separate VAOs.
+        let font_vao = device.create_vao(&DESC_FONT, 1);
+        let line_vao = device.create_vao(&DESC_COLOR, 1);
+        let tri_vao = device.create_vao(&DESC_COLOR, 1);
+
+        // The font bitmap lives in a single-channel (R8) texture with linear filtering.
+        let font_texture = device.create_texture(
+            ImageBufferKind::Texture2D,
+            ImageFormat::R8,
+            debug_font_data::BMP_WIDTH,
+            debug_font_data::BMP_HEIGHT,
+            TextureFilter::Linear,
+            None,
+        );
+        device.upload_texture_immediate(&font_texture, &debug_font_data::FONT_BITMAP);
+
+        // All CPU-side geometry starts out empty; the `add_*` methods fill it.
+        let renderer = Self {
+            font_vertices: Vec::new(),
+            font_indices: Vec::new(),
+            font_program,
+            font_vao,
+            font_texture,
+            tri_vertices: Vec::new(),
+            tri_indices: Vec::new(),
+            tri_vao,
+            line_vertices: Vec::new(),
+            line_vao,
+            color_program,
+        };
+
+        Ok(renderer)
+    }
+
+    pub fn deinit(self, device: &mut Device) { // consumes the renderer and hands every device resource back to `device`
+        device.delete_texture(self.font_texture);
+        device.delete_program(self.font_program);
+        device.delete_program(self.color_program);
+        device.delete_vao(self.tri_vao);
+        device.delete_vao(self.line_vao);
+        device.delete_vao(self.font_vao);
+    }
+
+    pub fn line_height(&self) -> f32 { // height of one text line: the font size plus 10%
+        1.1 * (debug_font_data::FONT_SIZE as f32)
+    }
+
+    /// Queues a line of text starting at (`x`, `y`) and returns the rect covering
+    /// the glyphs that were emitted. Characters without a debug-font glyph are skipped.
+    ///
+    /// If `bounds` is specified, glyphs not fully contained in it are discarded.
+    ///
+    /// Y-coordinates are relative to the screen top, like everything else in this file.
+    pub fn add_text(
+        &mut self,
+        x: f32,
+        y: f32,
+        text: &str,
+        color: ColorU,
+        bounds: Option<DeviceRect>,
+    ) -> default::Rect<f32> {
+        let mut x_start = x;
+        let ipw = 1.0 / debug_font_data::BMP_WIDTH as f32;
+        let iph = 1.0 / debug_font_data::BMP_HEIGHT as f32;
+
+        let mut min_x = f32::MAX;
+        let mut max_x = -f32::MAX;
+        let mut min_y = f32::MAX;
+        let mut max_y = -f32::MAX;
+
+        for c in text.chars() {
+            // `wrapping_sub`: code points below FIRST_GLYPH_INDEX (e.g. '\n') must miss the lookup, not overflow.
+            let index = (c as usize).wrapping_sub(debug_font_data::FIRST_GLYPH_INDEX as usize);
+            if let Some(glyph) = debug_font_data::GLYPHS.get(index) {
+
+                let x0 = (x_start + glyph.xo + 0.5).floor();
+                let y0 = (y + glyph.yo + 0.5).floor();
+
+                let x1 = x0 + glyph.x1 as f32 - glyph.x0 as f32;
+                let y1 = y0 + glyph.y1 as f32 - glyph.y0 as f32;
+
+                // Drop glyphs not fully inside the bounds. NOTE(review): this also skips the advance below - confirm intended.
+                if let Some(b) = bounds {
+                    let rect = DeviceRect {
+                        min: DevicePoint::new(x0, y0),
+                        max: DevicePoint::new(x1, y1),
+                    };
+                    if !b.contains_box(&rect) {
+                        continue;
+                    }
+                }
+
+                let s0 = glyph.x0 as f32 * ipw;
+                let t0 = glyph.y0 as f32 * iph;
+                let s1 = glyph.x1 as f32 * ipw;
+                let t1 = glyph.y1 as f32 * iph;
+
+                x_start += glyph.xa;
+
+                let vertex_count = self.font_vertices.len() as u32;
+
+                self.font_vertices
+                    .push(DebugFontVertex::new(x0, y0, s0, t0, color));
+                self.font_vertices
+                    .push(DebugFontVertex::new(x1, y0, s1, t0, color));
+                self.font_vertices
+                    .push(DebugFontVertex::new(x0, y1, s0, t1, color));
+                self.font_vertices
+                    .push(DebugFontVertex::new(x1, y1, s1, t1, color));
+
+                self.font_indices.push(vertex_count + 0);
+                self.font_indices.push(vertex_count + 1);
+                self.font_indices.push(vertex_count + 2);
+                self.font_indices.push(vertex_count + 2);
+                self.font_indices.push(vertex_count + 1);
+                self.font_indices.push(vertex_count + 3);
+
+                min_x = min_x.min(x0);
+                max_x = max_x.max(x1);
+                min_y = min_y.min(y0);
+                max_y = max_y.max(y1);
+            }
+        }
+
+        Rect::new(
+            Point2D::new(min_x, min_y),
+            Size2D::new(max_x - min_x, max_y - min_y),
+        )
+    }
+
+    pub fn add_quad(
+        &mut self,
+        x0: f32,
+        y0: f32,
+        x1: f32,
+        y1: f32,
+        color_top: ColorU,
+        color_bottom: ColorU,
+    ) {
+        // Index of the quad's first vertex; the two triangles below refer to it.
+        let base = self.tri_vertices.len() as u32;
+
+        // Corners in the order (x0, y0), (x1, y0), (x0, y1), (x1, y1).
+        let corners = [
+            (x0, y0, color_top),
+            (x1, y0, color_top),
+            (x0, y1, color_bottom),
+            (x1, y1, color_bottom),
+        ];
+        for &(x, y, color) in corners.iter() {
+            self.tri_vertices.push(DebugColorVertex::new(x, y, color));
+        }
+
+        // Two triangles sharing the (x1, y0)-(x0, y1) diagonal.
+        self.tri_indices
+            .extend_from_slice(&[base, base + 1, base + 2, base + 2, base + 1, base + 3]);
+    }
+
+    #[allow(dead_code)]
+    pub fn add_line(&mut self, x0: i32, y0: i32, color0: ColorU, x1: i32, y1: i32, color1: ColorU) {
+        let start = DebugColorVertex::new(x0 as f32, y0 as f32, color0);
+        let end = DebugColorVertex::new(x1 as f32, y1 as f32, color1);
+        self.line_vertices.push(start); // a segment is stored as two consecutive vertices
+        self.line_vertices.push(end);
+    }
+
+    /// Queues the four edges of `rect` as line segments in a single `color`.
+    pub fn add_rect(&mut self, rect: &DeviceIntRect, color: ColorU) {
+        let (x_min, y_min) = (rect.min.x, rect.min.y);
+        let (x_max, y_max) = (rect.max.x, rect.max.y);
+        self.add_line(x_min, y_min, color, x_max, y_min, color);
+        self.add_line(x_max, y_min, color, x_max, y_max, color);
+        self.add_line(x_max, y_max, color, x_min, y_max, color);
+        self.add_line(x_min, y_max, color, x_min, y_min, color);
+    }
+
+    pub fn render( // Draws everything queued since the last call, then clears the queues.
+        &mut self,
+        device: &mut Device,
+        viewport_size: Option<DeviceIntSize>, // `None` skips drawing entirely
+        scale: f32, // multiplies the viewport size to get the projection extents
+        surface_origin_is_top_left: bool,
+    ) {
+        if let Some(viewport_size) = viewport_size {
+            device.disable_depth();
+            device.set_blend(true);
+            device.set_blend_mode_premultiplied_alpha();
+
+            let (bottom, top) = if surface_origin_is_top_left { // selects the vertical orientation of the projection
+                (0.0, viewport_size.height as f32 * scale)
+            } else {
+                (viewport_size.height as f32 * scale, 0.0)
+            };
+
+            let projection = Transform3D::ortho(
+                0.0,
+                viewport_size.width as f32 * scale,
+                bottom,
+                top,
+                device.ortho_near_plane(),
+                device.ortho_far_plane(),
+            );
+
+            // Triangles (quads queued by `add_quad`)
+            if !self.tri_vertices.is_empty() {
+                device.bind_program(&self.color_program);
+                device.set_uniforms(&self.color_program, &projection);
+                device.bind_vao(&self.tri_vao);
+                device.update_vao_indices(&self.tri_vao, &self.tri_indices, VertexUsageHint::Dynamic);
+                device.update_vao_main_vertices(
+                    &self.tri_vao,
+                    &self.tri_vertices,
+                    VertexUsageHint::Dynamic,
+                );
+                device.draw_triangles_u32(0, self.tri_indices.len() as i32);
+            }
+
+            // Lines (drawn without an index buffer)
+            if !self.line_vertices.is_empty() {
+                device.bind_program(&self.color_program);
+                device.set_uniforms(&self.color_program, &projection);
+                device.bind_vao(&self.line_vao);
+                device.update_vao_main_vertices(
+                    &self.line_vao,
+                    &self.line_vertices,
+                    VertexUsageHint::Dynamic,
+                );
+                device.draw_nonindexed_lines(0, self.line_vertices.len() as i32);
+            }
+
+            // Glyphs (quads queued by `add_text`, sampling the font texture)
+            if !self.font_indices.is_empty() {
+                device.bind_program(&self.font_program);
+                device.set_uniforms(&self.font_program, &projection);
+                device.bind_texture(DebugSampler::Font, &self.font_texture, Swizzle::default());
+                device.bind_vao(&self.font_vao);
+                device.update_vao_indices(&self.font_vao, &self.font_indices, VertexUsageHint::Dynamic);
+                device.update_vao_main_vertices(
+                    &self.font_vao,
+                    &self.font_vertices,
+                    VertexUsageHint::Dynamic,
+                );
+                device.draw_triangles_u32(0, self.font_indices.len() as i32);
+            }
+        }
+
+        self.font_indices.clear(); // the queues are emptied even when nothing was drawn
+        self.font_vertices.clear();
+        self.line_vertices.clear();
+        self.tri_vertices.clear();
+        self.tri_indices.clear();
+    }
+}
+
+pub struct LazyInitializedDebugRenderer { // defers `DebugRenderer` creation until it is first requested
+    debug_renderer: Option<DebugRenderer>,
+    failed: bool, // set once creation has failed, so that it is not retried
+}
+
+impl LazyInitializedDebugRenderer {
+    /// Creates the wrapper without touching the device; nothing is built yet.
+    pub fn new() -> Self {
+        Self { debug_renderer: None, failed: false }
+    }
+
+    /// Returns the debug renderer, creating it on first use.
+    ///
+    /// Creation is not retried: after a failure this always returns `None`.
+    pub fn get_mut<'a>(&'a mut self, device: &mut Device) -> Option<&'a mut DebugRenderer> {
+        let needs_init = !self.failed && self.debug_renderer.is_none();
+        if needs_init {
+            match DebugRenderer::new(device) {
+                Ok(renderer) => self.debug_renderer = Some(renderer),
+                // The shader compilation code already logs errors.
+                Err(_) => self.failed = true,
+            }
+        }
+
+        // Still `None` if creation failed, now or on an earlier call.
+        self.debug_renderer.as_mut()
+    }
+
+    /// Returns mut ref to `debug::DebugRenderer` if one already exists, otherwise returns `None`.
+    /// Never creates the renderer, so no device is needed.
+    pub fn try_get_mut<'a>(&'a mut self) -> Option<&'a mut DebugRenderer> {
+        self.debug_renderer.as_mut()
+    }
+
+    /// Destroys the wrapped renderer, if it was ever created.
+    pub fn deinit(self, device: &mut Device) {
+        if let Some(debug_renderer) = self.debug_renderer {
+            debug_renderer.deinit(device);
+        }
+    }
+}
diff --git a/gfx/wr/webrender/src/renderer/gpu_buffer.rs b/gfx/wr/webrender/src/renderer/gpu_buffer.rs
new file mode 100644
index 0000000000..05543d51ee
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/gpu_buffer.rs
@@ -0,0 +1,326 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+
+    TODO:
+        Recycle GpuBuffers in a pool (support return from render thread)
+        Efficiently allow writing to buffer (better push interface)
+        Support other texel types (e.g. i32)
+
+ */
+
+use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH;
+use api::units::{DeviceIntRect, DeviceIntSize, LayoutRect, PictureRect, DeviceRect};
+use api::{PremultipliedColorF};
+use crate::device::Texel;
+use crate::render_task_graph::{RenderTaskGraph, RenderTaskId};
+
+// SAFETY(review): presumably sound because the block is plain `[f32; 4]` data; confirm against `Texel`'s contract.
+unsafe impl Texel for GpuBufferBlock {}
+
+/// A single texel in an RGBAF32 texture (16 bytes).
+#[derive(Copy, Clone, Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct GpuBufferBlock {
+    data: [f32; 4], // the four `f32` components of the texel
+}
+
+#[derive(Copy, Debug, Clone, MallocSizeOf, Eq, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct GpuBufferAddress {
+    pub u: u16, // column: block index modulo MAX_VERTEX_TEXTURE_WIDTH
+    pub v: u16, // row: block index divided by MAX_VERTEX_TEXTURE_WIDTH
+}
+
+impl GpuBufferAddress {
+    #[allow(dead_code)]
+    pub fn as_int(self) -> i32 {
+        // Flattens the (u, v) address to `v * MAX_VERTEX_TEXTURE_WIDTH + u`.
+        // TODO(gw): Temporary encoding as a single int. In the future, PrimitiveInstanceData
+        //           could use 2x u16 for the vertex attribute instead of an i32.
+        self.u as i32 + self.v as i32 * MAX_VERTEX_TEXTURE_WIDTH as i32
+    }
+}
+
+impl GpuBufferBlock {
+    pub const EMPTY: Self = Self { data: [0.0, 0.0, 0.0, 0.0] }; // all-zero block, used for padding and placeholders
+}
+
+impl From<LayoutRect> for GpuBufferBlock { // packs the rect as [min.x, min.y, max.x, max.y]; `Into` comes from the blanket impl
+    fn from(rect: LayoutRect) -> Self {
+        GpuBufferBlock {
+            data: [
+                rect.min.x,
+                rect.min.y,
+                rect.max.x,
+                rect.max.y,
+            ],
+        }
+    }
+}
+
+impl From<PictureRect> for GpuBufferBlock { // packs the rect as [min.x, min.y, max.x, max.y]; `Into` comes from the blanket impl
+    fn from(rect: PictureRect) -> Self {
+        GpuBufferBlock {
+            data: [
+                rect.min.x,
+                rect.min.y,
+                rect.max.x,
+                rect.max.y,
+            ],
+        }
+    }
+}
+
+impl From<DeviceRect> for GpuBufferBlock { // packs the rect as [min.x, min.y, max.x, max.y]; `Into` comes from the blanket impl
+    fn from(rect: DeviceRect) -> Self {
+        GpuBufferBlock {
+            data: [
+                rect.min.x,
+                rect.min.y,
+                rect.max.x,
+                rect.max.y,
+            ],
+        }
+    }
+}
+
+impl From<PremultipliedColorF> for GpuBufferBlock { // packs the color as [r, g, b, a]; `Into` comes from the blanket impl
+    fn from(color: PremultipliedColorF) -> Self {
+        GpuBufferBlock {
+            data: [
+                color.r,
+                color.g,
+                color.b,
+                color.a,
+            ],
+        }
+    }
+}
+
+impl From<DeviceIntRect> for GpuBufferBlock { // packs the rect as f32 [min.x, min.y, max.x, max.y]; `Into` comes from the blanket impl
+    fn from(rect: DeviceIntRect) -> Self {
+        GpuBufferBlock {
+            data: [
+                rect.min.x as f32,
+                rect.min.y as f32,
+                rect.max.x as f32,
+                rect.max.y as f32,
+            ],
+        }
+    }
+}
+
+impl From<[f32; 4]> for GpuBufferBlock { // wraps the raw components unchanged; `Into` comes from the blanket impl
+    fn from(data: [f32; 4]) -> Self {
+        GpuBufferBlock {
+            data,
+        }
+    }
+}
+
+/// Record a patch to the GPU buffer for a render task
+struct DeferredBlock {
+    task_id: RenderTaskId, // task whose target rect is written during `finalize`
+    index: usize, // position of the placeholder block in the builder's data
+}
+
+/// Interface to allow writing multiple GPU blocks, possibly of different types
+pub struct GpuBufferWriter<'a> {
+    buffer: &'a mut Vec<GpuBufferBlock>, // the builder's block storage; blocks are appended to it
+    deferred: &'a mut Vec<DeferredBlock>, // the builder's list of render-task patches
+    index: usize, // `buffer` length when the writer was created (address of its first block)
+    block_count: usize, // number of blocks the caller committed to writing
+}
+
+impl<'a> GpuBufferWriter<'a> {
+    /// Wraps the builder's storage; `index` is where this writer's blocks
+    /// start and `block_count` is how many blocks the caller promised to write.
+    fn new(
+        buffer: &'a mut Vec<GpuBufferBlock>,
+        deferred: &'a mut Vec<DeferredBlock>,
+        index: usize,
+        block_count: usize,
+    ) -> Self {
+        Self { buffer, deferred, index, block_count }
+    }
+
+    /// Push one (16 byte) block of data in to the writer
+    pub fn push_one<B>(&mut self, block: B) where B: Into<GpuBufferBlock> {
+        let block: GpuBufferBlock = block.into();
+        self.buffer.push(block);
+    }
+
+    /// Push a reference to a render task in to the writer. Once the render
+    /// task graph is resolved, this will be patched with the UV rect of the task
+    pub fn push_render_task(&mut self, task_id: RenderTaskId) {
+        // Reserve a placeholder now and remember where it is so that
+        // `GpuBufferBuilder::finalize` can overwrite it later.
+        let index = self.buffer.len();
+        self.buffer.push(GpuBufferBlock::EMPTY);
+        self.deferred.push(DeferredBlock { task_id, index });
+    }
+
+    /// Close this writer, returning the GPU address of this set of block(s).
+    ///
+    /// Panics if the number of blocks written differs from the claimed `block_count`.
+    pub fn finish(self) -> GpuBufferAddress {
+        assert_eq!(self.buffer.len(), self.index + self.block_count);
+
+        let row = self.index / MAX_VERTEX_TEXTURE_WIDTH;
+        let column = self.index % MAX_VERTEX_TEXTURE_WIDTH;
+
+        GpuBufferAddress { u: column as u16, v: row as u16 }
+    }
+}
+
+impl<'a> Drop for GpuBufferWriter<'a> {
+    fn drop(&mut self) { // checks the claimed block count; skipped while unwinding, where a second panic would abort
+        if !std::thread::panicking() { assert_eq!(self.buffer.len(), self.index + self.block_count, "Claimed block_count was not written"); }
+    }
+}
+
+pub struct GpuBufferBuilder { // accumulates blocks row by row; `finalize` turns it into a `GpuBuffer`
+    data: Vec<GpuBufferBlock>, // blocks written so far, including row padding
+    deferred: Vec<DeferredBlock>, // placeholders to patch with render-task rects in `finalize`
+}
+
+impl GpuBufferBuilder {
+    pub fn new() -> Self { // starts with no blocks and no pending render-task patches
+        let data = Vec::new();
+        let deferred = Vec::new();
+
+        Self { data, deferred }
+    }
+
+    /// Appends `blocks` contiguously and returns the address of the first one.
+    /// Pads to the next row first if the run would otherwise straddle two rows.
+    #[allow(dead_code)]
+    pub fn push(
+        &mut self,
+        blocks: &[GpuBufferBlock],
+    ) -> GpuBufferAddress {
+        assert!(blocks.len() <= MAX_VERTEX_TEXTURE_WIDTH);
+
+        let used_in_row = self.data.len() % MAX_VERTEX_TEXTURE_WIDTH;
+        if used_in_row + blocks.len() > MAX_VERTEX_TEXTURE_WIDTH {
+            let row_end = self.data.len() + (MAX_VERTEX_TEXTURE_WIDTH - used_in_row);
+            self.data.resize(row_end, GpuBufferBlock::EMPTY);
+        }
+
+        let start = self.data.len();
+        self.data.extend_from_slice(blocks);
+
+        let u = (start % MAX_VERTEX_TEXTURE_WIDTH) as u16;
+        let v = (start / MAX_VERTEX_TEXTURE_WIDTH) as u16;
+        GpuBufferAddress { u, v }
+    }
+
+    /// Begin writing a specific number of blocks
+    ///
+    /// Exactly `block_count` blocks must be pushed to the returned writer; the
+    /// writer asserts this when it is finished or dropped.
+    pub fn write_blocks(
+        &mut self,
+        block_count: usize,
+    ) -> GpuBufferWriter {
+        assert!(block_count <= MAX_VERTEX_TEXTURE_WIDTH);
+
+        // Pad to the next row if the requested run would straddle two rows.
+        let used_in_row = self.data.len() % MAX_VERTEX_TEXTURE_WIDTH;
+        if used_in_row + block_count > MAX_VERTEX_TEXTURE_WIDTH {
+            let row_end = self.data.len() + (MAX_VERTEX_TEXTURE_WIDTH - used_in_row);
+            self.data.resize(row_end, GpuBufferBlock::EMPTY);
+        }
+
+        // The writer appends directly to `data`, starting at its current end.
+        let start = self.data.len();
+
+        GpuBufferWriter::new(&mut self.data, &mut self.deferred, start, block_count)
+    }
+
+    /// Pads the data to a whole number of rows, patches in the target rects of
+    /// all deferred render tasks and returns the finished `GpuBuffer`.
+    pub fn finalize(
+        mut self,
+        render_tasks: &RenderTaskGraph,
+    ) -> GpuBuffer {
+        // Round up to a multiple of the row width. The mask arithmetic
+        // relies on the width being a power of two.
+        let row_mask = MAX_VERTEX_TEXTURE_WIDTH - 1;
+        let padded_len = (self.data.len() + row_mask) & !row_mask;
+        self.data.resize(padded_len, GpuBufferBlock::EMPTY);
+        assert!(padded_len % MAX_VERTEX_TEXTURE_WIDTH == 0);
+
+        // At this point, we know that the render task graph has been built, and we can
+        // query the location of any dynamic (render target) or static (texture cache)
+        // task. This allows us to patch the UV rects in to the GPU buffer before upload
+        // to the GPU.
+        for DeferredBlock { task_id, index } in self.deferred.drain(..) {
+            let target_rect = render_tasks[task_id].get_target_rect();
+            self.data[index] = target_rect.into();
+        }
+
+        let row_count = padded_len / MAX_VERTEX_TEXTURE_WIDTH;
+        GpuBuffer {
+            data: self.data,
+            size: DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as i32, row_count as i32),
+        }
+    }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct GpuBuffer { // finished buffer produced by `GpuBufferBuilder::finalize`
+    pub data: Vec<GpuBufferBlock>, // whole rows of blocks
+    pub size: DeviceIntSize, // MAX_VERTEX_TEXTURE_WIDTH wide, one unit of height per row
+}
+
+impl GpuBuffer {
+    pub fn is_empty(&self) -> bool { // true when the buffer holds no blocks at all
+        self.data.is_empty()
+    }
+}
+
+// One full row followed by two single blocks must be padded out to exactly two rows.
+#[test]
+fn test_gpu_buffer_sizing_push() {
+    let render_task_graph = RenderTaskGraph::new_for_testing();
+    let mut builder = GpuBufferBuilder::new();
+
+    let row = vec![GpuBufferBlock::EMPTY; MAX_VERTEX_TEXTURE_WIDTH];
+    builder.push(&row);
+
+    builder.push(&[GpuBufferBlock::EMPTY]);
+    builder.push(&[GpuBufferBlock::EMPTY]);
+
+    let buffer = builder.finalize(&render_task_graph);
+    assert_eq!(buffer.data.len(), MAX_VERTEX_TEXTURE_WIDTH * 2);
+}
+
+#[test]
+fn test_gpu_buffer_sizing_writer() { // same scenario as the `push` sizing test, driven through `write_blocks`
+    let render_task_graph = RenderTaskGraph::new_for_testing();
+    let mut builder = GpuBufferBuilder::new();
+
+    let mut writer = builder.write_blocks(MAX_VERTEX_TEXTURE_WIDTH); // fills the first row completely
+    for _ in 0 .. MAX_VERTEX_TEXTURE_WIDTH {
+        writer.push_one(GpuBufferBlock::EMPTY);
+    }
+    writer.finish();
+
+    let mut writer = builder.write_blocks(1); // two single blocks start the second row
+    writer.push_one(GpuBufferBlock::EMPTY);
+    writer.finish();
+
+    let mut writer = builder.write_blocks(1);
+    writer.push_one(GpuBufferBlock::EMPTY);
+    writer.finish();
+
+    let buffer = builder.finalize(&render_task_graph);
+    assert_eq!(buffer.data.len(), MAX_VERTEX_TEXTURE_WIDTH * 2); // `finalize` pads the partial second row
+}
diff --git a/gfx/wr/webrender/src/renderer/gpu_cache.rs b/gfx/wr/webrender/src/renderer/gpu_cache.rs
new file mode 100644
index 0000000000..fde649cb08
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/gpu_cache.rs
@@ -0,0 +1,525 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use std::{cmp, mem};
+use api::units::*;
+use malloc_size_of::MallocSizeOfOps;
+use crate::{
+    device::{CustomVAO, Device, DrawTarget, Program, ReadTarget, Texture, TextureFilter, UploadPBOPool, VBO},
+    gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList},
+    internal_types::{RenderTargetInfo, Swizzle},
+    prim_store::DeferredResolve,
+    profiler,
+    render_api::MemoryReport,
+    internal_types::FrameId,
+};
+
+/// When enabled, `ensure_texture` recreates the GPU cache texture even if the
+/// existing one is already large enough, which enables GPU debuggers to see
+/// whether the resize is performed correctly.
+const GPU_CACHE_RESIZE_TEST: bool = false;
+
+/// Tracks the state of each row in the GPU cache texture.
+struct CacheRow {
+    /// Mirrored block data on CPU for this row. We store a copy of
+    /// the data on the CPU side to improve upload batching.
+    cpu_blocks: Box<[GpuBlockData; super::MAX_VERTEX_TEXTURE_WIDTH]>,
+    /// The first offset in this row that is dirty.
+    min_dirty: u16,
+    /// One past the last offset in this row that is dirty (exclusive end).
+    max_dirty: u16,
+}
+
+impl CacheRow {
+    fn new() -> Self { // a fresh row: every block `EMPTY`, dirty range empty
+        Self {
+            cpu_blocks: Box::new([GpuBlockData::EMPTY; super::MAX_VERTEX_TEXTURE_WIDTH]),
+            min_dirty: super::MAX_VERTEX_TEXTURE_WIDTH as u16,
+            max_dirty: 0,
+        }
+    }
+
+    fn is_dirty(&self) -> bool { // true when the dirty range is non-empty
+        self.max_dirty > self.min_dirty
+    }
+
+    fn clear_dirty(&mut self) { // back to the empty range used by `new`
+        self.max_dirty = 0;
+        self.min_dirty = super::MAX_VERTEX_TEXTURE_WIDTH as u16;
+    }
+
+    fn add_dirty(&mut self, block_offset: usize, block_count: usize) { // grows the range to cover the given blocks
+        self.max_dirty = cmp::max(self.max_dirty, (block_offset + block_count) as u16);
+        self.min_dirty = cmp::min(self.min_dirty, block_offset as u16);
+    }
+
+    fn dirty_blocks(&self) -> &[GpuBlockData] { // the CPU copy of the dirty range
+        &self.cpu_blocks[usize::from(self.min_dirty) .. usize::from(self.max_dirty)]
+    }
+}
+
+/// The bus over which the CPU and GPU versions of the GPU cache
+/// are synchronized.
+enum GpuCacheBus {
+    /// PBO-based updates. These currently operate at row granularity
+    /// and are therefore subject to fragmentation issues.
+    PixelBuffer {
+        /// Per-row data.
+        rows: Vec<CacheRow>,
+    },
+    /// Shader-based scattering updates. Currently rendered as a set
+    /// of points into the GPU texture, each carrying a `GpuBlockData`.
+    Scatter {
+        /// Special program to run the scattered update.
+        program: Program,
+        /// VAO containing the source vertex buffers.
+        vao: CustomVAO,
+        /// VBO for positional data, supplied as normalized `u16`.
+        buf_position: VBO<[u16; 2]>,
+        /// VBO for gpu block data.
+        buf_value: VBO<GpuBlockData>,
+        /// Currently stored block count.
+        count: usize,
+    },
+}
+
+/// The device-specific representation of the cache texture in gpu_cache.rs
+pub struct GpuCacheTexture {
+    texture: Option<Texture>, // `None` until `ensure_texture` first creates it
+    bus: GpuCacheBus, // how CPU-side updates reach the texture
+}
+
+impl GpuCacheTexture {
+    /// Ensures that we have an appropriately-sized texture.
+    ///
+    /// When a new texture is created, the old contents are copied over or marked dirty for re-upload.
+    fn ensure_texture(&mut self, device: &mut Device, height: i32) {
+        // If we already have a texture that works, we're done - except in the
+        // special debug mode, which resizes the texture even though it's fine.
+        let tall_enough = self.texture.as_ref().map_or(false, |t| t.get_dimensions().height >= height);
+        if tall_enough && !GPU_CACHE_RESIZE_TEST {
+            return;
+        }
+
+        // Take the old texture, if any.
+        let old_texture = self.texture.take();
+
+        // Create the new texture.
+        assert!(height >= 2, "Height is too small for ANGLE");
+        let new_size = DeviceIntSize::new(super::MAX_VERTEX_TEXTURE_WIDTH as _, height);
+        // GpuCacheBus::Scatter always requires the texture to be a render target. For
+        // GpuCacheBus::PixelBuffer, we only create the texture with a render target if
+        // RGBAF32 render targets are actually supported, and only if glCopyImageSubData
+        // is not. glCopyImageSubData does not require a render target to copy the texture
+        // data, and if neither RGBAF32 render targets nor glCopyImageSubData is supported,
+        // we simply re-upload the entire contents rather than copying upon resize.
+        let supports_copy_image_sub_data = device.get_capabilities().supports_copy_image_sub_data;
+        let supports_color_buffer_float = device.get_capabilities().supports_color_buffer_float;
+        let is_pixel_buffer = matches!(self.bus, GpuCacheBus::PixelBuffer { .. });
+        let rt_info = if is_pixel_buffer && (supports_copy_image_sub_data || !supports_color_buffer_float) {
+            None
+        } else {
+            Some(RenderTargetInfo { has_depth: false })
+        };
+        let mut texture = device.create_texture(
+            api::ImageBufferKind::Texture2D,
+            api::ImageFormat::RGBAF32,
+            new_size.width,
+            new_size.height,
+            TextureFilter::Nearest,
+            rt_info,
+        );
+
+        // Copy the contents of the previous texture, if applicable.
+        if let Some(old_texture) = old_texture {
+            let can_copy = supports_copy_image_sub_data || supports_color_buffer_float;
+            if can_copy {
+                device.copy_entire_texture(&mut texture, &old_texture);
+            } else {
+                // Cannot copy texture, so must re-upload everything.
+                match self.bus {
+                    GpuCacheBus::PixelBuffer { ref mut rows } => {
+                        for row in rows {
+                            row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH);
+                        }
+                    }
+                    GpuCacheBus::Scatter { .. } => {
+                        panic!("Texture must be copyable to use scatter GPU cache bus method");
+                    }
+                }
+            }
+            device.delete_texture(old_texture);
+        }
+
+        self.texture = Some(texture);
+    }
+
+    /// Creates the cache texture wrapper; the texture itself is allocated later.
+    ///
+    /// With `use_scatter` the scatter-update program, VBOs and VAO are created
+    /// up front; otherwise updates go through the per-row `PixelBuffer` bus.
+    pub fn new(device: &mut Device, use_scatter: bool) -> Result<Self, super::RendererError> {
+        use super::desc::GPU_CACHE_UPDATE;
+
+        let bus = if !use_scatter {
+            GpuCacheBus::PixelBuffer {
+                rows: Vec::new(),
+            }
+        } else {
+            assert!(
+                device.get_capabilities().supports_color_buffer_float,
+                "GpuCache scatter method requires EXT_color_buffer_float",
+            );
+            let program = device.create_program_linked("gpu_cache_update", &[], &GPU_CACHE_UPDATE)?;
+            let buf_position = device.create_vbo();
+            let buf_value = device.create_vbo();
+            //Note: the vertex attributes have to be supplied in the same order
+            // as for program creation, but each assigned to a different stream.
+            let vao = device.create_custom_vao(&[
+                buf_position.stream_with(&GPU_CACHE_UPDATE.vertex_attributes[0..1]),
+                buf_value.stream_with(&GPU_CACHE_UPDATE.vertex_attributes[1..2]),
+            ]);
+            GpuCacheBus::Scatter {
+                program,
+                vao,
+                buf_position,
+                buf_value,
+                count: 0,
+            }
+        };
+
+        Ok(Self {
+            texture: None,
+            bus,
+        })
+    }
+
+    pub fn deinit(mut self, device: &mut Device) { // releases the texture and, for the scatter bus, its GPU objects
+        if let Some(t) = self.texture.take() {
+            device.delete_texture(t);
+        }
+        if let GpuCacheBus::Scatter { program, vao, buf_position, buf_value, .. } = self.bus { // `PixelBuffer` holds no device objects
+            device.delete_program(program);
+            device.delete_custom_vao(vao);
+            device.delete_vbo(buf_position);
+            device.delete_vbo(buf_value);
+        }
+    }
+
+    /// Height of the backing texture, or 0 while no texture exists.
+    pub fn get_height(&self) -> i32 {
+        match self.texture {
+            Some(ref texture) => texture.get_dimensions().height,
+            None => 0,
+        }
+    }
+
+    /// Returns the backing texture. Only compiled with the "capture" feature.
+    ///
+    /// # Panics
+    /// Panics if no texture is currently held.
+    #[cfg(feature = "capture")]
+    pub fn get_texture(&self) -> &Texture {
+        self.texture.as_ref().unwrap()
+    }
+
+    /// Gets the cache ready to receive a batch of updates.
+    ///
+    /// Calls `ensure_texture` with `max_height`. For the scatter bus it also
+    /// resets the pending point count and reallocates both vertex buffers when
+    /// `total_block_count` exceeds what the value buffer currently has
+    /// allocated. The pixel-buffer bus needs no further preparation.
+    fn prepare_for_updates(
+        &mut self,
+        device: &mut Device,
+        total_block_count: usize,
+        max_height: i32,
+    ) {
+        self.ensure_texture(device, max_height);
+
+        if let GpuCacheBus::Scatter {
+            ref mut buf_position,
+            ref mut buf_value,
+            ref mut count,
+            ..
+        } = self.bus {
+            *count = 0;
+            let needs_realloc = total_block_count > buf_value.allocated_count();
+            if needs_realloc {
+                device.allocate_vbo(buf_position, total_block_count, super::ONE_TIME_USAGE_HINT);
+                device.allocate_vbo(buf_value, total_block_count, super::ONE_TIME_USAGE_HINT);
+            }
+        }
+    }
+
+    /// Marks the full width of every CPU-side row as dirty, so that `flush`
+    /// uploads them again. Only the pixel-buffer bus keeps such rows; for the
+    /// scatter bus this only logs a warning.
+    pub fn invalidate(&mut self) {
+        match self.bus {
+            GpuCacheBus::Scatter { .. } => {
+                warn!("Unable to invalidate scattered GPU cache");
+            }
+            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
+                info!("Invalidating GPU caches");
+                rows.iter_mut().for_each(|row| {
+                    row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH);
+                });
+            }
+        }
+    }
+
+    /// Applies one update list to the cache.
+    ///
+    /// * Pixel-buffer bus: copies the updated blocks into the CPU-side rows
+    ///   (growing the row list on demand) and marks the touched range dirty.
+    ///   The upload itself happens in `flush`.
+    /// * Scatter bus: writes the block values and their target positions into
+    ///   the two vertex buffers at the current `count` offset and advances
+    ///   `count`. The points are drawn in `flush`.
+    ///
+    /// # Panics
+    /// The scatter path unwraps `self.texture`, so a texture must exist.
+    /// Out-of-range block indices panic on slice indexing.
+    fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
+        match self.bus {
+            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
+                for update in &updates.updates {
+                    match *update {
+                        GpuCacheUpdate::Copy {
+                            block_index,
+                            block_count,
+                            address,
+                        } => {
+                            let row_index = address.v as usize;
+                            let first_block = address.u as usize;
+
+                            // Make sure the CPU-side shadow copy of the GPU
+                            // cache has a row for this patch to land in.
+                            if rows.len() <= row_index {
+                                rows.resize_with(row_index + 1, CacheRow::new);
+                            }
+
+                            // Copy the blocks from the patch array into the
+                            // shadow CPU copy.
+                            let target = &mut rows[row_index];
+                            for offset in 0 .. block_count {
+                                target.cpu_blocks[first_block + offset] =
+                                    updates.blocks[block_index + offset];
+                            }
+
+                            // This range now has to be uploaded to the GPU texture.
+                            target.add_dirty(first_block, block_count);
+                        }
+                    }
+                }
+            }
+            GpuCacheBus::Scatter {
+                ref buf_position,
+                ref buf_value,
+                ref mut count,
+                ..
+            } => {
+                //TODO: re-use this heap allocation
+                // Unused positions will be left as 0xFFFF, which translates to
+                // (1.0, 1.0) in the vertex output position and gets culled out
+                let mut positions = vec![[!0u16; 2]; updates.blocks.len()];
+                let texture_size = self.texture.as_ref().unwrap().get_dimensions().to_usize();
+
+                for update in &updates.updates {
+                    match *update {
+                        GpuCacheUpdate::Copy {
+                            block_index,
+                            block_count,
+                            address,
+                        } => {
+                            // Convert the absolute texel position into normalized
+                            // 16-bit form: ((2t + 1) << 15) / size equals
+                            // (t + 0.5) / size scaled by 2^16, i.e. the texel centre.
+                            let norm_y = ((2*address.v as usize + 1) << 15) / texture_size.height;
+                            for offset in 0 .. block_count {
+                                let norm_x =
+                                    ((2*address.u as usize + 2*offset + 1) << 15) / texture_size.width;
+                                positions[block_index + offset] = [norm_x as u16, norm_y as u16];
+                            }
+                        }
+                    }
+                }
+
+                device.fill_vbo(buf_value, &updates.blocks, *count);
+                device.fill_vbo(buf_position, &positions, *count);
+                *count += positions.len();
+            }
+        }
+    }
+
+    /// Pushes the accumulated updates into the GPU texture.
+    ///
+    /// * Pixel-buffer bus: uploads the dirty range of every dirty row through
+    ///   a texture uploader backed by `pbo_pool`, clears the dirty state, and
+    ///   returns the number of rows that were dirty.
+    /// * Scatter bus: draws `count` points with the scatter program into the
+    ///   texture and returns 0.
+    ///
+    /// # Panics
+    /// Panics if no texture exists.
+    fn flush(&mut self, device: &mut Device, pbo_pool: &mut UploadPBOPool) -> usize {
+        let texture = self.texture.as_ref().unwrap();
+        match self.bus {
+            GpuCacheBus::Scatter { ref program, ref vao, count, .. } => {
+                device.disable_depth();
+                device.set_blend(false);
+                device.bind_program(program);
+                device.bind_custom_vao(vao);
+                let target = DrawTarget::from_texture(texture, false);
+                device.bind_draw_target(target);
+                device.draw_nonindexed_points(0, count as _);
+                0
+            }
+            GpuCacheBus::PixelBuffer { ref mut rows } => {
+                let dirty_row_count = rows.iter().filter(|row| row.is_dirty()).count();
+                if dirty_row_count == 0 {
+                    // Nothing to upload; avoid creating an uploader at all.
+                    return 0;
+                }
+
+                let mut uploader = device.upload_texture(pbo_pool);
+
+                let dirty_rows = rows
+                    .iter_mut()
+                    .enumerate()
+                    .filter(|(_, row)| row.is_dirty());
+                for (row_index, row) in dirty_rows {
+                    let blocks = row.dirty_blocks();
+                    // One-texel-high strip starting at the first dirty block.
+                    let origin = DeviceIntPoint::new(row.min_dirty as i32, row_index as i32);
+                    let extent = DeviceIntSize::new(blocks.len() as i32, 1);
+                    let rect = DeviceIntRect::from_origin_and_size(origin, extent);
+
+                    uploader.upload(device, texture, rect, None, None, blocks.as_ptr(), blocks.len());
+
+                    row.clear_dirty();
+                }
+
+                uploader.flush(device);
+
+                dirty_row_count
+            }
+        }
+    }
+
+    /// Deletes the backing texture, if any, leaving the cache without one.
+    /// Only compiled with the "replay" feature.
+    #[cfg(feature = "replay")]
+    pub fn remove_texture(&mut self, device: &mut Device) {
+        match self.texture.take() {
+            Some(texture) => {
+                device.delete_texture(texture);
+            }
+            None => {}
+        }
+    }
+
+    /// Installs `texture` as the backing texture and, for the pixel-buffer
+    /// bus, rebuilds the CPU-side rows from `data`. Only compiled with the
+    /// "replay" feature.
+    ///
+    /// `data` is read as tightly packed `GpuBlockData` values, split into
+    /// chunks of `MAX_VERTEX_TEXTURE_WIDTH` blocks, one chunk per texture row.
+    /// Trailing bytes that do not make up a whole block are ignored. The
+    /// scatter bus keeps no CPU-side copy, so `data` is unused there.
+    ///
+    /// # Panics
+    /// Panics if a texture is already present, or if a chunk's length differs
+    /// from the length of a row's `cpu_blocks` (`copy_from_slice` requires
+    /// equal lengths).
+    #[cfg(feature = "replay")]
+    pub fn load_from_data(&mut self, texture: Texture, data: Vec<u8>) {
+        assert!(self.texture.is_none());
+        match self.bus {
+            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
+                let dim = texture.get_dimensions();
+                // A `Vec<u8>` allocation is only guaranteed to be byte-aligned,
+                // so reinterpreting it in place as `&[GpuBlockData]` would break
+                // the alignment requirement of `slice::from_raw_parts`. Decode
+                // every block with an unaligned read instead.
+                let blocks: Vec<GpuBlockData> = data
+                    .chunks_exact(mem::size_of::<GpuBlockData>())
+                    .map(|bytes| {
+                        // SAFETY: `chunks_exact` yields exactly
+                        // `size_of::<GpuBlockData>()` readable bytes and
+                        // `read_unaligned` has no alignment requirement. As in
+                        // the in-place reinterpretation this replaces, the bytes
+                        // are assumed to form a valid `GpuBlockData`.
+                        unsafe {
+                            std::ptr::read_unaligned(bytes.as_ptr() as *const GpuBlockData)
+                        }
+                    })
+                    .collect();
+                // fill up the CPU cache from the contents we just loaded
+                rows.clear();
+                rows.extend((0 .. dim.height).map(|_| CacheRow::new()));
+                let chunks = blocks.chunks(super::MAX_VERTEX_TEXTURE_WIDTH);
+                debug_assert_eq!(chunks.len(), rows.len());
+                for (row, chunk) in rows.iter_mut().zip(chunks) {
+                    row.cpu_blocks.copy_from_slice(chunk);
+                }
+            }
+            GpuCacheBus::Scatter { .. } => {}
+        }
+        self.texture = Some(texture);
+    }
+
+    /// Adds this cache's memory usage to `report`: the CPU-side rows of the
+    /// pixel-buffer bus (measured with `size_op_funs.size_of_op`) and the size
+    /// of the backing texture, if there is one.
+    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
+        // GPU cache CPU-side mirror (pixel-buffer bus only).
+        match self.bus {
+            GpuCacheBus::PixelBuffer { ref rows, .. } => {
+                for row in rows {
+                    let row_bytes = unsafe {
+                        (size_op_funs.size_of_op)(row.cpu_blocks.as_ptr() as *const _)
+                    };
+                    report.gpu_cache_cpu_mirror += row_bytes;
+                }
+            }
+            GpuCacheBus::Scatter { .. } => {}
+        }
+
+        // GPU cache GPU memory.
+        if let Some(ref texture) = self.texture {
+            report.gpu_cache_textures += texture.size_in_bytes();
+        }
+    }
+}
+
+impl super::Renderer {
+    /// Drains `pending_gpu_cache_updates` into the GPU cache texture and
+    /// flushes the result to the GPU, recording profiler counters for the
+    /// upload time and the number of rows and blocks updated.
+    ///
+    /// If the requested height exceeds the maximum texture size, a
+    /// `MaxTextureSize` error is queued once (guarded by `gpu_cache_overflow`).
+    pub fn update_gpu_cache(&mut self) {
+        let _gm = self.gpu_profiler.start_marker("gpu cache update");
+
+        // For an artificial stress test of GPU cache resizing,
+        // always pass an extra update list with at least one block in it.
+        let gpu_cache_height = self.gpu_cache_texture.get_height();
+        if gpu_cache_height != 0 && GPU_CACHE_RESIZE_TEST {
+            let stress_list = GpuCacheUpdateList {
+                frame_id: FrameId::INVALID,
+                clear: false,
+                height: gpu_cache_height,
+                blocks: vec![[1f32; 4].into()],
+                updates: Vec::new(),
+                debug_commands: Vec::new(),
+            };
+            self.pending_gpu_cache_updates.push(stress_list);
+        }
+
+        // Total number of blocks across all pending lists, and the tallest
+        // height any of them (or the current texture) asks for.
+        let updated_blocks: usize = self
+            .pending_gpu_cache_updates
+            .iter()
+            .map(|list| list.blocks.len())
+            .sum();
+        let max_requested_height = self
+            .pending_gpu_cache_updates
+            .iter()
+            .map(|list| list.height)
+            .fold(gpu_cache_height, cmp::max);
+
+        if max_requested_height > self.get_max_texture_size() && !self.gpu_cache_overflow {
+            self.gpu_cache_overflow = true;
+            self.renderer_errors.push(super::RendererError::MaxTextureSize);
+        }
+
+        // Note: if we decide to switch to scatter-style GPU cache update
+        // permanently, we can have this code nicer with `BufferUploader` kind
+        // of helper, similarly to how `TextureUploader` API is used.
+        self.gpu_cache_texture.prepare_for_updates(
+            &mut self.device,
+            updated_blocks,
+            max_requested_height,
+        );
+
+        for update_list in self.pending_gpu_cache_updates.drain(..) {
+            assert!(update_list.height <= max_requested_height);
+            // Track the newest frame id seen so far.
+            if update_list.frame_id > self.gpu_cache_frame_id {
+                self.gpu_cache_frame_id = update_list.frame_id;
+            }
+            self.gpu_cache_texture.update(&mut self.device, &update_list);
+        }
+
+        self.profile.start_time(profiler::GPU_CACHE_UPLOAD_TIME);
+        let updated_rows = self.gpu_cache_texture.flush(
+            &mut self.device,
+            &mut self.texture_upload_pbo_pool,
+        );
+        self.gpu_cache_upload_time += self.profile.end_time(profiler::GPU_CACHE_UPLOAD_TIME);
+
+        self.profile.set(profiler::GPU_CACHE_ROWS_UPDATED, updated_rows);
+        self.profile.set(profiler::GPU_CACHE_BLOCKS_UPDATED, updated_blocks);
+    }
+
+    /// Brings the GPU cache up to date and binds its texture to the
+    /// `GpuCache` sampler.
+    ///
+    /// When a cache clear is pending, the cache object is first replaced by a
+    /// fresh one using the same bus kind and the old one is deinitialized.
+    /// Update lists produced by `update_deferred_resolves` are queued before
+    /// `update_gpu_cache` runs.
+    ///
+    /// # Errors
+    /// Propagates a failure to construct the replacement cache.
+    ///
+    /// # Panics
+    /// Panics if the cache has no texture after the update.
+    pub fn prepare_gpu_cache(
+        &mut self,
+        deferred_resolves: &[DeferredResolve],
+    ) -> Result<(), super::RendererError> {
+        if self.pending_gpu_cache_clear {
+            let use_scatter = match self.gpu_cache_texture.bus {
+                GpuCacheBus::Scatter { .. } => true,
+                GpuCacheBus::PixelBuffer { .. } => false,
+            };
+            let fresh_cache = GpuCacheTexture::new(&mut self.device, use_scatter)?;
+            let stale_cache = mem::replace(&mut self.gpu_cache_texture, fresh_cache);
+            stale_cache.deinit(&mut self.device);
+            self.pending_gpu_cache_clear = false;
+        }
+
+        let resolve_updates = self.update_deferred_resolves(deferred_resolves);
+        self.pending_gpu_cache_updates.extend(resolve_updates);
+
+        self.update_gpu_cache();
+
+        // Note: the texture might have changed during the `update`,
+        // so we need to bind it here.
+        let cache_texture = self.gpu_cache_texture.texture.as_ref().unwrap();
+        self.device.bind_texture(
+            super::TextureSampler::GpuCache,
+            cache_texture,
+            Swizzle::default(),
+        );
+
+        Ok(())
+    }
+
+    /// Reads the whole GPU cache texture back as `RGBAF32` texels and returns
+    /// the texture dimensions together with the raw bytes (16 per texel).
+    ///
+    /// # Panics
+    /// Panics if the cache has no texture.
+    pub fn read_gpu_cache(&mut self) -> (DeviceIntSize, Vec<u8>) {
+        let texture = self.gpu_cache_texture.texture.as_ref().unwrap();
+        let size = device_size_as_framebuffer_size(texture.get_dimensions());
+        // Four 32-bit float channels per texel.
+        let byte_count = (size.width * size.height * 16) as usize;
+        let mut texels = vec![0; byte_count];
+
+        self.device.begin_frame();
+        self.device.bind_read_target(ReadTarget::from_texture(texture));
+        self.device.read_pixels_into(
+            size.into(),
+            api::ImageFormat::RGBAF32,
+            &mut texels,
+        );
+        self.device.reset_read_target();
+        self.device.end_frame();
+
+        (texture.get_dimensions(), texels)
+    }
+}
diff --git a/gfx/wr/webrender/src/renderer/init.rs b/gfx/wr/webrender/src/renderer/init.rs
new file mode 100644
index 0000000000..767f9c4b9e
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/init.rs
@@ -0,0 +1,788 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{BlobImageHandler, ColorF, IdNamespace, DocumentId, CrashAnnotator};
+use api::{VoidPtrToSizeFn, FontRenderMode, ImageFormat};
+use api::{RenderNotifier, ImageBufferKind};
+use api::units::*;
+use api::channel::unbounded_channel;
+pub use api::DebugFlags;
+
+use crate::render_api::{RenderApiSender, FrameMsg};
+use crate::composite::{CompositorKind, CompositorConfig};
+use crate::device::{
+    UploadMethod, UploadPBOPool, VertexUsageHint, Device, ProgramCache, TextureFilter
+};
+use crate::frame_builder::FrameBuilderConfig;
+use crate::glyph_cache::GlyphCache;
+use glyph_rasterizer::{GlyphRasterizer, SharedFontResources};
+use crate::gpu_types::PrimitiveInstanceData;
+use crate::internal_types::{FastHashMap, FastHashSet, FrameId};
+use crate::picture;
+use crate::profiler::{self, Profiler, TransactionProfile};
+use crate::device::query::{GpuProfiler, GpuDebugMethod};
+use crate::render_backend::RenderBackend;
+use crate::resource_cache::ResourceCache;
+use crate::scene_builder_thread::{SceneBuilderThread, SceneBuilderThreadChannels, LowPrioritySceneBuilderThread};
+use crate::texture_cache::{TextureCache, TextureCacheConfig};
+use crate::picture_textures::PictureTextures;
+use crate::renderer::{
+    debug, gpu_cache, vertex, gl,
+    Renderer, DebugOverlayState, BufferDamageTracker, PipelineInfo, TextureResolver,
+    RendererError, ShaderPrecacheFlags, VERTEX_DATA_TEXTURE_COUNT,
+    upload::UploadTexturePool,
+    shade::{Shaders, SharedShaders},
+};
+
+use std::{
+    mem,
+    thread,
+    cell::RefCell,
+    collections::VecDeque,
+    rc::Rc,
+    sync::{Arc, atomic::{AtomicBool, Ordering}},
+    num::NonZeroUsize,
+    path::PathBuf,
+};
+
+use tracy_rs::register_thread_with_profiler;
+use rayon::{ThreadPool, ThreadPoolBuilder};
+use malloc_size_of::MallocSizeOfOps;
+
+/// Use this hint for all vertex data re-initialization. This allows
+/// the driver to better re-use RBOs internally.
+///
+/// It is also the usage hint of the default `upload_method`
+/// (`UploadMethod::PixelBuffer`) in `WebRenderOptions::default()`.
+pub const ONE_TIME_USAGE_HINT: VertexUsageHint = VertexUsageHint::Stream;
+
+/// Is only false if no WR instances have ever been created.
+///
+/// `create_webrender_instance` stores `true` here on every call.
+static HAS_BEEN_INITIALIZED: AtomicBool = AtomicBool::new(false);
+
+/// Returns true if a WR instance has ever been initialized in this process.
+///
+/// This is a sequentially consistent load of `HAS_BEEN_INITIALIZED`.
+pub fn wr_has_been_initialized() -> bool {
+    HAS_BEEN_INITIALIZED.load(Ordering::SeqCst)
+}
+
+/// Allows callers to hook in at certain points of the async scene build. These
+/// functions are all called from the scene builder thread.
+///
+/// Implementations are supplied through `WebRenderOptions::scene_builder_hooks`,
+/// which additionally requires them to be `Send`.
+pub trait SceneBuilderHooks {
+    /// This is called exactly once, when the scene builder thread is started
+    /// and before it processes anything.
+    fn register(&self);
+    /// This is called before each scene build starts.
+    fn pre_scene_build(&self);
+    /// This is called before each scene swap occurs.
+    fn pre_scene_swap(&self);
+    /// This is called after each scene swap occurs. The PipelineInfo contains
+    /// the updated epochs and pipelines removed in the new scene compared to
+    /// the old scene.
+    ///
+    /// NOTE(review): despite its singular name, `document_id` is a list of
+    /// document ids.
+    fn post_scene_swap(&self, document_id: &Vec<DocumentId>, info: PipelineInfo);
+    /// This is called after a resource update operation on the scene builder
+    /// thread, in the case where resource updates were applied without a scene
+    /// build.
+    fn post_resource_update(&self, document_ids: &Vec<DocumentId>);
+    /// This is called after a scene build completes without any changes being
+    /// made. We guarantee that each pre_scene_build call will be matched with
+    /// exactly one of post_scene_swap, post_resource_update or
+    /// post_empty_scene_build.
+    fn post_empty_scene_build(&self);
+    /// This is a generic callback which provides an opportunity to run code
+    /// on the scene builder thread. This is called as part of the main message
+    /// loop of the scene builder thread, but outside of any specific message
+    /// handler.
+    fn poke(&self);
+    /// This is called exactly once, when the scene builder thread is about to
+    /// terminate.
+    fn deregister(&self);
+}
+
+/// Allows callers to hook into the main render_backend loop and provide
+/// additional frame ops for generate_frame transactions. These functions
+/// are all called from the render backend thread.
+///
+/// Implementations are supplied through `WebRenderOptions::sampler`, which
+/// additionally requires them to be `Send`.
+pub trait AsyncPropertySampler {
+    /// This is called exactly once, when the render backend thread is started
+    /// and before it processes anything.
+    fn register(&self);
+    /// This is called for each transaction with the generate_frame flag set
+    /// (i.e. that will trigger a render). The list of frame messages returned
+    /// are processed as though they were part of the original transaction.
+    ///
+    /// `document_id` identifies the document being sampled.
+    /// NOTE(review): `generated_frame_id` is presumably the id of the frame
+    /// about to be generated, when there is one — confirm against the caller.
+    fn sample(&self, document_id: DocumentId, generated_frame_id: Option<u64>) -> Vec<FrameMsg>;
+    /// This is called exactly once, when the render backend thread is about to
+    /// terminate.
+    fn deregister(&self);
+}
+
+/// Configuration consumed by `create_webrender_instance`.
+///
+/// `Default::default()` provides a baseline set of values; individual fields
+/// can be overridden with struct-update syntax.
+pub struct WebRenderOptions {
+    /// Passed through to `Device::new`.
+    pub resource_override_path: Option<PathBuf>,
+    /// Whether to use shaders that have been optimized at build time.
+    pub use_optimized_shaders: bool,
+    /// When false, the default font render mode is `FontRenderMode::Mono`.
+    pub enable_aa: bool,
+    /// When true, an 8x8 dither matrix texture is created at start-up.
+    pub enable_dithering: bool,
+    pub max_recorded_profiles: usize,
+    pub precache_flags: ShaderPrecacheFlags,
+    /// Enable sub-pixel anti-aliasing if a fast implementation is available.
+    pub enable_subpixel_aa: bool,
+    pub clear_color: ColorF,
+    /// `None` falls back to the device's `prefers_clear_scissor` capability.
+    pub enable_clear_scissor: Option<bool>,
+    /// Optional cap on the texture size used internally. Must be at least
+    /// 2048 (asserted); the effective limit is the smaller of this value and
+    /// the device's maximum texture size.
+    pub max_internal_texture_size: Option<i32>,
+    /// Clamped to the effective maximum internal texture size.
+    pub image_tiling_threshold: i32,
+    /// Passed through to `Device::new`.
+    pub upload_method: UploadMethod,
+    /// The default size in bytes for PBOs used to upload texture data.
+    pub upload_pbo_default_size: usize,
+    /// Passed through to `Device::new`.
+    pub batched_upload_threshold: i32,
+    pub workers: Option<Arc<ThreadPool>>,
+    pub enable_multithreading: bool,
+    pub blob_image_handler: Option<Box<dyn BlobImageHandler>>,
+    pub crash_annotator: Option<Box<dyn CrashAnnotator>>,
+    pub size_of_op: Option<VoidPtrToSizeFn>,
+    pub enclosing_size_of_op: Option<VoidPtrToSizeFn>,
+    /// Taken out of the options and handed to `Device::new`.
+    pub cached_programs: Option<Rc<ProgramCache>>,
+    pub debug_flags: DebugFlags,
+    pub renderer_id: Option<u64>,
+    pub scene_builder_hooks: Option<Box<dyn SceneBuilderHooks + Send>>,
+    pub sampler: Option<Box<dyn AsyncPropertySampler + Send>>,
+    pub support_low_priority_transactions: bool,
+    pub namespace_alloc_by_client: bool,
+    /// If namespaces are allocated by the client, then the namespace for fonts
+    /// must also be allocated by the client to avoid namespace collisions with
+    /// the backend.
+    pub shared_font_namespace: Option<IdNamespace>,
+    pub testing: bool,
+    /// Set to true if this GPU supports hardware fast clears as a performance
+    /// optimization. Likely requires benchmarking on various GPUs to see if
+    /// it is a performance win. The default is false, which tends to be best
+    /// performance on lower end / integrated GPUs.
+    pub gpu_supports_fast_clears: bool,
+    /// Dual-source blending is used only when this is true and the device
+    /// reports support for it.
+    pub allow_dual_source_blending: bool,
+    /// The advanced blend equation is used only when this is true and the
+    /// device reports support for it.
+    pub allow_advanced_blend_equation: bool,
+    /// If true, allow textures to be initialized with glTexStorage.
+    /// This affects VRAM consumption and data upload paths.
+    pub allow_texture_storage_support: bool,
+    /// If true, we allow the data uploaded in a different format from the
+    /// one expected by the driver, pretending the format is matching, and
+    /// swizzling the components on all the shader sampling.
+    pub allow_texture_swizzling: bool,
+    /// Use `ps_clear` shader with batched quad rendering to clear the rects
+    /// in texture cache and picture cache tasks.
+    /// This helps to work around some Intel drivers
+    /// that incorrectly synchronize clears to following draws.
+    pub clear_caches_with_quads: bool,
+    /// Output the source of the shader with the given name.
+    pub dump_shader_source: Option<String>,
+    /// Passed through to `Device::new`.
+    pub surface_origin_is_top_left: bool,
+    /// The configuration options defining how WR composites the final scene.
+    pub compositor_config: CompositorConfig,
+    pub enable_gpu_markers: bool,
+    /// If true, panic whenever a GL error occurs. This has a significant
+    /// performance impact, so only use when debugging specific problems!
+    pub panic_on_gl_error: bool,
+    pub picture_tile_size: Option<DeviceIntSize>,
+    pub texture_cache_config: TextureCacheConfig,
+    /// If true, we'll use instanced vertex attributes. Each instance is a quad.
+    /// If false, we'll duplicate the instance attributes per vertex and issue
+    /// regular indexed draws instead.
+    pub enable_instancing: bool,
+    /// If true, we'll reject contexts backed by a software rasterizer, except
+    /// Software WebRender.
+    pub reject_software_rasterizer: bool,
+    /// If enabled, pinch-zoom will apply the zoom factor during compositing
+    /// of picture cache tiles. This is higher performance (tiles are not
+    /// re-rasterized during zoom) but lower quality result. For most display
+    /// items, if the zoom factor is relatively small, bilinear filtering should
+    /// make the result look quite close to the high-quality zoom, except for glyphs.
+    pub low_quality_pinch_zoom: bool,
+    pub max_shared_surface_size: i32,
+}
+
+// Internal tuning constants associated with `WebRenderOptions`.
+impl WebRenderOptions {
+    /// Number of batches to look back in history for adding the current
+    /// transparent instance into.
+    const BATCH_LOOKBACK_COUNT: usize = 10;
+
+    /// Since we are re-initializing the instance buffers on every draw call,
+    /// the driver has to internally manage PBOs in flight.
+    /// It's typically done by bucketing up to a specific limit, and then
+    /// just individually managing the largest buffers.
+    /// Having a limit here allows the drivers to more easily manage
+    /// the PBOs for us.
+    ///
+    /// The value is in bytes (0x20000 = 128 KiB). `create_webrender_instance`
+    /// divides it by `size_of::<PrimitiveInstanceData>()` to obtain the
+    /// maximum primitive instance count passed on when instancing is disabled.
+    const MAX_INSTANCE_BUFFER_SIZE: usize = 0x20000; // actual threshold in macOS GL drivers
+}
+
+impl Default for WebRenderOptions {
+    /// Baseline configuration: every optional hook, handler, path and limit is
+    /// `None`; anti-aliasing, multithreading, instancing, dual-source blending,
+    /// texture storage, texture swizzling, quad-based cache clears and GPU
+    /// markers are enabled; dithering, sub-pixel AA and the advanced blend
+    /// equation are disabled.
+    fn default() -> Self {
+        WebRenderOptions {
+            resource_override_path: None,
+            use_optimized_shaders: false,
+            enable_aa: true,
+            enable_dithering: false,
+            debug_flags: DebugFlags::empty(),
+            max_recorded_profiles: 0,
+            precache_flags: ShaderPrecacheFlags::empty(),
+            enable_subpixel_aa: false,
+            // Opaque white.
+            clear_color: ColorF::new(1.0, 1.0, 1.0, 1.0),
+            enable_clear_scissor: None,
+            max_internal_texture_size: None,
+            image_tiling_threshold: 4096,
+            // This is best as `Immediate` on Angle, or `Pixelbuffer(Dynamic)` on GL,
+            // but we are unable to make this decision here, so picking the reasonable medium.
+            upload_method: UploadMethod::PixelBuffer(ONE_TIME_USAGE_HINT),
+            // 1 MiB.
+            upload_pbo_default_size: 512 * 512 * 4,
+            batched_upload_threshold: 512 * 512,
+            workers: None,
+            enable_multithreading: true,
+            blob_image_handler: None,
+            crash_annotator: None,
+            size_of_op: None,
+            enclosing_size_of_op: None,
+            renderer_id: None,
+            cached_programs: None,
+            scene_builder_hooks: None,
+            sampler: None,
+            support_low_priority_transactions: false,
+            namespace_alloc_by_client: false,
+            shared_font_namespace: None,
+            testing: false,
+            gpu_supports_fast_clears: false,
+            allow_dual_source_blending: true,
+            allow_advanced_blend_equation: false,
+            allow_texture_storage_support: true,
+            allow_texture_swizzling: true,
+            clear_caches_with_quads: true,
+            dump_shader_source: None,
+            surface_origin_is_top_left: false,
+            compositor_config: CompositorConfig::default(),
+            enable_gpu_markers: true,
+            panic_on_gl_error: false,
+            picture_tile_size: None,
+            texture_cache_config: TextureCacheConfig::DEFAULT,
+            // Disabling instancing means more vertex data to upload and potentially
+            // process by the vertex shaders.
+            enable_instancing: true,
+            reject_software_rasterizer: false,
+            low_quality_pinch_zoom: false,
+            max_shared_surface_size: 2048,
+        }
+    }
+}
+
+/// Initializes WebRender and creates a `Renderer` and `RenderApiSender`.
+///
+/// # Examples
+/// Initializes a `Renderer` with some reasonable values. For more information see
+/// [`WebRenderOptions`][WebRenderOptions].
+///
+/// ```rust,ignore
+/// # use webrender::renderer::Renderer;
+/// # use std::path::PathBuf;
+/// let opts = webrender::WebRenderOptions {
+///    resource_override_path: None,
+///    enable_aa: false,
+///    ..Default::default()
+/// };
+/// let (renderer, sender) = create_webrender_instance(gl, notifier, opts, None)?;
+/// ```
+/// [WebRenderOptions]: struct.WebRenderOptions.html
+pub fn create_webrender_instance(
+    gl: Rc<dyn gl::Gl>,
+    notifier: Box<dyn RenderNotifier>,
+    mut options: WebRenderOptions,
+    shaders: Option<&SharedShaders>,
+) -> Result<(Renderer, RenderApiSender), RendererError> {
+    if !wr_has_been_initialized() {
+        // If the profiler feature is enabled, try to load the profiler shared library
+        // if the path was provided.
+        #[cfg(feature = "profiler")]
+        unsafe {
+            if let Ok(ref tracy_path) = std::env::var("WR_TRACY_PATH") {
+                let ok = tracy_rs::load(tracy_path);
+                info!("Load tracy from {} -> {}", tracy_path, ok);
+            }
+        }
+
+        register_thread_with_profiler("Compositor".to_owned());
+    }
+
+    HAS_BEEN_INITIALIZED.store(true, Ordering::SeqCst);
+
+    let (api_tx, api_rx) = unbounded_channel();
+    let (result_tx, result_rx) = unbounded_channel();
+    let gl_type = gl.get_type();
+
+    let mut device = Device::new(
+        gl,
+        options.crash_annotator.clone(),
+        options.resource_override_path.clone(),
+        options.use_optimized_shaders,
+        options.upload_method.clone(),
+        options.batched_upload_threshold,
+        options.cached_programs.take(),
+        options.allow_texture_storage_support,
+        options.allow_texture_swizzling,
+        options.dump_shader_source.take(),
+        options.surface_origin_is_top_left,
+        options.panic_on_gl_error,
+    );
+
+    let color_cache_formats = device.preferred_color_formats();
+    let swizzle_settings = device.swizzle_settings();
+    let use_dual_source_blending =
+        device.get_capabilities().supports_dual_source_blending &&
+        options.allow_dual_source_blending;
+    let ext_blend_equation_advanced =
+        options.allow_advanced_blend_equation &&
+        device.get_capabilities().supports_advanced_blend_equation;
+    let ext_blend_equation_advanced_coherent =
+        device.supports_extension("GL_KHR_blend_equation_advanced_coherent");
+
+    let enable_clear_scissor = options
+        .enable_clear_scissor
+        .unwrap_or(device.get_capabilities().prefers_clear_scissor);
+
+    // 2048 is the minimum that the texture cache can work with.
+    const MIN_TEXTURE_SIZE: i32 = 2048;
+    let mut max_internal_texture_size = device.max_texture_size();
+    if max_internal_texture_size < MIN_TEXTURE_SIZE {
+        // Broken GL contexts can return a max texture size of zero (See #1260).
+        // Better to gracefully fail now than panic as soon as a texture is allocated.
+        error!(
+            "Device reporting insufficient max texture size ({})",
+            max_internal_texture_size
+        );
+        return Err(RendererError::MaxTextureSize);
+    }
+    if let Some(internal_limit) = options.max_internal_texture_size {
+        assert!(internal_limit >= MIN_TEXTURE_SIZE);
+        max_internal_texture_size = max_internal_texture_size.min(internal_limit);
+    }
+
+    if options.reject_software_rasterizer {
+        let renderer_name_lc = device.get_capabilities().renderer_name.to_lowercase();
+        if renderer_name_lc.contains("llvmpipe") || renderer_name_lc.contains("softpipe") || renderer_name_lc.contains("software rasterizer") {
+        return Err(RendererError::SoftwareRasterizer);
+        }
+    }
+
+    let image_tiling_threshold = options.image_tiling_threshold
+        .min(max_internal_texture_size);
+
+    device.begin_frame();
+
+    let shaders = match shaders {
+        Some(shaders) => Rc::clone(shaders),
+        None => Rc::new(RefCell::new(Shaders::new(&mut device, gl_type, &options)?)),
+    };
+
+    let dither_matrix_texture = if options.enable_dithering {
+        let dither_matrix: [u8; 64] = [
+            0,
+            48,
+            12,
+            60,
+            3,
+            51,
+            15,
+            63,
+            32,
+            16,
+            44,
+            28,
+            35,
+            19,
+            47,
+            31,
+            8,
+            56,
+            4,
+            52,
+            11,
+            59,
+            7,
+            55,
+            40,
+            24,
+            36,
+            20,
+            43,
+            27,
+            39,
+            23,
+            2,
+            50,
+            14,
+            62,
+            1,
+            49,
+            13,
+            61,
+            34,
+            18,
+            46,
+            30,
+            33,
+            17,
+            45,
+            29,
+            10,
+            58,
+            6,
+            54,
+            9,
+            57,
+            5,
+            53,
+            42,
+            26,
+            38,
+            22,
+            41,
+            25,
+            37,
+            21,
+        ];
+
+        let texture = device.create_texture(
+            ImageBufferKind::Texture2D,
+            ImageFormat::R8,
+            8,
+            8,
+            TextureFilter::Nearest,
+            None,
+        );
+        device.upload_texture_immediate(&texture, &dither_matrix);
+
+        Some(texture)
+    } else {
+        None
+    };
+
+    let max_primitive_instance_count =
+        WebRenderOptions::MAX_INSTANCE_BUFFER_SIZE / mem::size_of::<PrimitiveInstanceData>();
+    let vaos = vertex::RendererVAOs::new(
+        &mut device,
+        if options.enable_instancing { None } else { NonZeroUsize::new(max_primitive_instance_count) },
+    );
+
+    let texture_upload_pbo_pool = UploadPBOPool::new(&mut device, options.upload_pbo_default_size);
+    let staging_texture_pool = UploadTexturePool::new();
+    let texture_resolver = TextureResolver::new(&mut device);
+
+    let mut vertex_data_textures = Vec::new();
+    for _ in 0 .. VERTEX_DATA_TEXTURE_COUNT {
+        vertex_data_textures.push(vertex::VertexDataTextures::new());
+    }
+
+    // On some (mostly older, integrated) GPUs, the normal GPU texture cache update path
+    // doesn't work well when running on ANGLE, causing CPU stalls inside D3D and/or the
+    // GPU driver. See https://bugzilla.mozilla.org/show_bug.cgi?id=1576637 for much
+    // more detail. To reduce the number of code paths we have active that require testing,
+    // we will enable the GPU cache scatter update path on all devices running with ANGLE.
+    // We want a better solution long-term, but for now this is a significant performance
+    // improvement on HD4600 era GPUs, and shouldn't hurt performance in a noticeable
+    // way on other systems running under ANGLE.
+    let is_software = device.get_capabilities().renderer_name.starts_with("Software");
+
+    // On other GL platforms, like macOS or Android, creating many PBOs is very inefficient.
+    // This is what happens in GPU cache updates in PBO path. Instead, we switch everything
+    // except software GL to use the GPU scattered updates.
+    let supports_scatter = device.get_capabilities().supports_color_buffer_float;
+    let gpu_cache_texture = gpu_cache::GpuCacheTexture::new(
+        &mut device,
+        supports_scatter && !is_software,
+    )?;
+
+    device.end_frame();
+
+    let backend_notifier = notifier.clone();
+
+    let clear_alpha_targets_with_quads = !device.get_capabilities().supports_alpha_target_clears;
+
+    let prefer_subpixel_aa = options.enable_subpixel_aa && use_dual_source_blending;
+    let default_font_render_mode = match (options.enable_aa, prefer_subpixel_aa) {
+        (true, true) => FontRenderMode::Subpixel,
+        (true, false) => FontRenderMode::Alpha,
+        (false, _) => FontRenderMode::Mono,
+    };
+
+    let compositor_kind = match options.compositor_config {
+        CompositorConfig::Draw { max_partial_present_rects, draw_previous_partial_present_regions, .. } => {
+            CompositorKind::Draw { max_partial_present_rects, draw_previous_partial_present_regions }
+        }
+        CompositorConfig::Native { ref compositor } => {
+            let capabilities = compositor.get_capabilities(&mut device);
+
+            CompositorKind::Native {
+                capabilities,
+            }
+        }
+    };
+
+    let config = FrameBuilderConfig {
+        default_font_render_mode,
+        dual_source_blending_is_supported: use_dual_source_blending,
+        testing: options.testing,
+        gpu_supports_fast_clears: options.gpu_supports_fast_clears,
+        gpu_supports_advanced_blend: ext_blend_equation_advanced,
+        advanced_blend_is_coherent: ext_blend_equation_advanced_coherent,
+        gpu_supports_render_target_partial_update: device.get_capabilities().supports_render_target_partial_update,
+        external_images_require_copy: !device.get_capabilities().supports_image_external_essl3,
+        batch_lookback_count: WebRenderOptions::BATCH_LOOKBACK_COUNT,
+        background_color: Some(options.clear_color),
+        compositor_kind,
+        tile_size_override: None,
+        max_surface_override: None,
+        max_depth_ids: device.max_depth_ids(),
+        max_target_size: max_internal_texture_size,
+        force_invalidation: false,
+        is_software,
+        low_quality_pinch_zoom: options.low_quality_pinch_zoom,
+        max_shared_surface_size: options.max_shared_surface_size,
+    };
+    info!("WR {:?}", config);
+
+    let debug_flags = options.debug_flags;
+    let size_of_op = options.size_of_op;
+    let enclosing_size_of_op = options.enclosing_size_of_op;
+    let make_size_of_ops =
+        move || size_of_op.map(|o| MallocSizeOfOps::new(o, enclosing_size_of_op));
+    let workers = options
+        .workers
+        .take()
+        .unwrap_or_else(|| {
+            let worker = ThreadPoolBuilder::new()
+                .thread_name(|idx|{ format!("WRWorker#{}", idx) })
+                .start_handler(move |idx| {
+                    register_thread_with_profiler(format!("WRWorker#{}", idx));
+                    profiler::register_thread(&format!("WRWorker#{}", idx));
+                })
+                .exit_handler(move |_idx| {
+                    profiler::unregister_thread();
+                })
+                .build();
+            Arc::new(worker.unwrap())
+        });
+    let sampler = options.sampler;
+    let namespace_alloc_by_client = options.namespace_alloc_by_client;
+
+    // Ensure shared font keys exist within their own unique namespace so
+    // that they don't accidentally collide across Renderer instances.
+    let font_namespace = if namespace_alloc_by_client {
+        options.shared_font_namespace.expect("Shared font namespace must be allocated by client")
+    } else {
+        RenderBackend::next_namespace_id()
+    };
+    let fonts = SharedFontResources::new(font_namespace);
+
+    let blob_image_handler = options.blob_image_handler.take();
+    let scene_builder_hooks = options.scene_builder_hooks;
+    let rb_thread_name = format!("WRRenderBackend#{}", options.renderer_id.unwrap_or(0));
+    let scene_thread_name = format!("WRSceneBuilder#{}", options.renderer_id.unwrap_or(0));
+    let lp_scene_thread_name = format!("WRSceneBuilderLP#{}", options.renderer_id.unwrap_or(0));
+    let glyph_rasterizer = GlyphRasterizer::new(workers, device.get_capabilities().supports_r8_texture_upload);
+
+    let (scene_builder_channels, scene_tx) =
+        SceneBuilderThreadChannels::new(api_tx.clone());
+
+    let sb_fonts = fonts.clone();
+
+    thread::Builder::new().name(scene_thread_name.clone()).spawn(move || {
+        register_thread_with_profiler(scene_thread_name.clone());
+        profiler::register_thread(&scene_thread_name);
+
+        let mut scene_builder = SceneBuilderThread::new(
+            config,
+            sb_fonts,
+            make_size_of_ops(),
+            scene_builder_hooks,
+            scene_builder_channels,
+        );
+        scene_builder.run();
+
+        profiler::unregister_thread();
+    })?;
+
+    let low_priority_scene_tx = if options.support_low_priority_transactions {
+        let (low_priority_scene_tx, low_priority_scene_rx) = unbounded_channel();
+        let lp_builder = LowPrioritySceneBuilderThread {
+            rx: low_priority_scene_rx,
+            tx: scene_tx.clone(),
+        };
+
+        thread::Builder::new().name(lp_scene_thread_name.clone()).spawn(move || {
+            register_thread_with_profiler(lp_scene_thread_name.clone());
+            profiler::register_thread(&lp_scene_thread_name);
+
+            let mut scene_builder = lp_builder;
+            scene_builder.run();
+
+            profiler::unregister_thread();
+        })?;
+
+        low_priority_scene_tx
+    } else {
+        scene_tx.clone()
+    };
+
+    let rb_blob_handler = blob_image_handler
+        .as_ref()
+        .map(|handler| handler.create_similar());
+
+    let texture_cache_config = options.texture_cache_config.clone();
+    let mut picture_tile_size = options.picture_tile_size.unwrap_or(picture::TILE_SIZE_DEFAULT);
+    // Clamp the picture tile size to reasonable values.
+    picture_tile_size.width = picture_tile_size.width.max(128).min(4096);
+    picture_tile_size.height = picture_tile_size.height.max(128).min(4096);
+
+    let picture_texture_filter = if options.low_quality_pinch_zoom {
+        TextureFilter::Linear
+    } else {
+        TextureFilter::Nearest
+    };
+
+    let rb_scene_tx = scene_tx.clone();
+    let rb_fonts = fonts.clone();
+    let enable_multithreading = options.enable_multithreading;
+    thread::Builder::new().name(rb_thread_name.clone()).spawn(move || {
+        register_thread_with_profiler(rb_thread_name.clone());
+        profiler::register_thread(&rb_thread_name);
+
+        let texture_cache = TextureCache::new(
+            max_internal_texture_size,
+            image_tiling_threshold,
+            color_cache_formats,
+            swizzle_settings,
+            &texture_cache_config,
+        );
+
+        let picture_textures = PictureTextures::new(
+            picture_tile_size,
+            picture_texture_filter,
+        );
+
+        let glyph_cache = GlyphCache::new();
+
+        let mut resource_cache = ResourceCache::new(
+            texture_cache,
+            picture_textures,
+            glyph_rasterizer,
+            glyph_cache,
+            rb_fonts,
+            rb_blob_handler,
+        );
+
+        resource_cache.enable_multithreading(enable_multithreading);
+
+        let mut backend = RenderBackend::new(
+            api_rx,
+            result_tx,
+            rb_scene_tx,
+            resource_cache,
+            backend_notifier,
+            config,
+            sampler,
+            make_size_of_ops(),
+            debug_flags,
+            namespace_alloc_by_client,
+        );
+        backend.run();
+        profiler::unregister_thread();
+    })?;
+
+    let debug_method = if !options.enable_gpu_markers {
+        // The GPU markers are disabled.
+        GpuDebugMethod::None
+    } else if device.supports_extension("GL_KHR_debug") {
+        GpuDebugMethod::KHR
+    } else if device.supports_extension("GL_EXT_debug_marker") {
+        GpuDebugMethod::MarkerEXT
+    } else {
+        warn!("asking to enable_gpu_markers but no supporting extension was found");
+        GpuDebugMethod::None
+    };
+
+    info!("using {:?}", debug_method);
+
+    let gpu_profiler = GpuProfiler::new(Rc::clone(device.rc_gl()), debug_method);
+    #[cfg(feature = "capture")]
+    let read_fbo = device.create_fbo();
+
+    let mut renderer = Renderer {
+        result_rx,
+        api_tx: api_tx.clone(),
+        device,
+        active_documents: FastHashMap::default(),
+        pending_texture_updates: Vec::new(),
+        pending_texture_cache_updates: false,
+        pending_native_surface_updates: Vec::new(),
+        pending_gpu_cache_updates: Vec::new(),
+        pending_gpu_cache_clear: false,
+        pending_shader_updates: Vec::new(),
+        shaders,
+        debug: debug::LazyInitializedDebugRenderer::new(),
+        debug_flags: DebugFlags::empty(),
+        profile: TransactionProfile::new(),
+        frame_counter: 0,
+        resource_upload_time: 0.0,
+        gpu_cache_upload_time: 0.0,
+        profiler: Profiler::new(),
+        max_recorded_profiles: options.max_recorded_profiles,
+        clear_color: options.clear_color,
+        enable_clear_scissor,
+        enable_advanced_blend_barriers: !ext_blend_equation_advanced_coherent,
+        clear_caches_with_quads: options.clear_caches_with_quads,
+        clear_alpha_targets_with_quads,
+        last_time: 0,
+        gpu_profiler,
+        vaos,
+        vertex_data_textures,
+        current_vertex_data_textures: 0,
+        pipeline_info: PipelineInfo::default(),
+        dither_matrix_texture,
+        external_image_handler: None,
+        size_of_ops: make_size_of_ops(),
+        cpu_profiles: VecDeque::new(),
+        gpu_profiles: VecDeque::new(),
+        gpu_cache_texture,
+        gpu_cache_debug_chunks: Vec::new(),
+        gpu_cache_frame_id: FrameId::INVALID,
+        gpu_cache_overflow: false,
+        texture_upload_pbo_pool,
+        staging_texture_pool,
+        texture_resolver,
+        renderer_errors: Vec::new(),
+        async_frame_recorder: None,
+        async_screenshots: None,
+        #[cfg(feature = "capture")]
+        read_fbo,
+        #[cfg(feature = "replay")]
+        owned_external_images: FastHashMap::default(),
+        notifications: Vec::new(),
+        device_size: None,
+        zoom_debug_texture: None,
+        cursor_position: DeviceIntPoint::zero(),
+        shared_texture_cache_cleared: false,
+        documents_seen: FastHashSet::default(),
+        force_redraw: true,
+        compositor_config: options.compositor_config,
+        current_compositor_kind: compositor_kind,
+        allocated_native_surfaces: FastHashSet::default(),
+        debug_overlay_state: DebugOverlayState::new(),
+        buffer_damage_tracker: BufferDamageTracker::default(),
+        max_primitive_instance_count,
+        enable_instancing: options.enable_instancing,
+        consecutive_oom_frames: 0,
+        target_frame_publish_id: None,
+        pending_result_msg: None,
+    };
+
+    // We initially set the flags to default and then now call set_debug_flags
+    // to ensure any potential transition when enabling a flag is run.
+    renderer.set_debug_flags(debug_flags);
+
+    let sender = RenderApiSender::new(
+        api_tx,
+        scene_tx,
+        low_priority_scene_tx,
+        blob_image_handler,
+        fonts,
+    );
+    Ok((renderer, sender))
+}
diff --git a/gfx/wr/webrender/src/renderer/mod.rs b/gfx/wr/webrender/src/renderer/mod.rs
new file mode 100644
index 0000000000..b44c107a66
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/mod.rs
@@ -0,0 +1,5923 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! The high-level module responsible for interfacing with the GPU.
+//!
+//! Much of WebRender's design is driven by separating work into different
+//! threads. To avoid the complexities of multi-threaded GPU access, we restrict
+//! all communication with the GPU to one thread, the render thread. But since
+//! issuing GPU commands is often a bottleneck, we move everything else (i.e.
+//! the computation of what commands to issue) to another thread, the
+//! RenderBackend thread. The RenderBackend, in turn, may delegate work to other
+//! threads (like the SceneBuilder threads or Rayon workers), but the
+//! Render-vs-RenderBackend distinction is the most important.
+//!
+//! The consumer is responsible for initializing the render thread before
+//! calling into WebRender, which means that this module also serves as the
+//! initial entry point into WebRender, and is responsible for spawning the
+//! various other threads discussed above. That said, WebRender initialization
+//! returns both the `Renderer` instance as well as a channel for communicating
+//! directly with the `RenderBackend`. Aside from a few high-level operations
+//! like 'render now', most of the interesting commands from the consumer go over
+//! that channel and operate on the `RenderBackend`.
+//!
+//! ## Space conversion guidelines
+//! At this stage, we should be operating with `DevicePixel` and `FramebufferPixel` only.
+//! "Framebuffer" space represents the final destination of our rendering,
+//! and it happens to be Y-flipped on OpenGL. The conversion is done as follows:
+//!   - for rasterized primitives, the orthographic projection transforms
+//! the content rectangle to -1 to 1
+//!   - the viewport transformation is setup to map the whole range to
+//! the framebuffer rectangle provided by the document view, stored in `DrawTarget`
+//!   - all the direct framebuffer operations, like blitting, reading pixels, and setting
+//! up the scissor, are accepting already transformed coordinates, which we can get by
+//! calling `DrawTarget::to_framebuffer_rect`
+
+use api::{ColorF, ColorU, MixBlendMode};
+use api::{DocumentId, Epoch, ExternalImageHandler, RenderReasons};
+#[cfg(feature = "replay")]
+use api::ExternalImageId;
+use api::{ExternalImageSource, ExternalImageType, ImageFormat, PremultipliedColorF};
+use api::{PipelineId, ImageRendering, Checkpoint, NotificationRequest, ImageBufferKind};
+#[cfg(feature = "replay")]
+use api::ExternalImage;
+use api::FramePublishId;
+use api::units::*;
+use api::channel::{Sender, Receiver};
+pub use api::DebugFlags;
+use core::time::Duration;
+
+use crate::render_api::{DebugCommand, ApiMsg, MemoryReport};
+use crate::batch::{AlphaBatchContainer, BatchKind, BatchFeatures, BatchTextures, BrushBatchKind, ClipBatchList};
+#[cfg(any(feature = "capture", feature = "replay"))]
+use crate::capture::{CaptureConfig, ExternalCaptureImage, PlainExternalImage};
+use crate::composite::{CompositeState, CompositeTileSurface, ResolvedExternalSurface, CompositorSurfaceTransform};
+use crate::composite::{CompositorKind, Compositor, NativeTileId, CompositeFeatures, CompositeSurfaceFormat, ResolvedExternalSurfaceColorData};
+use crate::composite::{CompositorConfig, NativeSurfaceOperationDetails, NativeSurfaceId, NativeSurfaceOperation};
+use crate::composite::{TileKind};
+use crate::debug_colors;
+use crate::device::{DepthFunction, Device, DrawTarget, ExternalTexture, GpuFrameId, UploadPBOPool};
+use crate::device::{ReadTarget, ShaderError, Texture, TextureFilter, TextureFlags, TextureSlot};
+use crate::device::query::{GpuSampler, GpuTimer};
+#[cfg(feature = "capture")]
+use crate::device::FBOId;
+use crate::debug_item::DebugItem;
+use crate::frame_builder::Frame;
+use glyph_rasterizer::GlyphFormat;
+use crate::gpu_cache::{GpuCacheUpdate, GpuCacheUpdateList};
+use crate::gpu_cache::{GpuCacheDebugChunk, GpuCacheDebugCmd};
+use crate::gpu_types::{ScalingInstance, SvgFilterInstance, CopyInstance, MaskInstance, PrimitiveInstanceData};
+use crate::gpu_types::{BlurInstance, ClearInstance, CompositeInstance, CompositorTransform};
+use crate::internal_types::{TextureSource, TextureCacheCategory, FrameId};
+#[cfg(any(feature = "capture", feature = "replay"))]
+use crate::internal_types::DebugOutput;
+use crate::internal_types::{CacheTextureId, FastHashMap, FastHashSet, RenderedDocument, ResultMsg};
+use crate::internal_types::{TextureCacheAllocInfo, TextureCacheAllocationKind, TextureUpdateList};
+use crate::internal_types::{RenderTargetInfo, Swizzle, DeferredResolveIndex};
+use crate::picture::ResolvedSurfaceTexture;
+use crate::prim_store::DeferredResolve;
+use crate::profiler::{self, GpuProfileTag, TransactionProfile};
+use crate::profiler::{Profiler, add_event_marker, add_text_marker, thread_is_being_profiled};
+use crate::device::query::GpuProfiler;
+use crate::render_target::{ResolveOp};
+use crate::render_task_graph::{RenderTaskGraph};
+use crate::render_task::{RenderTask, RenderTaskKind, ReadbackTask};
+use crate::screen_capture::AsyncScreenshotGrabber;
+use crate::render_target::{AlphaRenderTarget, ColorRenderTarget, PictureCacheTarget, PictureCacheTargetKind};
+use crate::render_target::{RenderTarget, TextureCacheRenderTarget};
+use crate::render_target::{RenderTargetKind, BlitJob};
+use crate::telemetry::Telemetry;
+use crate::tile_cache::PictureCacheDebugInfo;
+use crate::util::drain_filter;
+use crate::rectangle_occlusion as occlusion;
+use upload::{upload_to_texture_cache, UploadTexturePool};
+use init::*;
+
+use euclid::{rect, Transform3D, Scale, default};
+use gleam::gl;
+use malloc_size_of::MallocSizeOfOps;
+
+#[cfg(feature = "replay")]
+use std::sync::Arc;
+
+use std::{
+    cell::RefCell,
+    collections::VecDeque,
+    f32,
+    ffi::c_void,
+    mem,
+    num::NonZeroUsize,
+    path::PathBuf,
+    rc::Rc,
+};
+#[cfg(any(feature = "capture", feature = "replay"))]
+use std::collections::hash_map::Entry;
+use time::precise_time_ns;
+
+mod debug;
+mod gpu_buffer;
+mod gpu_cache;
+mod shade;
+mod vertex;
+mod upload;
+pub(crate) mod init;
+
+pub use debug::DebugRenderer;
+pub use shade::{Shaders, SharedShaders};
+pub use vertex::{desc, VertexArrayKind, MAX_VERTEX_TEXTURE_WIDTH};
+pub use gpu_buffer::{GpuBuffer, GpuBufferBuilder, GpuBufferAddress};
+
+/// The size of the array of each type of vertex data texture that
+/// is round-robin-ed each frame during bind_frame_data. Doing this
+/// helps avoid driver stalls while updating the texture in some
+/// drivers. The size of these textures is typically very small
+/// (e.g. < 16 kB) so it's not a huge waste of memory. Despite that,
+/// this is a short-term solution - we want to find a better way
+/// to provide this frame data, which will likely involve some
+/// combination of UBO/SSBO usage. Although this only affects some
+/// platforms, it's enabled on all platforms to reduce testing
+/// differences between platforms.
+pub const VERTEX_DATA_TEXTURE_COUNT: usize = 3;
+
+/// Number of GPU blocks per UV rectangle provided for an image.
+pub const BLOCKS_PER_UV_RECT: usize = 2;
+
+// GPU profile tags. Each tag pairs a label string with a debug color.
+// `BatchKind::sampler_tag` maps batch kinds onto the brush / primitive tags.
+// Note that some tags share a color: the four `C_*Gradient` cache tags all
+// use BROWN, and the three `GPU_SAMPLER_TAG_*` tags all use BLACK.
+const GPU_TAG_BRUSH_OPACITY: GpuProfileTag = GpuProfileTag {
+    label: "B_Opacity",
+    color: debug_colors::DARKMAGENTA,
+};
+const GPU_TAG_BRUSH_LINEAR_GRADIENT: GpuProfileTag = GpuProfileTag {
+    label: "B_LinearGradient",
+    color: debug_colors::POWDERBLUE,
+};
+const GPU_TAG_BRUSH_YUV_IMAGE: GpuProfileTag = GpuProfileTag {
+    label: "B_YuvImage",
+    color: debug_colors::DARKGREEN,
+};
+const GPU_TAG_BRUSH_MIXBLEND: GpuProfileTag = GpuProfileTag {
+    label: "B_MixBlend",
+    color: debug_colors::MAGENTA,
+};
+const GPU_TAG_BRUSH_BLEND: GpuProfileTag = GpuProfileTag {
+    label: "B_Blend",
+    color: debug_colors::ORANGE,
+};
+const GPU_TAG_BRUSH_IMAGE: GpuProfileTag = GpuProfileTag {
+    label: "B_Image",
+    color: debug_colors::SPRINGGREEN,
+};
+const GPU_TAG_BRUSH_SOLID: GpuProfileTag = GpuProfileTag {
+    label: "B_Solid",
+    color: debug_colors::RED,
+};
+const GPU_TAG_CACHE_CLIP: GpuProfileTag = GpuProfileTag {
+    label: "C_Clip",
+    color: debug_colors::PURPLE,
+};
+const GPU_TAG_CACHE_BORDER: GpuProfileTag = GpuProfileTag {
+    label: "C_Border",
+    color: debug_colors::CORNSILK,
+};
+const GPU_TAG_CACHE_LINE_DECORATION: GpuProfileTag = GpuProfileTag {
+    label: "C_LineDecoration",
+    color: debug_colors::YELLOWGREEN,
+};
+const GPU_TAG_CACHE_FAST_LINEAR_GRADIENT: GpuProfileTag = GpuProfileTag {
+    label: "C_FastLinearGradient",
+    color: debug_colors::BROWN,
+};
+const GPU_TAG_CACHE_LINEAR_GRADIENT: GpuProfileTag = GpuProfileTag {
+    label: "C_LinearGradient",
+    color: debug_colors::BROWN,
+};
+const GPU_TAG_CACHE_RADIAL_GRADIENT: GpuProfileTag = GpuProfileTag {
+    label: "C_RadialGradient",
+    color: debug_colors::BROWN,
+};
+const GPU_TAG_CACHE_CONIC_GRADIENT: GpuProfileTag = GpuProfileTag {
+    label: "C_ConicGradient",
+    color: debug_colors::BROWN,
+};
+const GPU_TAG_SETUP_TARGET: GpuProfileTag = GpuProfileTag {
+    label: "target init",
+    color: debug_colors::SLATEGREY,
+};
+const GPU_TAG_SETUP_DATA: GpuProfileTag = GpuProfileTag {
+    label: "data init",
+    color: debug_colors::LIGHTGREY,
+};
+const GPU_TAG_PRIM_SPLIT_COMPOSITE: GpuProfileTag = GpuProfileTag {
+    label: "SplitComposite",
+    color: debug_colors::DARKBLUE,
+};
+const GPU_TAG_PRIM_TEXT_RUN: GpuProfileTag = GpuProfileTag {
+    label: "TextRun",
+    color: debug_colors::BLUE,
+};
+const GPU_TAG_PRIMITIVE: GpuProfileTag = GpuProfileTag {
+    label: "Primitive",
+    color: debug_colors::RED,
+};
+const GPU_TAG_INDIRECT_PRIM: GpuProfileTag = GpuProfileTag {
+    label: "Primitive (indirect)",
+    color: debug_colors::YELLOWGREEN,
+};
+const GPU_TAG_INDIRECT_MASK: GpuProfileTag = GpuProfileTag {
+    label: "Mask (indirect)",
+    color: debug_colors::IVORY,
+};
+const GPU_TAG_BLUR: GpuProfileTag = GpuProfileTag {
+    label: "Blur",
+    color: debug_colors::VIOLET,
+};
+const GPU_TAG_BLIT: GpuProfileTag = GpuProfileTag {
+    label: "Blit",
+    color: debug_colors::LIME,
+};
+const GPU_TAG_SCALE: GpuProfileTag = GpuProfileTag {
+    label: "Scale",
+    color: debug_colors::GHOSTWHITE,
+};
+const GPU_SAMPLER_TAG_ALPHA: GpuProfileTag = GpuProfileTag {
+    label: "Alpha targets",
+    color: debug_colors::BLACK,
+};
+const GPU_SAMPLER_TAG_OPAQUE: GpuProfileTag = GpuProfileTag {
+    label: "Opaque pass",
+    color: debug_colors::BLACK,
+};
+const GPU_SAMPLER_TAG_TRANSPARENT: GpuProfileTag = GpuProfileTag {
+    label: "Transparent pass",
+    color: debug_colors::BLACK,
+};
+const GPU_TAG_SVG_FILTER: GpuProfileTag = GpuProfileTag {
+    label: "SvgFilter",
+    color: debug_colors::LEMONCHIFFON,
+};
+const GPU_TAG_COMPOSITE: GpuProfileTag = GpuProfileTag {
+    label: "Composite",
+    color: debug_colors::TOMATO,
+};
+const GPU_TAG_CLEAR: GpuProfileTag = GpuProfileTag {
+    label: "Clear",
+    color: debug_colors::CHOCOLATE,
+};
+
+/// The clear color used for the texture cache when the debug display is enabled.
+/// We use a shade of blue so that we can still identify completely blue items in
+/// the texture cache.
+pub const TEXTURE_CACHE_DBG_CLEAR_COLOR: [f32; 4] = [0.0, 0.0, 0.8, 1.0];
+
+impl BatchKind {
+    /// Maps this batch kind to its GPU profile tag.
+    ///
+    /// Brush batches are distinguished by their `BrushBatchKind`; every other
+    /// batch kind has a single dedicated tag.
+    fn sampler_tag(&self) -> GpuProfileTag {
+        match *self {
+            // Non-brush batches.
+            BatchKind::SplitComposite => GPU_TAG_PRIM_SPLIT_COMPOSITE,
+            BatchKind::TextRun(_) => GPU_TAG_PRIM_TEXT_RUN,
+            BatchKind::Primitive => GPU_TAG_PRIMITIVE,
+
+            // Brush batches, one tag per brush kind.
+            BatchKind::Brush(BrushBatchKind::Solid) => GPU_TAG_BRUSH_SOLID,
+            BatchKind::Brush(BrushBatchKind::Image(..)) => GPU_TAG_BRUSH_IMAGE,
+            BatchKind::Brush(BrushBatchKind::Blend) => GPU_TAG_BRUSH_BLEND,
+            BatchKind::Brush(BrushBatchKind::MixBlend { .. }) => GPU_TAG_BRUSH_MIXBLEND,
+            BatchKind::Brush(BrushBatchKind::YuvImage(..)) => GPU_TAG_BRUSH_YUV_IMAGE,
+            BatchKind::Brush(BrushBatchKind::LinearGradient) => GPU_TAG_BRUSH_LINEAR_GRADIENT,
+            BatchKind::Brush(BrushBatchKind::Opacity) => GPU_TAG_BRUSH_OPACITY,
+        }
+    }
+}
+
+/// Reports whether the `select` bits differ between `before` and `after`.
+///
+/// Returns `None` when the selected bits are identical in both values.
+/// Otherwise returns `Some(after.contains(select))`, i.e. whether all of the
+/// selected bits are set in `after`.
+fn flag_changed(before: DebugFlags, after: DebugFlags, select: DebugFlags) -> Option<bool> {
+    let old_bits = before & select;
+    let new_bits = after & select;
+
+    // Nothing to report when the selected bits did not move.
+    if old_bits == new_bits {
+        return None;
+    }
+
+    Some(after.contains(select))
+}
+
+/// Color mode selector with explicit discriminant values and a C-compatible
+/// representation.
+///
+/// NOTE(review): the fixed numeric values presumably have to agree with
+/// constants on the shader side — confirm before renumbering or reordering.
+#[repr(C)]
+#[derive(Copy, Clone, Debug)]
+pub enum ShaderColorMode {
+    FromRenderPassMode = 0,
+    Alpha = 1,
+    SubpixelWithBgColorPass0 = 2,
+    SubpixelWithBgColorPass1 = 3,
+    SubpixelWithBgColorPass2 = 4,
+    SubpixelDualSource = 5,
+    BitmapShadow = 6,
+    ColorBitmap = 7,
+    Image = 8,
+    MultiplyDualSource = 9,
+}
+
+impl From<GlyphFormat> for ShaderColorMode {
+    /// Picks the shader color mode for a glyph format.
+    ///
+    /// Alpha, transformed-alpha and bitmap glyphs map to `Alpha`; color bitmap
+    /// glyphs map to `ColorBitmap`.
+    ///
+    /// # Panics
+    ///
+    /// Panics for `Subpixel` and `TransformedSubpixel`: callers must handle
+    /// those formats themselves instead of using this conversion.
+    fn from(format: GlyphFormat) -> Self {
+        match format {
+            GlyphFormat::ColorBitmap => Self::ColorBitmap,
+            GlyphFormat::Alpha | GlyphFormat::TransformedAlpha | GlyphFormat::Bitmap => Self::Alpha,
+            GlyphFormat::Subpixel | GlyphFormat::TransformedSubpixel => {
+                panic!("Subpixel glyph formats must be handled separately.")
+            }
+        }
+    }
+}
+
+/// Enumeration of the texture samplers used across the various WebRender shaders.
+///
+/// Each variant corresponds to a uniform declared in shader source. We only bind
+/// the variants we need for a given shader, so not every variant is bound for every
+/// batch.
+///
+/// The texture slot each variant is bound to is defined by the
+/// `Into<TextureSlot>` implementation for this type.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub(crate) enum TextureSampler {
+    Color0,
+    Color1,
+    Color2,
+    GpuCache,
+    TransformPalette,
+    RenderTasks,
+    Dither,
+    PrimitiveHeadersF,
+    PrimitiveHeadersI,
+    ClipMask,
+    GpuBuffer,
+}
+
+impl TextureSampler {
+    /// Returns the `n`-th color sampler (`Color0`, `Color1` or `Color2`).
+    ///
+    /// # Panics
+    ///
+    /// Panics if `n` is not 0, 1 or 2.
+    pub(crate) fn color(n: usize) -> TextureSampler {
+        const COLOR_SAMPLERS: [TextureSampler; 3] = [
+            TextureSampler::Color0,
+            TextureSampler::Color1,
+            TextureSampler::Color2,
+        ];
+
+        match COLOR_SAMPLERS.get(n) {
+            Some(&sampler) => sampler,
+            None => panic!("There are only 3 color samplers."),
+        }
+    }
+}
+
+impl Into<TextureSlot> for TextureSampler {
+    /// Converts the sampler into the texture slot it is assigned to.
+    ///
+    /// Slots are numbered 0 through 10 in the declaration order of the
+    /// `TextureSampler` variants.
+    fn into(self) -> TextureSlot {
+        let slot_index = match self {
+            TextureSampler::Color0 => 0,
+            TextureSampler::Color1 => 1,
+            TextureSampler::Color2 => 2,
+            TextureSampler::GpuCache => 3,
+            TextureSampler::TransformPalette => 4,
+            TextureSampler::RenderTasks => 5,
+            TextureSampler::Dither => 6,
+            TextureSampler::PrimitiveHeadersF => 7,
+            TextureSampler::PrimitiveHeadersI => 8,
+            TextureSampler::ClipMask => 9,
+            TextureSampler::GpuBuffer => 10,
+        };
+        TextureSlot(slot_index)
+    }
+}
+
+/// Identifies a graphics API. `OpenGL` is currently the only variant.
+#[derive(Clone, Debug, PartialEq)]
+pub enum GraphicsApi {
+    OpenGL,
+}
+
+/// Describes a graphics API: its kind plus renderer and version strings.
+#[derive(Clone, Debug)]
+pub struct GraphicsApiInfo {
+    pub kind: GraphicsApi,
+    pub renderer: String,
+    pub version: String,
+}
+
+/// GPU timing record for a single frame.
+#[derive(Debug)]
+pub struct GpuProfile {
+    pub frame_id: GpuFrameId,
+    // Sum of `time_ns` over the timers handed to `GpuProfile::new`.
+    pub paint_time_ns: u64,
+}
+
+impl GpuProfile {
+    /// Builds the profile for `frame_id`, with `paint_time_ns` set to the sum
+    /// of `time_ns` over all `timers` (zero when `timers` is empty).
+    fn new(frame_id: GpuFrameId, timers: &[GpuTimer]) -> GpuProfile {
+        GpuProfile {
+            frame_id,
+            paint_time_ns: timers.iter().map(|timer| timer.time_ns).sum(),
+        }
+    }
+}
+
+/// CPU-side profile record for a single frame: backend and composite times
+/// (in nanoseconds, per the `_ns` suffix) and a draw call count.
+#[derive(Debug)]
+pub struct CpuProfile {
+    pub frame_id: GpuFrameId,
+    pub backend_time_ns: u64,
+    pub composite_time_ns: u64,
+    pub draw_calls: usize,
+}
+
+impl CpuProfile {
+    /// Bundles the given per-frame values into a `CpuProfile`; every argument
+    /// is stored unchanged in the field of the same name.
+    fn new(
+        frame_id: GpuFrameId,
+        backend_time_ns: u64,
+        composite_time_ns: u64,
+        draw_calls: usize,
+    ) -> CpuProfile {
+        Self { frame_id, backend_time_ns, composite_time_ns, draw_calls }
+    }
+}
+
+/// The selected partial present mode for a given frame.
+///
+/// `Single` is currently the only mode.
+#[derive(Debug, Copy, Clone)]
+enum PartialPresentMode {
+    /// The device supports fewer dirty rects than the number of dirty rects
+    /// that WR produced. In this case, the WR dirty rects are union'ed into
+    /// a single dirty rect, that is provided to the caller.
+    Single {
+        dirty_rect: DeviceRect,
+    },
+}
+
+/// A device texture paired with its texture cache category. This is the
+/// value type of `TextureResolver::texture_cache_map`.
+struct CacheTexture {
+    texture: Texture,
+    category: TextureCacheCategory,
+}
+
+/// Helper struct for resolving device Textures for use during rendering passes.
+///
+/// Manages the mapping between the at-a-distance texture handles used by the
+/// `RenderBackend` (which does not directly interface with the GPU) and actual
+/// device texture handles.
+struct TextureResolver {
+    /// A map to resolve texture cache IDs to native textures.
+    texture_cache_map: FastHashMap<CacheTextureId, CacheTexture>,
+
+    /// Map of deferred-resolve indices to the external textures they
+    /// resolved to.
+    external_images: FastHashMap<DeferredResolveIndex, ExternalTexture>,
+
+    /// A special 1x1 dummy texture used for shaders that expect to work with
+    /// the output of the previous pass but are actually running in the first
+    /// pass.
+    dummy_cache_texture: Texture,
+}
+
+impl TextureResolver {
+    /// Creates a resolver with empty texture maps and a 1x1 RGBA8 dummy cache
+    /// texture whose single texel has every channel set to `0xff`.
+    fn new(device: &mut Device) -> TextureResolver {
+        let dummy_cache_texture = device.create_texture(
+            ImageBufferKind::Texture2D,
+            ImageFormat::RGBA8,
+            1,
+            1,
+            TextureFilter::Linear,
+            None,
+        );
+        // Fill the dummy texture right away so it is valid to sample from.
+        device.upload_texture_immediate(&dummy_cache_texture, &[0xff, 0xff, 0xff, 0xff]);
+
+        Self {
+            texture_cache_map: FastHashMap::default(),
+            external_images: FastHashMap::default(),
+            dummy_cache_texture,
+        }
+    }
+
+    /// Consumes the resolver, deleting the dummy texture and then every
+    /// texture held in the texture cache map. Entries in `external_images`
+    /// are not deleted here.
+    fn deinit(self, device: &mut Device) {
+        let TextureResolver {
+            texture_cache_map,
+            dummy_cache_texture,
+            ..
+        } = self;
+
+        device.delete_texture(dummy_cache_texture);
+
+        for (_, cache_entry) in texture_cache_map {
+            device.delete_texture(cache_entry.texture);
+        }
+    }
+
+    /// Currently a no-op: the body is empty.
+    fn begin_frame(&mut self) {
+    }
+
+    /// Invalidates the render target of every texture listed in
+    /// `textures_to_invalidate`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if one of the ids is not present in `texture_cache_map`.
+    fn end_pass(
+        &mut self,
+        device: &mut Device,
+        textures_to_invalidate: &[CacheTextureId],
+    ) {
+        // Textures that are no longer needed are invalidated immediately so
+        // that tiled GPUs don't need to resolve them back to memory.
+        textures_to_invalidate
+            .iter()
+            .map(|texture_id| &self.texture_cache_map[texture_id].texture)
+            .for_each(|render_target| {
+                device.invalidate_render_target(render_target);
+            });
+    }
+
+    /// Binds the texture identified by `texture_id` to `sampler` on `device`
+    /// and returns the swizzle associated with the binding.
+    ///
+    /// `TextureSource::Invalid` binds nothing and yields the default swizzle.
+    /// External textures are bound through `bind_external_texture` and also
+    /// yield the default swizzle.
+    ///
+    /// # Panics
+    ///
+    /// Panics if an external texture has no entry in `external_images`, or if
+    /// a texture cache id has no entry in `texture_cache_map`.
+    fn bind(&self, texture_id: &TextureSource, sampler: TextureSampler, device: &mut Device) -> Swizzle {
+        match *texture_id {
+            TextureSource::TextureCache(index, swizzle) => {
+                let cache_entry = &self.texture_cache_map[&index];
+                device.bind_texture(sampler, &cache_entry.texture, swizzle);
+                swizzle
+            }
+            TextureSource::External(ref index, _) => {
+                let external_texture = self
+                    .external_images
+                    .get(index)
+                    .expect("BUG: External image should be resolved by now");
+                device.bind_external_texture(sampler, external_texture);
+                Swizzle::default()
+            }
+            TextureSource::Dummy => {
+                let default_swizzle = Swizzle::default();
+                device.bind_texture(sampler, &self.dummy_cache_texture, default_swizzle);
+                default_swizzle
+            }
+            TextureSource::Invalid => Swizzle::default(),
+        }
+    }
+
+    // Get the real (OpenGL) texture ID for a given source texture.
+    // For a texture cache texture, the IDs are stored in a vector
+    // map for fast access.
+    fn resolve(&self, texture_id: &TextureSource) -> Option<(&Texture, Swizzle)> {
+        match *texture_id {
+            TextureSource::Invalid => None,
+            TextureSource::Dummy => {
+                Some((&self.dummy_cache_texture, Swizzle::default()))
+            }
+            TextureSource::External(..) => {
+                panic!("BUG: External textures cannot be resolved, they can only be bound.");
+            }
+            TextureSource::TextureCache(index, swizzle) => {
+                Some((&self.texture_cache_map[&index].texture, swizzle))
+            }
+        }
+    }
+
+    // Retrieve the deferred / resolved UV rect if an external texture, otherwise
+    // return the default supplied UV rect.
+    fn get_uv_rect(
+        &self,
+        source: &TextureSource,
+        default_value: TexelRect,
+    ) -> TexelRect {
+        match source {
+            TextureSource::External(ref index, _) => {
+                let texture = self.external_images
+                    .get(index)
+                    .expect("BUG: External image should be resolved by now");
+                texture.get_uv_rect()
+            }
+            _ => {
+                default_value
+            }
+        }
+    }
+
+    /// Returns the size of the texture in pixels
+    fn get_texture_size(&self, texture: &TextureSource) -> DeviceIntSize {
+        match *texture {
+            TextureSource::Invalid => DeviceIntSize::zero(),
+            TextureSource::TextureCache(id, _) => {
+                self.texture_cache_map[&id].texture.get_dimensions()
+            },
+            TextureSource::External(index, _) => {
+                let uv_rect = self.external_images[&index].get_uv_rect();
+                (uv_rect.uv1 - uv_rect.uv0).abs().to_size().to_i32()
+            },
+            TextureSource::Dummy => DeviceIntSize::new(1, 1),
+        }
+    }
+
+    fn report_memory(&self) -> MemoryReport {
+        let mut report = MemoryReport::default();
+
+        // We're reporting GPU memory rather than heap-allocations, so we don't
+        // use size_of_op.
+        for item in self.texture_cache_map.values() {
+            let counter = match item.category {
+                TextureCacheCategory::Atlas => &mut report.atlas_textures,
+                TextureCacheCategory::Standalone => &mut report.standalone_textures,
+                TextureCacheCategory::PictureTile => &mut report.picture_tile_textures,
+                TextureCacheCategory::RenderTarget => &mut report.render_target_textures,
+            };
+            *counter += item.texture.size_in_bytes();
+        }
+
+        report
+    }
+
+    fn update_profile(&self, profile: &mut TransactionProfile) {
+        let mut external_image_bytes = 0;
+        for img in self.external_images.values() {
+            let uv_rect = img.get_uv_rect();
+            let size = (uv_rect.uv1 - uv_rect.uv0).abs().to_size().to_i32();
+
+            // Assume 4 bytes per pixels which is true most of the time but
+            // not always.
+            let bpp = 4;
+            external_image_bytes += size.area() as usize * bpp;
+        }
+
+        profile.set(profiler::EXTERNAL_IMAGE_BYTES, profiler::bytes_to_mb(external_image_bytes));
+    }
+
+    fn get_cache_texture_mut(&mut self, id: &CacheTextureId) -> &mut Texture {
+        &mut self.texture_cache_map
+            .get_mut(id)
+            .expect("bug: texture not allocated")
+            .texture
+    }
+}
+
+/// The blend modes that can be selected for drawing.
+///
+/// Some variants exist to emulate a mix-blend-mode with simple or
+/// dual-source blending; see `BlendMode::from_mix_blend_mode`.
+#[derive(Debug, Copy, Clone, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum BlendMode {
+    None,
+    Alpha,
+    PremultipliedAlpha,
+    PremultipliedDestOut,
+    SubpixelDualSource,
+    SubpixelWithBgColor,
+    /// A mix-blend-mode implemented with advanced blending.
+    Advanced(MixBlendMode),
+    /// Multiply emulated via dual-source blending.
+    MultiplyDualSource,
+    /// Screen emulated via simple blending: Cs + Cd*(1-Cs).
+    Screen,
+    /// Exclusion emulated via simple blending: Cs*(1-Cd) + Cd*(1-Cs).
+    Exclusion,
+    /// PlusLighter, which is basically a clamped add.
+    PlusLighter,
+}
+
+impl BlendMode {
+    /// Picks the blend mode used to implement `mode`, given the blending
+    /// capabilities that are available.
+    ///
+    /// Returns `None` when neither simple, dual-source nor advanced blending
+    /// can express the mode, in which case brush_mix_blend has to be used.
+    pub fn from_mix_blend_mode(
+        mode: MixBlendMode,
+        advanced_blend: bool,
+        coherent: bool,
+        dual_source: bool,
+    ) -> Option<BlendMode> {
+        // Whenever a mix-blend-mode is emulated via simple or dual-source
+        // blending, the alpha output must be As + Ad*(1-As) regardless of the
+        // RGB output, in order to comply with the mix-blend-mode spec.
+
+        // Coherent advanced blend takes precedence over every emulation.
+        if advanced_blend && coherent {
+            return Some(BlendMode::Advanced(mode));
+        }
+
+        match mode {
+            // Cs + Cd - Cs*Cd => Cs + Cd*(1-Cs)
+            MixBlendMode::Screen => Some(BlendMode::Screen),
+            // Cs + Cd - 2*Cs*Cd => Cs*(1-Cd) + Cd*(1-Cs)
+            MixBlendMode::Exclusion => Some(BlendMode::Exclusion),
+            // Basically a clamped add.
+            MixBlendMode::PlusLighter => Some(BlendMode::PlusLighter),
+            // Cs*Cd + Cs*(1-Ad) + Cd*(1-As) => Cs*(1-Ad) + Cd*(1 - SRC1=(As-Cs))
+            MixBlendMode::Multiply if dual_source => Some(BlendMode::MultiplyDualSource),
+            // No emulation applies: fall back to non-coherent advanced blend.
+            _ if advanced_blend => Some(BlendMode::Advanced(mode)),
+            // Without advanced blend, brush_mix_blend is the only option left.
+            _ => None,
+        }
+    }
+}
+
+/// Information about the state of the debugging / profiler overlay in native compositing mode.
+/// Both fields are maintained by `Renderer::update_debug_overlay`.
+struct DebugOverlayState {
+    /// True if any of the current debug flags (or pending debug items) will
+    /// result in drawing a debug overlay.
+    is_enabled: bool,
+
+    /// The current size of the debug overlay surface. None implies that the
+    /// debug surface isn't currently allocated.
+    current_size: Option<DeviceIntSize>,
+}
+
+impl DebugOverlayState {
+    /// Creates the initial state: overlay disabled and no surface allocated.
+    fn new() -> Self {
+        Self {
+            current_size: None,
+            is_enabled: false,
+        }
+    }
+}
+
+/// Tracks buffer damage rects over a series of frames.
+#[derive(Debug, Default)]
+pub(crate) struct BufferDamageTracker {
+    /// Ring of the most recently pushed damage rects (two frames' worth).
+    damage_rects: [DeviceRect; 2],
+    /// Index of the slot that the next `push_dirty_rect()` call writes to.
+    current_offset: usize,
+}
+
+impl BufferDamageTracker {
+    /// Records the damage rect of the current frame. Only call this *after*
+    /// get_damage_rect() has been queried for the current backbuffer.
+    fn push_dirty_rect(&mut self, rect: &DeviceRect) {
+        let slot = self.current_offset;
+        self.damage_rects[slot] = rect.clone();
+
+        // Step backwards through the ring, wrapping from the first slot
+        // around to the last one.
+        self.current_offset = if slot == 0 {
+            self.damage_rects.len() - 1
+        } else {
+            slot - 1
+        };
+    }
+
+    /// Computes the damage rect for the current backbuffer from its age
+    /// (the number of frames since it was previously the backbuffer).
+    /// Yields an empty rect if the buffer is valid, and None if the entire
+    /// buffer is invalid.
+    fn get_damage_rect(&self, buffer_age: usize) -> Option<DeviceRect> {
+        let tracked_frames = self.damage_rects.len();
+
+        // Age 0 denotes a brand new buffer, and anything older than the
+        // history we track cannot be reconstructed: both are fully invalid.
+        if buffer_age == 0 || buffer_age > tracked_frames + 1 {
+            return None;
+        }
+
+        // Age 1 means this backbuffer was also the previous frame's
+        // backbuffer (so it must have been copied to the frontbuffer) and
+        // is therefore entirely valid.
+        if buffer_age == 1 {
+            return Some(DeviceRect::zero());
+        }
+
+        // Otherwise accumulate the union of the damage rects recorded since
+        // this buffer was last the backbuffer, newest first.
+        let recent_rects = self.damage_rects
+            .iter()
+            .cycle()
+            .skip(self.current_offset + 1)
+            .take(buffer_age - 1);
+
+        let mut damage = DeviceRect::zero();
+        for rect in recent_rects {
+            damage = damage.union(rect);
+        }
+        Some(damage)
+    }
+}
+
+/// The renderer is responsible for submitting to the GPU the work prepared by the
+/// RenderBackend.
+///
+/// We have a separate `Renderer` instance for each instance of WebRender (generally
+/// one per OS window), and all instances share the same thread.
+pub struct Renderer {
+    /// Receiving end of the result queue that `update()` drains.
+    result_rx: Receiver<ResultMsg>,
+    /// Sender for API messages; `render()` uses it to report memory pressure.
+    api_tx: Sender<ApiMsg>,
+    pub device: Device,
+    pending_texture_updates: Vec<TextureUpdateList>,
+    /// True if there are any TextureCacheUpdate pending.
+    pending_texture_cache_updates: bool,
+    pending_native_surface_updates: Vec<NativeSurfaceOperation>,
+    pending_gpu_cache_updates: Vec<GpuCacheUpdateList>,
+    pending_gpu_cache_clear: bool,
+    pending_shader_updates: Vec<PathBuf>,
+    /// Most recently published document for each document id.
+    active_documents: FastHashMap<DocumentId, RenderedDocument>,
+
+    shaders: Rc<RefCell<Shaders>>,
+
+    max_recorded_profiles: usize,
+
+    clear_color: ColorF,
+    enable_clear_scissor: bool,
+    enable_advanced_blend_barriers: bool,
+    clear_caches_with_quads: bool,
+    clear_alpha_targets_with_quads: bool,
+
+    debug: debug::LazyInitializedDebugRenderer,
+    debug_flags: DebugFlags,
+    profile: TransactionProfile,
+    frame_counter: u64,
+    resource_upload_time: f64,
+    gpu_cache_upload_time: f64,
+    profiler: Profiler,
+
+    last_time: u64,
+
+    pub gpu_profiler: GpuProfiler,
+    vaos: vertex::RendererVAOs,
+
+    gpu_cache_texture: gpu_cache::GpuCacheTexture,
+    vertex_data_textures: Vec<vertex::VertexDataTextures>,
+    current_vertex_data_textures: usize,
+
+    /// When the GPU cache debugger is enabled, we keep track of the live blocks
+    /// in the GPU cache so that we can use them for the debug display. This
+    /// member stores those live blocks, indexed by row.
+    gpu_cache_debug_chunks: Vec<Vec<GpuCacheDebugChunk>>,
+
+    gpu_cache_frame_id: FrameId,
+    gpu_cache_overflow: bool,
+
+    /// Pipeline epochs and removed pipelines accumulated by `update()` until
+    /// they are taken with `flush_pipeline_info()`.
+    pipeline_info: PipelineInfo,
+
+    // Manages and resolves source textures IDs to real texture IDs.
+    texture_resolver: TextureResolver,
+
+    texture_upload_pbo_pool: UploadPBOPool,
+    staging_texture_pool: UploadTexturePool,
+
+    dither_matrix_texture: Option<Texture>,
+
+    /// Optional trait object that allows the client
+    /// application to provide external buffers for image data.
+    external_image_handler: Option<Box<dyn ExternalImageHandler>>,
+
+    /// Optional function pointers for measuring memory used by a given
+    /// heap-allocated pointer.
+    size_of_ops: Option<MallocSizeOfOps>,
+
+    pub renderer_errors: Vec<RendererError>,
+
+    pub(in crate) async_frame_recorder: Option<AsyncScreenshotGrabber>,
+    pub(in crate) async_screenshots: Option<AsyncScreenshotGrabber>,
+
+    /// List of profile results from previous frames. Can be retrieved
+    /// via get_frame_profiles().
+    cpu_profiles: VecDeque<CpuProfile>,
+    gpu_profiles: VecDeque<GpuProfile>,
+
+    /// Notification requests to be fulfilled after rendering.
+    notifications: Vec<NotificationRequest>,
+
+    /// The size passed to the most recent `render()` call.
+    device_size: Option<DeviceIntSize>,
+
+    /// A lazily created texture for the zoom debugging widget.
+    zoom_debug_texture: Option<Texture>,
+
+    /// The current mouse position. This is used for debugging
+    /// functionality only, such as the debug zoom widget.
+    cursor_position: DeviceIntPoint,
+
+    /// Guards to check if we might be rendering a frame with expired texture
+    /// cache entries.
+    shared_texture_cache_cleared: bool,
+
+    /// The set of documents which we've seen a publish for since last render.
+    documents_seen: FastHashSet<DocumentId>,
+
+    #[cfg(feature = "capture")]
+    read_fbo: FBOId,
+    #[cfg(feature = "replay")]
+    owned_external_images: FastHashMap<(ExternalImageId, u8), ExternalTexture>,
+
+    /// The compositing config, affecting how WR composites into the final scene.
+    compositor_config: CompositorConfig,
+
+    current_compositor_kind: CompositorKind,
+
+    /// Maintains a set of allocated native composite surfaces. This allows any
+    /// currently allocated surfaces to be cleaned up as soon as deinit() is
+    /// called (the normal bookkeeping for native surfaces exists in the
+    /// render backend thread).
+    allocated_native_surfaces: FastHashSet<NativeSurfaceId>,
+
+    /// If true, partial present state has been reset and everything needs to
+    /// be drawn on the next render.
+    force_redraw: bool,
+
+    /// State related to the debug / profiling overlays
+    debug_overlay_state: DebugOverlayState,
+
+    /// Tracks the dirty rectangles from previous frames. Used on platforms
+    /// that require keeping the front buffer fully correct when doing
+    /// partial present (e.g. unix desktop with EGL_EXT_buffer_age).
+    buffer_damage_tracker: BufferDamageTracker,
+
+    max_primitive_instance_count: usize,
+    enable_instancing: bool,
+
+    /// Count consecutive oom frames to detect if we are stuck unable to render
+    /// in a loop.
+    consecutive_oom_frames: u32,
+
+    /// update() defers processing of ResultMsg, if frame_publish_id of
+    /// ResultMsg::PublishDocument exceeds target_frame_publish_id.
+    target_frame_publish_id: Option<FramePublishId>,
+
+    /// Holds the next ResultMsg that will be handled by update().
+    pending_result_msg: Option<ResultMsg>,
+}
+
+/// Errors that the renderer can report.
+#[derive(Debug)]
+pub enum RendererError {
+    /// A shader error; produced from `ShaderError` via `From`.
+    Shader(ShaderError),
+    /// An I/O error; produced from `std::io::Error` via `From`.
+    Thread(std::io::Error),
+    MaxTextureSize,
+    SoftwareRasterizer,
+    /// When `render()` sees this error it notifies the API of memory
+    /// pressure and counts consecutive out-of-memory frames.
+    OutOfMemory,
+}
+
+impl From<ShaderError> for RendererError {
+    /// Wraps a shader error in the `Shader` variant.
+    fn from(shader_error: ShaderError) -> Self {
+        Self::Shader(shader_error)
+    }
+}
+
+impl From<std::io::Error> for RendererError {
+    /// Wraps an I/O error in the `Thread` variant.
+    fn from(io_error: std::io::Error) -> Self {
+        Self::Thread(io_error)
+    }
+}
+
+impl Renderer {
+    /// Returns the device size recorded by the most recent `render()` call.
+    // NOTE(review): presumably `None` until the first render — confirm
+    // against the constructor, which is outside this section.
+    pub fn device_size(&self) -> Option<DeviceIntSize> {
+        self.device_size
+    }
+
+    /// Stores the latest position of the debug cursor.
+    pub fn set_cursor_position(&mut self, position: DeviceIntPoint) {
+        self.cursor_position = position;
+    }
+
+    /// Returns the maximum texture size reported by the device.
+    pub fn get_max_texture_size(&self) -> i32 {
+        self.device.max_texture_size()
+    }
+
+    /// Describes the graphics API in use: always OpenGL, together with the
+    /// `VERSION` and `RENDERER` strings queried from the GL context.
+    pub fn get_graphics_api_info(&self) -> GraphicsApiInfo {
+        let gl_api = self.device.gl();
+        let version = gl_api.get_string(gl::VERSION);
+        let renderer = gl_api.get_string(gl::RENDERER);
+
+        GraphicsApiInfo {
+            kind: GraphicsApi::OpenGL,
+            version,
+            renderer,
+        }
+    }
+
+    /// Returns the `external` entry of the device's preferred color formats.
+    pub fn preferred_color_format(&self) -> ImageFormat {
+        let formats = self.device.preferred_color_formats();
+        formats.external
+    }
+
+    /// Returns the device's required PBO stride for `format`, in bytes.
+    pub fn required_texture_stride_alignment(&self, format: ImageFormat) -> usize {
+        let stride = self.device.required_pbo_stride();
+        let stride_bytes = stride.num_bytes(format);
+        stride_bytes.get()
+    }
+
+    /// Replaces the renderer's stored clear color.
+    pub fn set_clear_color(&mut self, color: ColorF) {
+        self.clear_color = color;
+    }
+
+    /// Returns the pipeline info accumulated so far and resets the stored
+    /// value to `PipelineInfo::default()`.
+    pub fn flush_pipeline_info(&mut self) -> PipelineInfo {
+        mem::take(&mut self.pipeline_info)
+    }
+
+    /// Looks up the Epoch currently recorded for a pipeline of a document,
+    /// or `None` if no epoch is known for that pair.
+    pub fn current_epoch(&self, document_id: DocumentId, pipeline_id: PipelineId) -> Option<Epoch> {
+        let key = (pipeline_id, document_id);
+        self.pipeline_info.epochs.get(&key).cloned()
+    }
+
+    /// Takes the next result message to process, if there is one.
+    ///
+    /// A message is pulled from the result queue only when none is already
+    /// pending. A pending `PublishDocument` whose frame publish id is greater
+    /// than the target frame publish id stays pending and `None` is returned.
+    fn get_next_result_msg(&mut self) -> Option<ResultMsg> {
+        if self.pending_result_msg.is_none() {
+            self.pending_result_msg = self.result_rx.try_recv().ok();
+        }
+
+        let pending = &self.pending_result_msg;
+        let target = &self.target_frame_publish_id;
+        if let (Some(ResultMsg::PublishDocument(frame_publish_id, ..)), Some(target_id)) = (pending, target) {
+            // Defer documents published after the targeted frame.
+            if frame_publish_id > target_id {
+                return None;
+            }
+        }
+
+        self.pending_result_msg.take()
+    }
+
+    /// Processes the result queue.
+    ///
+    /// Should be called before `render()`, as texture cache updates are done here.
+    ///
+    /// Messages are handled in order until the queue is empty or until
+    /// `get_next_result_msg()` defers a `PublishDocument` that is newer than
+    /// the target frame publish id.
+    pub fn update(&mut self) {
+        profile_scope!("update");
+
+        // Pull any pending results and return the most recent.
+        while let Some(msg) = self.get_next_result_msg() {
+            match msg {
+                ResultMsg::PublishPipelineInfo(pipeline_info) => {
+                    // Newer epochs overwrite existing entries; removed
+                    // pipelines accumulate until flush_pipeline_info().
+                    self.pipeline_info.epochs.extend(pipeline_info.epochs);
+                    self.pipeline_info.removed_pipelines.extend(pipeline_info.removed_pipelines);
+                }
+                ResultMsg::PublishDocument(
+                    _,
+                    document_id,
+                    mut doc,
+                    resource_update_list,
+                ) => {
+                    // Add a new document to the active set
+
+                    // If the document we are replacing must be drawn (in order to
+                    // update the texture cache), issue a render just to
+                    // off-screen targets, ie pass None to render_impl. We do this
+                    // because a) we don't need to render to the main framebuffer
+                    // so it is cheaper not to, and b) doing so without a
+                    // subsequent present would break partial present.
+                    if let Some(mut prev_doc) = self.active_documents.remove(&document_id) {
+                        doc.profile.merge(&mut prev_doc.profile);
+
+                        if prev_doc.frame.must_be_drawn() {
+                            prev_doc.render_reasons |= RenderReasons::TEXTURE_CACHE_FLUSH;
+                            self.render_impl(
+                                document_id,
+                                &mut prev_doc,
+                                None,
+                                0,
+                            ).ok();
+                        }
+                    }
+
+                    self.active_documents.insert(document_id, doc);
+
+                    // IMPORTANT: The pending texture cache updates must be applied
+                    //            *after* the previous frame has been rendered above
+                    //            (if necessary for a texture cache update). For
+                    //            an example of why this is required:
+                    //            1) Previous frame contains a render task that
+                    //               targets Texture X.
+                    //            2) New frame contains a texture cache update which
+                    //               frees Texture X.
+                    //            3) bad stuff happens.
+
+                    //TODO: associate `document_id` with target window
+                    self.pending_texture_cache_updates |= !resource_update_list.texture_updates.updates.is_empty();
+                    self.pending_texture_updates.push(resource_update_list.texture_updates);
+                    self.pending_native_surface_updates.extend(resource_update_list.native_surface_updates);
+                    self.documents_seen.insert(document_id);
+                }
+                ResultMsg::UpdateGpuCache(mut list) => {
+                    // A clear drops both the GPU cache contents and the
+                    // debug bookkeeping of live chunks.
+                    if list.clear {
+                        self.pending_gpu_cache_clear = true;
+                        self.gpu_cache_debug_chunks = Vec::new();
+                    }
+                    for cmd in mem::take(&mut list.debug_commands) {
+                        match cmd {
+                            GpuCacheDebugCmd::Alloc(chunk) => {
+                                let row = chunk.address.v as usize;
+                                if row >= self.gpu_cache_debug_chunks.len() {
+                                    self.gpu_cache_debug_chunks.resize(row + 1, Vec::new());
+                                }
+                                self.gpu_cache_debug_chunks[row].push(chunk);
+                            },
+                            GpuCacheDebugCmd::Free(address) => {
+                                // Panics if the freed chunk was never recorded.
+                                let chunks = &mut self.gpu_cache_debug_chunks[address.v as usize];
+                                let pos = chunks.iter()
+                                    .position(|x| x.address == address).unwrap();
+                                chunks.remove(pos);
+                            },
+                        }
+                    }
+                    self.pending_gpu_cache_updates.push(list);
+                }
+                ResultMsg::UpdateResources {
+                    resource_updates,
+                    memory_pressure,
+                } => {
+                    if memory_pressure {
+                        // If a memory pressure event arrives _after_ a new scene has
+                        // been published that writes persistent targets (i.e. cached
+                        // render tasks to the texture cache, or picture cache tiles)
+                        // but _before_ the next update/render loop, those targets
+                        // will not be updated due to the active_documents list being
+                        // cleared at the end of this message. To work around that,
+                        // if any of the existing documents have not rendered yet, and
+                        // have picture/texture cache targets, force a render so that
+                        // those targets are updated.
+                        let active_documents = mem::take(&mut self.active_documents);
+                        for (doc_id, mut doc) in active_documents {
+                            if doc.frame.must_be_drawn() {
+                                // As this render will not be presented, we must pass None to
+                                // render_impl. This avoids interfering with partial present
+                                // logic, as well as being more efficient.
+                                self.render_impl(
+                                    doc_id,
+                                    &mut doc,
+                                    None,
+                                    0,
+                                ).ok();
+                            }
+                        }
+                    }
+
+                    self.pending_texture_cache_updates |= !resource_updates.texture_updates.updates.is_empty();
+                    self.pending_texture_updates.push(resource_updates.texture_updates);
+                    self.pending_native_surface_updates.extend(resource_updates.native_surface_updates);
+                    self.device.begin_frame();
+
+                    self.update_texture_cache();
+                    self.update_native_surfaces();
+
+                    // Flush the render target pool on memory pressure.
+                    //
+                    // This needs to be separate from the block above because
+                    // the device module asserts if we delete textures while
+                    // not in a frame.
+                    if memory_pressure {
+                        self.texture_upload_pbo_pool.on_memory_pressure(&mut self.device);
+                        self.staging_texture_pool.delete_textures(&mut self.device);
+                    }
+
+                    self.device.end_frame();
+                }
+                ResultMsg::AppendNotificationRequests(mut notifications) => {
+                    // We need to know specifically if there are any pending
+                    // TextureCacheUpdate updates in any of the entries in
+                    // pending_texture_updates. They may simply be nops, which do not
+                    // need to prevent issuing the notification, and if so, may not
+                    // cause a timely frame render to occur to wake up any listeners.
+                    if !self.pending_texture_cache_updates {
+                        drain_filter(
+                            &mut notifications,
+                            |n| { n.when() == Checkpoint::FrameTexturesUpdated },
+                            |n| { n.notify(); },
+                        );
+                    }
+                    self.notifications.append(&mut notifications);
+                }
+                ResultMsg::ForceRedraw => {
+                    self.force_redraw = true;
+                }
+                ResultMsg::RefreshShader(path) => {
+                    self.pending_shader_updates.push(path);
+                }
+                ResultMsg::SetParameter(ref param) => {
+                    self.device.set_parameter(param);
+                }
+                ResultMsg::DebugOutput(output) => match output {
+                    #[cfg(feature = "capture")]
+                    DebugOutput::SaveCapture(config, deferred) => {
+                        self.save_capture(config, deferred);
+                    }
+                    #[cfg(feature = "replay")]
+                    DebugOutput::LoadCapture(config, plain_externals) => {
+                        self.active_documents.clear();
+                        self.load_capture(config, plain_externals);
+                    }
+                },
+                ResultMsg::DebugCommand(command) => {
+                    self.handle_debug_command(command);
+                }
+            }
+        }
+    }
+
+    /// Sets the target frame publish id.
+    ///
+    /// update() defers processing of ResultMsg, if frame_publish_id of
+    /// ResultMsg::PublishDocument exceeds target_frame_publish_id.
+    pub fn set_target_frame_publish_id(&mut self, publish_id: FramePublishId) {
+        self.target_frame_publish_id = Some(publish_id);
+    }
+
+    /// Applies a debug command that reached the renderer.
+    ///
+    /// Only flag changes and GPU cache invalidation have an effect here.
+    /// Several commands are ignored, and commands that belong to the render
+    /// backend or to capture handling trigger a panic.
+    fn handle_debug_command(&mut self, command: DebugCommand) {
+        match command {
+            // Commands the renderer acts upon.
+            DebugCommand::SetFlags(flags) => {
+                self.set_debug_flags(flags);
+            }
+            DebugCommand::InvalidateGpuCache => {
+                self.gpu_cache_texture.invalidate();
+            }
+            // Commands that need no action from the renderer.
+            DebugCommand::ClearCaches(_) |
+            DebugCommand::SimulateLongSceneBuild(_) |
+            DebugCommand::EnableNativeCompositor(_) |
+            DebugCommand::SetBatchingLookback(_) => {}
+            // Commands that must never arrive here.
+            DebugCommand::SetPictureTileSize(_) |
+            DebugCommand::SetMaximumSurfaceSize(_) => {
+                panic!("Should be handled by render backend");
+            }
+            DebugCommand::SaveCapture(..) |
+            DebugCommand::LoadCapture(..) |
+            DebugCommand::StartCaptureSequence(..) |
+            DebugCommand::StopCaptureSequence => {
+                panic!("Capture commands are not welcome here! Did you build with 'capture' feature?")
+            }
+        }
+    }
+
+    /// Set a callback for handling external images. Any previously
+    /// installed handler is replaced.
+    pub fn set_external_image_handler(&mut self, handler: Box<dyn ExternalImageHandler>) {
+        self.external_image_handler = Some(handler);
+    }
+
+    /// Drains the recorded CPU and GPU frame profiles and hands them to the
+    /// caller, leaving both internal lists empty.
+    pub fn get_frame_profiles(&mut self) -> (Vec<CpuProfile>, Vec<GpuProfile>) {
+        (
+            self.cpu_profiles.drain(..).collect(),
+            self.gpu_profiles.drain(..).collect(),
+        )
+    }
+
+    /// Reset the current partial present state. This forces the entire framebuffer
+    /// to be refreshed next time `render` is called. Has the same effect as
+    /// receiving a `ResultMsg::ForceRedraw` in `update()`.
+    pub fn force_redraw(&mut self) {
+        self.force_redraw = true;
+    }
+
+    /// Renders the current frame.
+    ///
+    /// A Frame is supplied by calling [`generate_frame()`][webrender_api::Transaction::generate_frame].
+    /// buffer_age is the age of the current backbuffer. It is only relevant if partial present
+    /// is active, otherwise 0 should be passed here.
+    ///
+    /// When there is no active document, default results are returned. If the
+    /// frame fails with an out-of-memory error, a memory pressure message is
+    /// sent to the API; the fifth consecutive such frame triggers a panic.
+    pub fn render(
+        &mut self,
+        device_size: DeviceIntSize,
+        buffer_age: usize,
+    ) -> Result<RenderResults, Vec<RendererError>> {
+        self.device_size = Some(device_size);
+
+        // TODO(gw): We want to make the active document that is
+        //           being rendered configurable via the public
+        //           API in future. For now, just select the last
+        //           added document as the active one to render
+        //           (Gecko only ever creates a single document
+        //           per renderer right now).
+        let active_doc_id = self.active_documents.keys().last().cloned();
+
+        let result = if let Some(doc_id) = active_doc_id {
+            // Take the doc out of the map while rendering to appease the
+            // borrow checker, and put it back once done.
+            let mut doc = self.active_documents
+                .remove(&doc_id)
+                .unwrap();
+
+            let render_result = self.render_impl(
+                doc_id,
+                &mut doc,
+                Some(device_size),
+                buffer_age,
+            );
+
+            self.active_documents.insert(doc_id, doc);
+
+            render_result
+        } else {
+            self.last_time = precise_time_ns();
+            Ok(RenderResults::default())
+        };
+
+        drain_filter(
+            &mut self.notifications,
+            |n| { n.when() == Checkpoint::FrameRendered },
+            |n| { n.notify(); },
+        );
+
+        let oom = match result {
+            Err(ref errors) => errors
+                .iter()
+                .any(|error| matches!(error, RendererError::OutOfMemory)),
+            Ok(_) => false,
+        };
+
+        if !oom {
+            self.consecutive_oom_frames = 0;
+        } else {
+            let _ = self.api_tx.send(ApiMsg::MemoryPressure);
+            // Ensure we don't get stuck in a loop.
+            self.consecutive_oom_frames += 1;
+            assert!(self.consecutive_oom_frames < 5, "Renderer out of memory");
+        }
+
+        // This is the end of the rendering pipeline. If some notifications are still there,
+        // just clear them and they will automatically fire the Checkpoint::TransactionDropped
+        // event. Otherwise they would just pile up in this vector forever.
+        self.notifications.clear();
+
+        tracy_frame_marker!();
+
+        result
+    }
+
+    /// Refreshes the debug / profiler overlay state for this frame.
+    ///
+    /// The enabled flag is always recomputed. The overlay surface itself is
+    /// only created or destroyed when the native compositor is in use.
+    fn update_debug_overlay(
+        &mut self,
+        framebuffer_size: DeviceIntSize,
+        has_debug_items: bool,
+    ) {
+        // Any of these debug flags results in something being drawn on the
+        // debug overlay.
+        let overlay_flags =
+            DebugFlags::PROFILER_DBG |
+            DebugFlags::RENDER_TARGET_DBG |
+            DebugFlags::TEXTURE_CACHE_DBG |
+            DebugFlags::EPOCHS |
+            DebugFlags::GPU_CACHE_DBG |
+            DebugFlags::PICTURE_CACHING_DBG |
+            DebugFlags::PRIMITIVE_DBG |
+            DebugFlags::ZOOM_DBG |
+            DebugFlags::WINDOW_VISIBILITY_DBG;
+        let overlay_needed = has_debug_items || self.debug_flags.intersects(overlay_flags);
+        self.debug_overlay_state.is_enabled = overlay_needed;
+
+        // The overlay surface is only managed in native compositor mode.
+        if !matches!(self.current_compositor_kind, CompositorKind::Native { .. }) {
+            return;
+        }
+        let compositor = self.compositor_config.compositor().unwrap();
+
+        // An existing surface is destroyed when it is no longer needed for
+        // this frame, or when its size no longer matches the framebuffer.
+        let must_destroy = self.debug_overlay_state.current_size
+            .map_or(false, |size| !overlay_needed || size != framebuffer_size);
+        if must_destroy {
+            compositor.destroy_surface(&mut self.device, NativeSurfaceId::DEBUG_OVERLAY);
+            self.debug_overlay_state.current_size = None;
+        }
+
+        // Allocate a fresh surface (plus its tile) if one is needed and
+        // none exists.
+        if overlay_needed && self.debug_overlay_state.current_size.is_none() {
+            compositor.create_surface(
+                &mut self.device,
+                NativeSurfaceId::DEBUG_OVERLAY,
+                DeviceIntPoint::zero(),
+                framebuffer_size,
+                false,
+            );
+            compositor.create_tile(
+                &mut self.device,
+                NativeTileId::DEBUG_OVERLAY,
+            );
+            self.debug_overlay_state.current_size = Some(framebuffer_size);
+        }
+    }
+
+    /// Produce the draw target that debug / profiler overlays should render to.
+    ///
+    /// Returns `None` when the overlay is disabled. With the native compositor
+    /// the overlay tile is invalidated, bound and cleared before its draw
+    /// target is returned; otherwise a draw target describing the default
+    /// framebuffer of size `device_size` is returned without binding anything.
+    fn bind_debug_overlay(&mut self, device_size: DeviceIntSize) -> Option<DrawTarget> {
+        if !self.debug_overlay_state.is_enabled {
+            return None;
+        }
+
+        if !matches!(self.current_compositor_kind, CompositorKind::Native { .. }) {
+            // Without the native compositor the default frame buffer is
+            // already bound, so just describe it.
+            let origin_is_top_left = self.device.surface_origin_is_top_left();
+            return Some(DrawTarget::new_default(device_size, origin_is_top_left));
+        }
+
+        // Extra surface setup is only needed in native compositing mode.
+        let compositor = self.compositor_config.compositor().unwrap();
+        let surface_size = self.debug_overlay_state.current_size.unwrap();
+        let surface_rect = DeviceIntRect::from_size(surface_size);
+
+        // The previous contents must be invalidated before the tile is bound.
+        compositor.invalidate_tile(
+            &mut self.device,
+            NativeTileId::DEBUG_OVERLAY,
+            surface_rect,
+        );
+        let surface_info = compositor.bind(
+            &mut self.device,
+            NativeTileId::DEBUG_OVERLAY,
+            surface_rect,
+            surface_rect,
+        );
+
+        // Make the native surface the current FBO target.
+        let draw_target = DrawTarget::NativeSurface {
+            offset: surface_info.origin,
+            external_fbo_id: surface_info.fbo_id,
+            dimensions: surface_size,
+        };
+        self.device.bind_draw_target(draw_target);
+
+        // The overlay is cleared every frame when native compositing. The
+        // debug renderer does not use depth, so only colour is cleared.
+        self.device.clear_target(Some([0.0, 0.0, 0.0, 0.0]), None, None);
+
+        Some(draw_target)
+    }
+
+    /// Release the debug / profiler overlay draw target, if one was needed.
+    ///
+    /// This only does work when the overlay is enabled and the native
+    /// compositor is in use: the surface is unbound and then added to the
+    /// compositor's visual tree so that it gets composited.
+    fn unbind_debug_overlay(&mut self) {
+        let uses_native_compositor =
+            matches!(self.current_compositor_kind, CompositorKind::Native { .. });
+        if !(self.debug_overlay_state.is_enabled && uses_native_compositor) {
+            return;
+        }
+
+        let compositor = self.compositor_config.compositor().unwrap();
+        compositor.unbind(&mut self.device);
+
+        let overlay_rect = DeviceIntRect::from_size(
+            self.debug_overlay_state.current_size.unwrap(),
+        );
+        compositor.add_surface(
+            &mut self.device,
+            NativeSurfaceId::DEBUG_OVERLAY,
+            CompositorSurfaceTransform::identity(),
+            overlay_rect,
+            ImageRendering::Auto,
+        );
+    }
+
+    /// Render the frame of `active_doc` and update the per-frame profiling state.
+    ///
+    /// If device_size is None, don't render to the main frame buffer. This is useful to
+    /// update texture cache render tasks but avoid doing a full frame render. If the
+    /// render is not going to be presented, then this must be set to None, as performing a
+    /// composite without a present will confuse partial present.
+    ///
+    /// Returns the collected `RenderResults`, or, if any errors were pushed to
+    /// `self.renderer_errors` during the frame, those errors (the list is left empty).
+    fn render_impl(
+        &mut self,
+        doc_id: DocumentId,
+        active_doc: &mut RenderedDocument,
+        device_size: Option<DeviceIntSize>,
+        buffer_age: usize,
+    ) -> Result<RenderResults, Vec<RendererError>> {
+        profile_scope!("render");
+        let mut results = RenderResults::default();
+        self.profile.start_time(profiler::RENDERER_TIME);
+
+        self.staging_texture_pool.begin_frame();
+
+        let compositor_kind = active_doc.frame.composite_state.compositor_kind;
+        // The compositor kind requested by this frame differs from the one currently
+        // in use: destroy the native debug overlay surface when leaving native mode,
+        // and tell the compositor (if any) whether native compositing is now enabled.
+        if self.current_compositor_kind != compositor_kind {
+            let enable = match (self.current_compositor_kind, compositor_kind) {
+                (CompositorKind::Native { .. }, CompositorKind::Draw { .. }) => {
+                    if self.debug_overlay_state.current_size.is_some() {
+                        self.compositor_config
+                            .compositor()
+                            .unwrap()
+                            .destroy_surface(&mut self.device, NativeSurfaceId::DEBUG_OVERLAY);
+                        self.debug_overlay_state.current_size = None;
+                    }
+                    false
+                }
+                (CompositorKind::Draw { .. }, CompositorKind::Native { .. }) => {
+                    true
+                }
+                (current_compositor_kind, active_doc_compositor_kind) => {
+                    warn!("Compositor mismatch, assuming this is Wrench running. Current {:?}, active {:?}",
+                        current_compositor_kind, active_doc_compositor_kind);
+                    false
+                }
+            };
+
+            if let Some(config) = self.compositor_config.compositor() {
+                config.enable_native_compositor(&mut self.device, enable);
+            }
+            self.current_compositor_kind = compositor_kind;
+        }
+
+        // The texture resolver scope should be outside of any rendering, including
+        // debug rendering. This ensures that when we return render targets to the
+        // pool via glInvalidateFramebuffer, we don't do any debug rendering after
+        // that point. Otherwise, the bind / invalidate / bind logic trips up the
+        // render pass logic in tiled / mobile GPUs, resulting in an extra copy /
+        // resolve step when the debug overlay is enabled.
+        self.texture_resolver.begin_frame();
+
+        if let Some(device_size) = device_size {
+            self.update_gpu_profile(device_size);
+        }
+
+        // Begin the device frame, reset the basic GL state and apply any pending
+        // texture cache / native surface updates before drawing.
+        let cpu_frame_id = {
+            let _gm = self.gpu_profiler.start_marker("begin frame");
+            let frame_id = self.device.begin_frame();
+            self.gpu_profiler.begin_frame(frame_id);
+
+            self.device.disable_scissor();
+            self.device.disable_depth();
+            self.set_blend(false, FramebufferKind::Main);
+            //self.update_shaders();
+
+            self.update_texture_cache();
+            self.update_native_surfaces();
+
+            frame_id
+        };
+
+        if let Some(device_size) = device_size {
+            // Inform the client that we are starting a composition transaction if native
+            // compositing is enabled. This needs to be done early in the frame, so that
+            // we can create debug overlays after drawing the main surfaces.
+            if let CompositorKind::Native { .. } = self.current_compositor_kind {
+                let compositor = self.compositor_config.compositor().unwrap();
+                compositor.begin_frame(&mut self.device);
+            }
+
+            // Update the state of the debug overlay surface, ensuring that
+            // the compositor mode has a suitable surface to draw to, if required.
+            self.update_debug_overlay(device_size, !active_doc.frame.debug_items.is_empty());
+        }
+
+        let frame = &mut active_doc.frame;
+        let profile = &mut active_doc.profile;
+        assert!(self.current_compositor_kind == frame.composite_state.compositor_kind);
+
+        if self.shared_texture_cache_cleared {
+            assert!(self.documents_seen.contains(&doc_id),
+                    "Cleared texture cache without sending new document frame.");
+        }
+
+        // The frame is only drawn if the GPU cache could be prepared; otherwise the
+        // error is recorded and the rest of the bookkeeping below still runs.
+        match self.prepare_gpu_cache(&frame.deferred_resolves) {
+            Ok(..) => {
+                assert!(frame.gpu_cache_frame_id <= self.gpu_cache_frame_id,
+                    "Received frame depends on a later GPU cache epoch ({:?}) than one we received last via `UpdateGpuCache` ({:?})",
+                    frame.gpu_cache_frame_id, self.gpu_cache_frame_id);
+
+                {
+                    profile_scope!("gl.flush");
+                    self.device.gl().flush();  // early start on gpu cache updates
+                }
+
+                self.draw_frame(
+                    frame,
+                    device_size,
+                    buffer_age,
+                    &mut results,
+                );
+
+                // TODO(nical): do this automatically by selecting counters in the wr profiler
+                // Profile marker for the number of invalidated picture cache
+                if thread_is_being_profiled() {
+                    let duration = Duration::new(0,0);
+                    if let Some(n) = self.profile.get(profiler::RENDERED_PICTURE_TILES) {
+                        let message = (n as usize).to_string();
+                        add_text_marker("NumPictureCacheInvalidated", &message, duration);
+                    }
+                }
+
+                if device_size.is_some() {
+                    self.draw_frame_debug_items(&frame.debug_items);
+                }
+
+                self.profile.merge(profile);
+            }
+            Err(e) => {
+                self.renderer_errors.push(e);
+            }
+        }
+
+        self.unlock_external_images(&frame.deferred_resolves);
+
+        let _gm = self.gpu_profiler.start_marker("end frame");
+        self.gpu_profiler.end_frame();
+
+        // `debug_overlay` is `Some` only when a device size was supplied and
+        // `bind_debug_overlay` returned a draw target for the overlay.
+        let debug_overlay = device_size.and_then(|device_size| {
+            // Bind a surface to draw the debug / profiler information to.
+            self.bind_debug_overlay(device_size).map(|draw_target| {
+                self.draw_render_target_debug(&draw_target);
+                self.draw_texture_cache_debug(&draw_target);
+                self.draw_gpu_cache_debug(device_size);
+                self.draw_zoom_debug(device_size);
+                self.draw_epoch_debug();
+                self.draw_window_visibility_debug();
+                draw_target
+            })
+        });
+
+        let t = self.profile.end_time(profiler::RENDERER_TIME);
+        self.profile.end_time_if_started(profiler::TOTAL_FRAME_CPU_TIME);
+        Telemetry::record_renderer_time(Duration::from_micros((t * 1000.00) as u64));
+        if self.profile.get(profiler::SHADER_BUILD_TIME).is_none() {
+          Telemetry::record_renderer_time_no_sc(Duration::from_micros((t * 1000.00) as u64));
+        }
+
+        // Time elapsed since `self.last_time`, recorded only for frames that
+        // target the main frame buffer.
+        let current_time = precise_time_ns();
+        if device_size.is_some() {
+            let time = profiler::ns_to_ms(current_time - self.last_time);
+            self.profile.set(profiler::FRAME_TIME, time);
+        }
+
+        // Keep at most `max_recorded_profiles` CPU profiles, dropping the oldest first.
+        if self.max_recorded_profiles > 0 {
+            while self.cpu_profiles.len() >= self.max_recorded_profiles {
+                self.cpu_profiles.pop_front();
+            }
+            let cpu_profile = CpuProfile::new(
+                cpu_frame_id,
+                (self.profile.get_or(profiler::FRAME_BUILDING_TIME, 0.0) * 1000000.0) as u64,
+                (self.profile.get_or(profiler::RENDERER_TIME, 0.0) * 1000000.0) as u64,
+                self.profile.get_or(profiler::DRAW_CALLS, 0.0) as usize,
+            );
+            self.cpu_profiles.push_back(cpu_profile);
+        }
+
+        if thread_is_being_profiled() {
+            let duration = Duration::new(0,0);
+            let message = (self.profile.get_or(profiler::DRAW_CALLS, 0.0) as usize).to_string();
+            add_text_marker("NumDrawCalls", &message, duration);
+        }
+
+        // Publish the memory usage reported by the texture resolver and the device.
+        let report = self.texture_resolver.report_memory();
+        self.profile.set(profiler::RENDER_TARGET_MEM, profiler::bytes_to_mb(report.render_target_textures));
+        self.profile.set(profiler::PICTURE_TILES_MEM, profiler::bytes_to_mb(report.picture_tile_textures));
+        self.profile.set(profiler::ATLAS_TEXTURES_MEM, profiler::bytes_to_mb(report.atlas_textures));
+        self.profile.set(profiler::STANDALONE_TEXTURES_MEM, profiler::bytes_to_mb(report.standalone_textures));
+
+        self.profile.set(profiler::DEPTH_TARGETS_MEM, profiler::bytes_to_mb(self.device.depth_targets_memory()));
+
+        self.profile.set(profiler::TEXTURES_CREATED, self.device.textures_created);
+        self.profile.set(profiler::TEXTURES_DELETED, self.device.textures_deleted);
+
+        // Move the accumulated upload timings into the results and reset them
+        // for the next frame.
+        results.stats.texture_upload_mb = self.profile.get_or(profiler::TEXTURE_UPLOADS_MEM, 0.0);
+        self.frame_counter += 1;
+        results.stats.resource_upload_time = self.resource_upload_time;
+        self.resource_upload_time = 0.0;
+        results.stats.gpu_cache_upload_time = self.gpu_cache_upload_time;
+        self.gpu_cache_upload_time = 0.0;
+
+        if let Some(stats) = active_doc.frame_stats.take() {
+          // Copy the full frame stats to RendererStats
+          results.stats.merge(&stats);
+
+          self.profiler.update_frame_stats(stats);
+        }
+
+        // Turn the render reasons bitflags into something we can see in the profiler.
+        // For now this is just a binary yes/no for each bit, which means that when looking
+        // at "Render reasons" in the profiler HUD the average view indicates the proportion
+        // of frames that had the bit set over a half second window whereas max shows whether
+        // the bit has been set at least once during that time window.
+        // We could implement better ways to visualize this information.
+        let add_markers = thread_is_being_profiled();
+        for i in 0..RenderReasons::NUM_BITS {
+            let counter = profiler::RENDER_REASON_FIRST + i as usize;
+            let mut val = 0.0;
+            let reason_bit = RenderReasons::from_bits_truncate(1 << i);
+            if active_doc.render_reasons.contains(reason_bit) {
+                val = 1.0;
+                if add_markers {
+                    let event_str = format!("Render reason {:?}", reason_bit);
+                    add_event_marker(&event_str);
+                }
+            }
+            self.profile.set(counter, val);
+        }
+        // The reasons have been reported; start the next frame with none set.
+        active_doc.render_reasons = RenderReasons::empty();
+
+
+        self.texture_resolver.update_profile(&mut self.profile);
+
+        // Note: this clears the values in self.profile.
+        self.profiler.set_counters(&mut self.profile);
+
+        // Note: profile counters must be set before this or they will count for next frame.
+        self.profiler.update();
+
+        if self.debug_flags.intersects(DebugFlags::PROFILER_DBG | DebugFlags::PROFILER_CAPTURE) {
+            if let Some(device_size) = device_size {
+                //TODO: take device/pixel ratio into equation?
+                if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) {
+                    self.profiler.draw_profile(
+                        self.frame_counter,
+                        debug_renderer,
+                        device_size,
+                    );
+                }
+            }
+        }
+
+        if self.debug_flags.contains(DebugFlags::ECHO_DRIVER_MESSAGES) {
+            self.device.echo_driver_messages();
+        }
+
+        if let Some(debug_renderer) = self.debug.try_get_mut() {
+            let small_screen = self.debug_flags.contains(DebugFlags::SMALL_SCREEN);
+            let scale = if small_screen { 1.6 } else { 1.0 };
+            // TODO(gw): Tidy this up so that compositor config integrates better
+            //           with the (non-compositor) surface y-flip options.
+            let surface_origin_is_top_left = match self.current_compositor_kind {
+                CompositorKind::Native { .. } => true,
+                CompositorKind::Draw { .. } => self.device.surface_origin_is_top_left(),
+            };
+            // If there is a debug overlay, render it. Otherwise, just clear
+            // the debug renderer. `debug_overlay.and(device_size)` yields the
+            // device size only when an overlay draw target was bound.
+            debug_renderer.render(
+                &mut self.device,
+                debug_overlay.and(device_size),
+                scale,
+                surface_origin_is_top_left,
+            );
+        }
+
+        self.staging_texture_pool.end_frame(&mut self.device);
+        self.texture_upload_pbo_pool.end_frame(&mut self.device);
+        self.device.end_frame();
+
+        // NOTE(review): `last_time` is only refreshed here when a debug overlay was
+        // bound, while FRAME_TIME above is computed for every frame with a device
+        // size - confirm that this is the intended condition.
+        if debug_overlay.is_some() {
+            self.last_time = current_time;
+
+            // Unbind the target for the debug overlay. No debug or profiler drawing
+            // can occur after this point.
+            self.unbind_debug_overlay();
+        }
+
+        if device_size.is_some() {
+            // Inform the client that we are finished this composition transaction if native
+            // compositing is enabled. This must be called after any debug / profiling compositor
+            // surfaces have been drawn and added to the visual tree.
+            if let CompositorKind::Native { .. } = self.current_compositor_kind {
+                profile_scope!("compositor.end_frame");
+                let compositor = self.compositor_config.compositor().unwrap();
+                compositor.end_frame(&mut self.device);
+            }
+        }
+
+        self.documents_seen.clear();
+        self.shared_texture_cache_cleared = false;
+
+        self.check_gl_errors();
+
+        // Hand any accumulated errors to the caller, leaving an empty list behind
+        // for the next frame.
+        if self.renderer_errors.is_empty() {
+            Ok(results)
+        } else {
+            Err(mem::replace(&mut self.renderer_errors, Vec::new()))
+        }
+    }
+
+    /// Collect the GPU profiler samples built for a previous frame and feed
+    /// them into the recorded GPU profiles and the profiler counters.
+    ///
+    /// Sampler counts are scaled by the inverse of the area of `device_size`
+    /// before being stored in the sampler counters.
+    fn update_gpu_profile(&mut self, device_size: DeviceIntSize) {
+        let _gm = self.gpu_profiler.start_marker("build samples");
+        // Block CPU waiting for last frame's GPU profiles to arrive.
+        // In general this shouldn't block unless heavily GPU limited.
+        let (gpu_frame_id, timers, samplers) = self.gpu_profiler.build_samples();
+
+        // Keep a bounded history of GPU profiles, evicting the oldest entries.
+        if self.max_recorded_profiles > 0 {
+            while self.gpu_profiles.len() >= self.max_recorded_profiles {
+                self.gpu_profiles.pop_front();
+            }
+
+            let gpu_profile = GpuProfile::new(gpu_frame_id, &timers);
+            self.gpu_profiles.push_back(gpu_profile);
+        }
+
+        self.profiler.set_gpu_time_queries(timers);
+
+        if samplers.is_empty() {
+            return;
+        }
+
+        let screen_fraction = 1.0 / device_size.to_f32().area();
+
+        // Sum the counts of every sampler carrying `label`, scaled by the
+        // screen fraction.
+        let scaled_sample_count = |label: &str| -> f32 {
+            let total = samplers
+                .iter()
+                .filter(|sampler| sampler.tag.label == label)
+                .fold(0.0_f32, |accum, sampler| accum + sampler.count as f32);
+            total * screen_fraction
+        };
+
+        let alpha_targets = scaled_sample_count("Alpha targets");
+        let transparent_pass = scaled_sample_count("Transparent pass");
+        let opaque_pass = scaled_sample_count("Opaque pass");
+        let total_samplers = alpha_targets + transparent_pass + opaque_pass;
+
+        self.profile.set(profiler::ALPHA_TARGETS_SAMPLERS, alpha_targets);
+        self.profile.set(profiler::TRANSPARENT_PASS_SAMPLERS, transparent_pass);
+        self.profile.set(profiler::OPAQUE_PASS_SAMPLERS, opaque_pass);
+        self.profile.set(profiler::TOTAL_SAMPLERS, total_samplers);
+    }
+
+    /// Apply all pending texture cache update lists to the GPU textures.
+    ///
+    /// For each update list this performs the requested texture-to-texture
+    /// copies, removes textures that are being freed or reset (reusing them
+    /// for new allocations with identical allocation info where possible),
+    /// creates the remaining new textures and finally uploads the pixel
+    /// updates. Time spent creating and deleting textures is recorded in the
+    /// profile, and notifications waiting on `Checkpoint::FrameTexturesUpdated`
+    /// are fired at the end.
+    fn update_texture_cache(&mut self) {
+        profile_scope!("update_texture_cache");
+
+        let _gm = self.gpu_profiler.start_marker("texture cache update");
+        // Take ownership of the pending lists, leaving an empty vector behind.
+        let mut pending_texture_updates = mem::replace(&mut self.pending_texture_updates, vec![]);
+        self.pending_texture_cache_updates = false;
+
+        self.profile.start_time(profiler::TEXTURE_CACHE_UPDATE_TIME);
+
+        // Accumulated time (in nanoseconds) across all update lists.
+        let mut create_cache_texture_time = 0;
+        let mut delete_cache_texture_time = 0;
+
+        for update_list in pending_texture_updates.drain(..) {
+            // Handle copies from one texture to another.
+            for ((src_tex, dst_tex), copies) in &update_list.copies {
+
+                let dest_texture = &self.texture_resolver.texture_cache_map[&dst_tex].texture;
+                let dst_texture_size = dest_texture.get_dimensions().to_f32();
+
+                let mut copy_instances = Vec::new();
+                for copy in copies {
+                    copy_instances.push(CopyInstance {
+                        src_rect: copy.src_rect.to_f32(),
+                        dst_rect: copy.dst_rect.to_f32(),
+                        dst_texture_size,
+                    });
+                }
+
+                let draw_target = DrawTarget::from_texture(dest_texture, false);
+                self.device.bind_draw_target(draw_target);
+
+                self.shaders
+                    .borrow_mut()
+                    .ps_copy
+                    .bind(
+                        &mut self.device,
+                        &Transform3D::identity(),
+                        None,
+                        &mut self.renderer_errors,
+                        &mut self.profile,
+                    );
+
+                // The draw call statistics of these copies are discarded.
+                self.draw_instanced_batch(
+                    &copy_instances,
+                    VertexArrayKind::Copy,
+                    &BatchTextures::composite_rgb(
+                        TextureSource::TextureCache(*src_tex, Swizzle::default())
+                    ),
+                    &mut RendererStats::default(),
+                );
+            }
+
+            // Find any textures that will need to be deleted in this group of allocations.
+            let mut pending_deletes = Vec::new();
+            for allocation in &update_list.allocations {
+                let old = self.texture_resolver.texture_cache_map.remove(&allocation.id);
+                // An `Alloc` must target an unused id, while `Reset` and `Free`
+                // must refer to a texture that currently exists.
+                match allocation.kind {
+                    TextureCacheAllocationKind::Alloc(_) => {
+                        assert!(old.is_none(), "Renderer and backend disagree!");
+                    }
+                    TextureCacheAllocationKind::Reset(_) |
+                    TextureCacheAllocationKind::Free => {
+                        assert!(old.is_some(), "Renderer and backend disagree!");
+                    }
+                }
+                if let Some(old) = old {
+
+                    // Regenerate the cache allocation info so we can search through deletes for reuse.
+                    let size = old.texture.get_dimensions();
+                    let info = TextureCacheAllocInfo {
+                        width: size.width,
+                        height: size.height,
+                        format: old.texture.get_format(),
+                        filter: old.texture.get_filter(),
+                        target: old.texture.get_target(),
+                        is_shared_cache: old.texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE),
+                        has_depth: old.texture.supports_depth(),
+                        category: old.category,
+                    };
+                    pending_deletes.push((old.texture, info));
+                }
+            }
+            // Look for any alloc or reset that has matching alloc info and save it from being deleted.
+            // One entry is pushed per alloc / reset, in allocation order, so that the creation
+            // loop below can pop them in the same order (`None` means nothing could be reused).
+            let mut reused_textures = VecDeque::with_capacity(pending_deletes.len());
+            for allocation in &update_list.allocations {
+                match allocation.kind {
+                    TextureCacheAllocationKind::Alloc(ref info) |
+                    TextureCacheAllocationKind::Reset(ref info) => {
+                        reused_textures.push_back(
+                            pending_deletes.iter()
+                                .position(|(_, old_info)| *old_info == *info)
+                                .map(|index| pending_deletes.swap_remove(index).0)
+                        );
+                    }
+                    TextureCacheAllocationKind::Free => {}
+                }
+            }
+
+            // Now that we've saved as many deletions for reuse as we can, actually delete whatever is left.
+            if !pending_deletes.is_empty() {
+                let delete_texture_start = precise_time_ns();
+                for (texture, _) in pending_deletes {
+                    add_event_marker("TextureCacheFree");
+                    self.device.delete_texture(texture);
+                }
+                delete_cache_texture_time += precise_time_ns() - delete_texture_start;
+            }
+
+            for allocation in update_list.allocations {
+                match allocation.kind {
+                    TextureCacheAllocationKind::Alloc(_) => add_event_marker("TextureCacheAlloc"),
+                    TextureCacheAllocationKind::Reset(_) => add_event_marker("TextureCacheReset"),
+                    TextureCacheAllocationKind::Free => {}
+                };
+                match allocation.kind {
+                    TextureCacheAllocationKind::Alloc(ref info) |
+                    TextureCacheAllocationKind::Reset(ref info) => {
+                        let create_cache_texture_start = precise_time_ns();
+                        // Create a new native texture, as requested by the texture cache.
+                        // If we managed to reuse a deleted texture, then prefer that instead.
+                        //
+                        // Ensure no PBO is bound when creating the texture storage,
+                        // or GL will attempt to read data from there.
+                        let mut texture = reused_textures.pop_front().unwrap_or(None).unwrap_or_else(|| {
+                            self.device.create_texture(
+                                info.target,
+                                info.format,
+                                info.width,
+                                info.height,
+                                info.filter,
+                                // This needs to be a render target because some render
+                                // tasks get rendered into the texture cache.
+                                Some(RenderTargetInfo { has_depth: info.has_depth }),
+                            )
+                        });
+
+                        if info.is_shared_cache {
+                            texture.flags_mut()
+                                .insert(TextureFlags::IS_SHARED_TEXTURE_CACHE);
+
+                            // On Mali-Gxx devices we use batched texture uploads as it performs much better.
+                            // However, due to another driver bug we must ensure the textures are fully cleared,
+                            // otherwise we get visual artefacts when blitting to the texture cache.
+                            if self.device.use_batched_texture_uploads() &&
+                                !self.device.get_capabilities().supports_render_target_partial_update
+                            {
+                                self.clear_texture(&texture, [0.0; 4]);
+                            }
+
+                            // Textures in the cache generally don't need to be cleared,
+                            // but we do so if the debug display is active to make it
+                            // easier to identify unallocated regions.
+                            if self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) {
+                                self.clear_texture(&texture, TEXTURE_CACHE_DBG_CLEAR_COLOR);
+                            }
+                        }
+
+                        create_cache_texture_time += precise_time_ns() - create_cache_texture_start;
+
+                        // Register the texture under the allocation's id.
+                        self.texture_resolver.texture_cache_map.insert(allocation.id, CacheTexture {
+                            texture,
+                            category: info.category,
+                        });
+                    }
+                    TextureCacheAllocationKind::Free => {}
+                };
+            }
+
+            upload_to_texture_cache(self, update_list.updates);
+
+            self.check_gl_errors();
+        }
+
+        // Only report the create / delete timings if any time was actually measured.
+        if create_cache_texture_time > 0 {
+            self.profile.set(
+                profiler::CREATE_CACHE_TEXTURE_TIME,
+                profiler::ns_to_ms(create_cache_texture_time)
+            );
+        }
+        if delete_cache_texture_time > 0 {
+            self.profile.set(
+                profiler::DELETE_CACHE_TEXTURE_TIME,
+                profiler::ns_to_ms(delete_cache_texture_time)
+            )
+        }
+
+        let t = self.profile.end_time(profiler::TEXTURE_CACHE_UPDATE_TIME);
+        self.resource_upload_time += t;
+        Telemetry::record_texture_cache_update_time(Duration::from_micros((t * 1000.00) as u64));
+
+        // Fire (and remove) the notifications waiting for the textures to be updated.
+        drain_filter(
+            &mut self.notifications,
+            |n| { n.when() == Checkpoint::FrameTexturesUpdated },
+            |n| { n.notify(); },
+        );
+    }
+
+    fn check_gl_errors(&mut self) {
+        let err = self.device.gl().get_error();
+        if err == gl::OUT_OF_MEMORY {
+            self.renderer_errors.push(RendererError::OutOfMemory);
+        }
+
+        // Probably should check for other errors?
+    }
+
+    /// Bind every texture referenced by `textures`: the first three colour
+    /// inputs, the clip mask and, if one exists, the dither matrix texture.
+    fn bind_textures(&mut self, textures: &BatchTextures) {
+        let color_sources = &textures.input.colors[.. 3];
+        for (slot, source) in color_sources.iter().enumerate() {
+            self.texture_resolver.bind(
+                source,
+                TextureSampler::color(slot),
+                &mut self.device,
+            );
+        }
+
+        self.texture_resolver.bind(
+            &textures.clip_mask,
+            TextureSampler::ClipMask,
+            &mut self.device,
+        );
+
+        // TODO: this probably isn't the best place for this.
+        if let Some(dither_texture) = self.dither_matrix_texture.as_ref() {
+            self.device.bind_texture(
+                TextureSampler::Dither,
+                dither_texture,
+                Swizzle::default(),
+            );
+        }
+    }
+
+    /// Upload `data` as instance data for the VAO selected by
+    /// `vertex_array_kind` and issue the draw calls needed to render it.
+    ///
+    /// The textures in `textures` are bound first. The instances are then
+    /// drawn in chunks: one instance per draw call when
+    /// `DebugFlags::DISABLE_BATCHING` is set, at most
+    /// `max_primitive_instance_count` instances for primitive batches, and
+    /// everything in a single call otherwise. Each draw call increments both
+    /// the `DRAW_CALLS` profile counter and `stats.total_draw_calls`.
+    ///
+    /// An empty `data` slice is a frame building bug and asserts in debug
+    /// builds; in release builds it is a no-op after binding the textures.
+    fn draw_instanced_batch<T: Clone>(
+        &mut self,
+        data: &[T],
+        vertex_array_kind: VertexArrayKind,
+        textures: &BatchTextures,
+        stats: &mut RendererStats,
+    ) {
+        self.bind_textures(textures);
+
+        // If we end up with an empty draw call here, that means we have
+        // probably introduced unnecessary batch breaks during frame
+        // building - so we should be catching this earlier and removing
+        // the batch.
+        debug_assert!(!data.is_empty());
+
+        // The assertion above is compiled out of release builds. Without this
+        // guard an empty batch would reach `data.chunks(0)` below (the chunk
+        // size falls back to `data.len()`), and `slice::chunks` panics when
+        // the chunk size is zero.
+        if data.is_empty() {
+            return;
+        }
+
+        let vao = &self.vaos[vertex_array_kind];
+        self.device.bind_vao(vao);
+
+        let chunk_size = if self.debug_flags.contains(DebugFlags::DISABLE_BATCHING) {
+            1
+        } else if vertex_array_kind == VertexArrayKind::Primitive {
+            self.max_primitive_instance_count
+        } else {
+            data.len()
+        };
+
+        for chunk in data.chunks(chunk_size) {
+            if self.enable_instancing {
+                self.device
+                    .update_vao_instances(vao, chunk, ONE_TIME_USAGE_HINT, None);
+                self.device
+                    .draw_indexed_triangles_instanced_u16(6, chunk.len() as i32);
+            } else {
+                // Without instancing, six indices are drawn per instance.
+                // NOTE(review): the `4` presumably repeats each instance once
+                // per vertex - confirm against `update_vao_instances`.
+                self.device
+                    .update_vao_instances(vao, chunk, ONE_TIME_USAGE_HINT, NonZeroUsize::new(4));
+                self.device
+                    .draw_indexed_triangles(6 * chunk.len() as i32);
+            }
+            self.profile.inc(profiler::DRAW_CALLS);
+            stats.total_draw_calls += 1;
+        }
+
+        self.profile.add(profiler::VERTICES, 6 * data.len());
+    }
+
+    /// Copies the part of `draw_target` that lies under a readback task into
+    /// that task's cache texture, so a following composite can sample it.
+    ///
+    /// * `draw_target` - the target currently being rendered; it is the source
+    ///   of the blit and is re-bound as the draw target before returning.
+    /// * `uses_scissor` - whether scissoring is active on entry. If so it is
+    ///   switched off for the duration of the copy and switched back on after.
+    /// * `backdrop` - must be a picture task; anything else panics.
+    /// * `readback` - must be a readback task. A readback without an origin
+    ///   is a dummy and makes this function return without touching any state.
+    fn handle_readback_composite(
+        &mut self,
+        draw_target: DrawTarget,
+        uses_scissor: bool,
+        backdrop: &RenderTask,
+        readback: &RenderTask,
+    ) {
+        // Where, in the backdrop surface's device space, the readback starts.
+        let readback_origin = match readback.kind {
+            RenderTaskKind::Readback(ReadbackTask { readback_origin: Some(origin), .. }) => origin,
+            // Dummy readback: the clear of the target already guarantees the
+            // task rect is zero alpha, so skipping the copy cannot affect the
+            // rendered output.
+            RenderTaskKind::Readback(ReadbackTask { readback_origin: None, .. }) => return,
+            _ => unreachable!(),
+        };
+
+        if uses_scissor {
+            self.device.disable_scissor();
+        }
+
+        let readback_source = TextureSource::TextureCache(
+            readback.get_target_texture(),
+            Swizzle::default(),
+        );
+        let (readback_texture, _) = self.texture_resolver
+            .resolve(&readback_source)
+            .expect("bug: no source texture");
+
+        // Gather the task rects needed to work out which part of the
+        // framebuffer has to be read back for this composite.
+        let readback_rect = readback.get_target_rect();
+        let backdrop_rect = backdrop.get_target_rect();
+        let backdrop_screen_origin = match backdrop.kind {
+            RenderTaskKind::Picture(ref picture_info) => picture_info.content_origin,
+            _ => panic!("bug: composite on non-picture?"),
+        };
+
+        // The texture the backdrop gets blitted into. This is set up on every
+        // call in case the FBO changes; the device skips the GL call when the
+        // requested target is already bound.
+        let blit_destination = DrawTarget::from_texture(readback_texture, false);
+
+        // The rect we would ideally read, in the space of the parent surface.
+        let wanted = DeviceRect::from_origin_and_size(
+            readback_origin,
+            readback_rect.size().to_f32(),
+        );
+
+        // The rect the parent surface can actually provide. It may be smaller
+        // than `wanted` because this is a picture cache tile covering only
+        // part of it and/or because the parent surface was clipped.
+        let available = DeviceRect::from_origin_and_size(
+            backdrop_screen_origin,
+            backdrop_rect.size().to_f32(),
+        );
+
+        if let Some(overlap) = wanted.intersection(&available) {
+            // There is something to copy: derive matching source and
+            // destination rects from the overlap, then blit.
+            let copy_size = overlap.size().to_i32();
+            let overlap_offset = overlap.min.to_vector();
+
+            let src_origin =
+                backdrop_rect.min.to_f32() + overlap_offset - backdrop_screen_origin.to_vector();
+            let dest_origin =
+                readback_rect.min.to_f32() + overlap_offset - readback_origin.to_vector();
+
+            let src = DeviceIntRect::from_origin_and_size(src_origin.to_i32(), copy_size);
+            let dest = DeviceIntRect::from_origin_and_size(dest_origin.to_i32(), copy_size);
+
+            // Should always be drawing to picture cache tiles or off-screen surface!
+            debug_assert!(!draw_target.is_default());
+            let device_to_framebuffer = Scale::new(1i32);
+
+            self.device.blit_render_target(
+                draw_target.into(),
+                src * device_to_framebuffer,
+                blit_destination,
+                dest * device_to_framebuffer,
+                TextureFilter::Linear,
+            );
+        }
+
+        // Put the current pass render target back as the draw target and
+        // reset the read target.
+        self.device.bind_draw_target(draw_target);
+        self.device.reset_read_target();
+
+        if uses_scissor {
+            self.device.enable_scissor();
+        }
+    }
+
+    /// Executes every resolve operation in `resolve_ops` against
+    /// `draw_target` under a single blit GPU timer, then resets the device's
+    /// read target. An empty list is a no-op: no timer is started and the
+    /// read target is left alone.
+    fn handle_resolves(
+        &mut self,
+        resolve_ops: &[ResolveOp],
+        render_tasks: &RenderTaskGraph,
+        draw_target: DrawTarget,
+    ) {
+        if !resolve_ops.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLIT);
+
+            resolve_ops.iter().for_each(|resolve_op| {
+                self.handle_resolve(resolve_op, render_tasks, draw_target);
+            });
+
+            self.device.reset_read_target();
+        }
+    }
+
+    /// Draws the indirect primitives and then applies their masks.
+    ///
+    /// The primitives in `prim_instances` are drawn first with blending
+    /// disabled using the textured quad shader. The masks follow with
+    /// multiply blending: `mask_instances_fast` through the fast mask shader,
+    /// then `mask_instances_slow` through the general one. An empty mask list
+    /// is skipped.
+    ///
+    /// Nothing at all is drawn (masks included) when `prim_instances` is
+    /// empty.
+    fn handle_prims(
+        &mut self,
+        prim_instances: &[PrimitiveInstanceData],
+        mask_instances_fast: &[MaskInstance],
+        mask_instances_slow: &[MaskInstance],
+        projection: &default::Transform3D<f32>,
+        stats: &mut RendererStats,
+    ) {
+        if prim_instances.is_empty() {
+            return;
+        }
+
+        // None of the draws below bind batch-specific textures.
+        let no_textures = BatchTextures::empty();
+
+        // Primitive pass.
+        {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_INDIRECT_PRIM);
+
+            self.device.disable_depth_write();
+            self.set_blend(false, FramebufferKind::Other);
+
+            self.shaders.borrow_mut().ps_quad_textured.bind(
+                &mut self.device, projection, None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+            self.draw_instanced_batch(
+                prim_instances, VertexArrayKind::Primitive, &no_textures, stats,
+            );
+        }
+
+        // Mask pass, multiplied on top of what the primitive pass produced.
+        {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_INDIRECT_MASK);
+
+            self.set_blend(true, FramebufferKind::Other);
+            self.set_blend_mode_multiply(FramebufferKind::Other);
+
+            let has_fast_masks = !mask_instances_fast.is_empty();
+            if has_fast_masks {
+                self.shaders.borrow_mut().ps_mask_fast.bind(
+                    &mut self.device, projection, None,
+                    &mut self.renderer_errors,
+                    &mut self.profile,
+                );
+                self.draw_instanced_batch(
+                    mask_instances_fast, VertexArrayKind::Mask, &no_textures, stats,
+                );
+            }
+
+            let has_slow_masks = !mask_instances_slow.is_empty();
+            if has_slow_masks {
+                self.shaders.borrow_mut().ps_mask.bind(
+                    &mut self.device, projection, None,
+                    &mut self.renderer_errors,
+                    &mut self.profile,
+                );
+                self.draw_instanced_batch(
+                    mask_instances_slow, VertexArrayKind::Mask, &no_textures, stats,
+                );
+            }
+        }
+    }
+
+    /// Blits the target rect of each job's source render task into
+    /// `blit.target_rect` of `draw_target`, using linear filtering.
+    ///
+    /// A source texture that resolves with a non-default swizzle is reported
+    /// through `error!`, but the blit is still performed. Panics if a source
+    /// texture cannot be resolved.
+    fn handle_blits(
+        &mut self,
+        blits: &[BlitJob],
+        render_tasks: &RenderTaskGraph,
+        draw_target: DrawTarget,
+    ) {
+        if blits.is_empty() {
+            return;
+        }
+
+        let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLIT);
+
+        // TODO(gw): For now, we don't bother batching these by source texture.
+        //           If it ever shows up as an issue, we can easily batch them.
+        for blit in blits {
+            // A blit from the child render task into this target.
+            // TODO(gw): Support R8 format here once we start
+            //           creating mips for alpha masks.
+            let source_task = &render_tasks[blit.source];
+            let source_rect = source_task.get_target_rect();
+            let source = source_task.get_texture_source();
+
+            debug_assert_eq!(source_rect.size(), blit.target_rect.size());
+
+            let (texture, swizzle) = self.texture_resolver
+                .resolve(&source)
+                .expect("BUG: invalid source texture");
+
+            if swizzle != Swizzle::default() {
+                error!("Swizzle {:?} can't be handled by a blit", swizzle);
+            }
+
+            let read_target = DrawTarget::from_texture(texture, false);
+            let read_rect = read_target.to_framebuffer_rect(source_rect);
+            let write_rect = draw_target.to_framebuffer_rect(blit.target_rect);
+
+            self.device.blit_render_target(
+                read_target.into(),
+                read_rect,
+                draw_target,
+                write_rect,
+                TextureFilter::Linear,
+            );
+        }
+    }
+
+    /// Draws all scaling instances, issuing one instanced batch per source
+    /// texture with the scale shader that matches the source's buffer kind.
+    ///
+    /// Instances that read from an external texture get their source rect
+    /// replaced by the UV rect reported by the texture resolver before being
+    /// drawn.
+    fn handle_scaling(
+        &mut self,
+        scalings: &FastHashMap<TextureSource, Vec<ScalingInstance>>,
+        projection: &default::Transform3D<f32>,
+        stats: &mut RendererStats,
+    ) {
+        if scalings.is_empty() {
+            return;
+        }
+
+        let _timer = self.gpu_profiler.start_timer(GPU_TAG_SCALE);
+
+        for (source, instances) in scalings {
+            let buffer_kind = source.image_buffer_kind();
+
+            // When the source texture is an external texture, the UV rect is not known
+            // when the external surface descriptor is created, because external textures
+            // are not resolved until the lock() callback is invoked at the start of the
+            // frame render. We must therefore override the source rects now.
+            let uv_override_instances: Vec<ScalingInstance>;
+            let instances = if let TextureSource::External(..) = source {
+                uv_override_instances = instances
+                    .iter()
+                    .map(|instance| {
+                        let texel_rect: TexelRect = self.texture_resolver.get_uv_rect(
+                            source,
+                            instance.source_rect.cast().into(),
+                        ).into();
+                        ScalingInstance {
+                            target_rect: instance.target_rect,
+                            source_rect: DeviceRect::new(texel_rect.uv0, texel_rect.uv1),
+                        }
+                    })
+                    .collect();
+                &uv_override_instances
+            } else {
+                instances
+            };
+
+            let texture_size = self.texture_resolver.get_texture_size(source).to_f32();
+            self.shaders
+                .borrow_mut()
+                .get_scale_shader(buffer_kind)
+                .bind(
+                    &mut self.device,
+                    projection,
+                    Some(texture_size),
+                    &mut self.renderer_errors,
+                    &mut self.profile,
+                );
+
+            let batch_textures = BatchTextures::composite_rgb(*source);
+            self.draw_instanced_batch(
+                instances,
+                VertexArrayKind::Scale,
+                &batch_textures,
+                stats,
+            );
+        }
+    }
+
+    /// Draws `svg_filters` as a single instanced batch with the SVG filter
+    /// shader, sampling from `textures`. Does nothing for an empty slice.
+    fn handle_svg_filters(
+        &mut self,
+        textures: &BatchTextures,
+        svg_filters: &[SvgFilterInstance],
+        projection: &default::Transform3D<f32>,
+        stats: &mut RendererStats,
+    ) {
+        if !svg_filters.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_SVG_FILTER);
+
+            self.shaders.borrow_mut().cs_svg_filter.bind(
+                &mut self.device,
+                projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+
+            self.draw_instanced_batch(svg_filters, VertexArrayKind::SvgFilter, textures, stats);
+        }
+    }
+
+    /// Performs a single resolve operation: for every source task listed in
+    /// `resolve_op`, blits the area it shares with the destination task from
+    /// the source task's target texture into `draw_target`.
+    ///
+    /// Both the source tasks and the destination task must be picture tasks;
+    /// any other kind panics. Source tasks that do not overlap the
+    /// destination are skipped without a blit.
+    ///
+    /// The read target is not reset here.
+    fn handle_resolve(
+        &mut self,
+        resolve_op: &ResolveOp,
+        render_tasks: &RenderTaskGraph,
+        draw_target: DrawTarget,
+    ) {
+        for src_task_id in &resolve_op.src_task_ids {
+            let src_task = &render_tasks[*src_task_id];
+            let src_info = match src_task.kind {
+                RenderTaskKind::Picture(ref info) => info,
+                _ => panic!("bug: not a picture"),
+            };
+            let src_task_rect = src_task.get_target_rect().to_f32();
+
+            // The destination is the same for every source task; it is looked
+            // up (and validated) once per iteration.
+            let dest_task = &render_tasks[resolve_op.dest_task_id];
+            let dest_info = match dest_task.kind {
+                RenderTaskKind::Picture(ref info) => info,
+                _ => panic!("bug: not a picture"),
+            };
+            let dest_task_rect = dest_task.get_target_rect().to_f32();
+
+            // Source and destination may have different device pixel scales,
+            // so both rects are divided by their own scale to bring them into
+            // a common space before they are intersected.
+
+            // Get the rect that we ideally want, in space of the parent surface
+            let wanted_rect = DeviceRect::from_origin_and_size(
+                dest_info.content_origin,
+                dest_task_rect.size().to_f32(),
+            ).cast_unit() * dest_info.device_pixel_scale.inverse();
+
+            // Get the rect that is available on the parent surface. It may be smaller
+            // than desired because this is a picture cache tile covering only part of
+            // the wanted rect and/or because the parent surface was clipped.
+            let avail_rect = DeviceRect::from_origin_and_size(
+                src_info.content_origin,
+                src_task_rect.size().to_f32(),
+            ).cast_unit() * src_info.device_pixel_scale.inverse();
+
+            if let Some(device_int_rect) = wanted_rect.intersection(&avail_rect) {
+                // Scale the shared rect back up into each task's own device
+                // space.
+                let src_int_rect = (device_int_rect * src_info.device_pixel_scale).cast_unit();
+                let dest_int_rect = (device_int_rect * dest_info.device_pixel_scale).cast_unit();
+
+                // If there is a valid intersection, work out the correct origins and
+                // sizes of the copy rects, and do the blit.
+
+                // Translate from content space to a position inside the source
+                // task's rect in its target texture.
+                let src_origin = src_task_rect.min.to_f32() +
+                    src_int_rect.min.to_vector() -
+                    src_info.content_origin.to_vector();
+
+                let src = DeviceIntRect::from_origin_and_size(
+                    src_origin.to_i32(),
+                    src_int_rect.size().round().to_i32(),
+                );
+
+                // Same translation for the destination task's rect.
+                let dest_origin = dest_task_rect.min.to_f32() +
+                    dest_int_rect.min.to_vector() -
+                    dest_info.content_origin.to_vector();
+
+                let dest = DeviceIntRect::from_origin_and_size(
+                    dest_origin.to_i32(),
+                    dest_int_rect.size().round().to_i32(),
+                );
+
+                let texture_source = TextureSource::TextureCache(
+                    src_task.get_target_texture(),
+                    Swizzle::default(),
+                );
+                let (cache_texture, _) = self.texture_resolver
+                    .resolve(&texture_source).expect("bug: no source texture");
+
+                let read_target = ReadTarget::from_texture(cache_texture);
+
+                // Should always be drawing to picture cache tiles or off-screen surface!
+                debug_assert!(!draw_target.is_default());
+                let device_to_framebuffer = Scale::new(1i32);
+
+                // `src` and `dest` can differ in size when the two tasks use
+                // different device pixel scales; the blit filters linearly.
+                self.device.blit_render_target(
+                    read_target,
+                    src * device_to_framebuffer,
+                    draw_target,
+                    dest * device_to_framebuffer,
+                    TextureFilter::Linear,
+                );
+            }
+        }
+    }
+
+    /// Renders one picture cache target (tile) into `draw_target`.
+    ///
+    /// The work happens in three stages:
+    /// 1. Set up: bind the target, optionally start Qualcomm tiled rendering
+    ///    over the dirty rect, and clear color/depth (scissored to the dirty
+    ///    rect when the device supports partial updates).
+    /// 2. Content: either draw the tile's alpha batch container, or blit the
+    ///    content from another render task, depending on `target.kind`.
+    /// 3. Tear down: invalidate the depth target and end tiled rendering if
+    ///    it was started.
+    ///
+    /// Also bumps the profiler's rendered-picture-tiles counter.
+    fn draw_picture_cache_target(
+        &mut self,
+        target: &PictureCacheTarget,
+        draw_target: DrawTarget,
+        projection: &default::Transform3D<f32>,
+        render_tasks: &RenderTaskGraph,
+        stats: &mut RendererStats,
+    ) {
+        profile_scope!("draw_picture_cache_target");
+
+        self.profile.inc(profiler::RENDERED_PICTURE_TILES);
+        let _gm = self.gpu_profiler.start_marker("picture cache target");
+        let framebuffer_kind = FramebufferKind::Other;
+
+        {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_TARGET);
+            self.device.bind_draw_target(draw_target);
+
+            if self.device.get_capabilities().supports_qcom_tiled_rendering {
+                // The origin is clamped so that a negative dirty rect origin
+                // is never handed to the driver.
+                self.device.gl().start_tiling_qcom(
+                    target.dirty_rect.min.x.max(0) as _,
+                    target.dirty_rect.min.y.max(0) as _,
+                    target.dirty_rect.width() as _,
+                    target.dirty_rect.height() as _,
+                    0,
+                );
+            }
+
+            self.device.enable_depth_write();
+            self.set_blend(false, framebuffer_kind);
+
+            let clear_color = target.clear_color.map(|c| c.to_array());
+            // Restrict the clear to the dirty rect only when the device can do
+            // partial target updates, and either the dirty rect differs from
+            // the valid rect or the device prefers scissored clears.
+            let scissor_rect = if self.device.get_capabilities().supports_render_target_partial_update
+                && (target.dirty_rect != target.valid_rect
+                    || self.device.get_capabilities().prefers_clear_scissor)
+            {
+                Some(target.dirty_rect)
+            } else {
+                None
+            };
+            match scissor_rect {
+                // If updating only a dirty rect within a picture cache target, the
+                // clear must also be scissored to that dirty region.
+                Some(r) if self.clear_caches_with_quads => {
+                    // Clear by drawing a quad over the dirty rect instead of
+                    // issuing a scissored clear call.
+                    self.device.enable_depth(DepthFunction::Always);
+                    // Save the draw call count so that our reftests don't get confused...
+                    let old_draw_call_count = stats.total_draw_calls;
+                    // Without a clear color, color writes are switched off for
+                    // the duration of the quad draw.
+                    if clear_color.is_none() {
+                        self.device.disable_color_write();
+                    }
+                    let instance = ClearInstance {
+                        rect: [
+                            r.min.x as f32, r.min.y as f32,
+                            r.max.x as f32, r.max.y as f32,
+                        ],
+                        color: clear_color.unwrap_or([0.0; 4]),
+                    };
+                    self.shaders.borrow_mut().ps_clear.bind(
+                        &mut self.device,
+                        &projection,
+                        None,
+                        &mut self.renderer_errors,
+                        &mut self.profile,
+                    );
+                    self.draw_instanced_batch(
+                        &[instance],
+                        VertexArrayKind::Clear,
+                        &BatchTextures::empty(),
+                        stats,
+                    );
+                    if clear_color.is_none() {
+                        self.device.enable_color_write();
+                    }
+                    // Undo the draw call that the clear quad just recorded.
+                    stats.total_draw_calls = old_draw_call_count;
+                    self.device.disable_depth();
+                }
+                // Either no scissor is needed, or quad clears are disabled:
+                // use a regular clear of color (if any) and depth.
+                other => {
+                    let scissor_rect = other.map(|rect| {
+                        draw_target.build_scissor_rect(Some(rect))
+                    });
+                    self.device.clear_target(clear_color, Some(1.0), scissor_rect);
+                }
+            };
+            self.device.disable_depth_write();
+        }
+
+        match target.kind {
+            PictureCacheTargetKind::Draw { ref alpha_batch_container } => {
+                self.draw_alpha_batch_container(
+                    alpha_batch_container,
+                    draw_target,
+                    framebuffer_kind,
+                    projection,
+                    render_tasks,
+                    stats,
+                );
+            }
+            PictureCacheTargetKind::Blit { task_id, sub_rect_offset } => {
+                let src_task = &render_tasks[task_id];
+                let (texture, _swizzle) = self.texture_resolver
+                    .resolve(&src_task.get_texture_source())
+                    .expect("BUG: invalid source texture");
+
+                let src_task_rect = src_task.get_target_rect();
+
+                // The source rect starts `sub_rect_offset` into the source
+                // task's rect and has the size of the dirty rect.
+                let p0 = src_task_rect.min + sub_rect_offset;
+                let p1 = p0 + target.dirty_rect.size();
+                let src_rect = DeviceIntRect::new(p0, p1);
+
+                // TODO(gw): In future, it'd be tidier to have the draw target offset
+                //           for DC surfaces handled by `blit_render_target`. However,
+                //           for now they are only ever written to here.
+                let target_rect = target
+                    .dirty_rect
+                    .translate(draw_target.offset().to_vector())
+                    .cast_unit();
+
+                // Source and destination have the same size, so nearest
+                // filtering is used for the copy.
+                self.device.blit_render_target(
+                    ReadTarget::from_texture(texture),
+                    src_rect.cast_unit(),
+                    draw_target,
+                    target_rect,
+                    TextureFilter::Nearest,
+                );
+            }
+        }
+
+        self.device.invalidate_depth_target();
+        if self.device.get_capabilities().supports_qcom_tiled_rendering {
+            self.device.gl().end_tiling_qcom(gl::COLOR_BUFFER_BIT0_QCOM);
+        }
+    }
+
+    /// Draw an alpha batch container into a given draw target. This is used
+    /// by both color and picture cache target kinds.
+    ///
+    /// Opaque batches are drawn first, in reverse order, with depth testing
+    /// and depth writes enabled and blending disabled. Alpha batches follow
+    /// in order with blending enabled, switching the blend mode only when it
+    /// differs from the previous batch's.
+    ///
+    /// If the container has a task scissor rect, scissoring is enabled for
+    /// the whole call and disabled again before returning. On return depth
+    /// testing is disabled; blending is disabled if the alpha pass ran.
+    ///
+    /// `DebugFlags::DISABLE_OPAQUE_PASS` and `DebugFlags::DISABLE_ALPHA_PASS`
+    /// skip the respective pass entirely.
+    fn draw_alpha_batch_container(
+        &mut self,
+        alpha_batch_container: &AlphaBatchContainer,
+        draw_target: DrawTarget,
+        framebuffer_kind: FramebufferKind,
+        projection: &default::Transform3D<f32>,
+        render_tasks: &RenderTaskGraph,
+        stats: &mut RendererStats,
+    ) {
+        let uses_scissor = alpha_batch_container.task_scissor_rect.is_some();
+
+        if uses_scissor {
+            self.device.enable_scissor();
+            let scissor_rect = draw_target.build_scissor_rect(
+                alpha_batch_container.task_scissor_rect,
+            );
+            self.device.set_scissor_rect(scissor_rect)
+        }
+
+        if !alpha_batch_container.opaque_batches.is_empty()
+            && !self.debug_flags.contains(DebugFlags::DISABLE_OPAQUE_PASS) {
+            let _gl = self.gpu_profiler.start_marker("opaque batches");
+            let opaque_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
+            self.set_blend(false, framebuffer_kind);
+            //Note: depth equality is needed for split planes
+            self.device.enable_depth(DepthFunction::LessEqual);
+            self.device.enable_depth_write();
+
+            // Draw opaque batches front-to-back for maximum
+            // z-buffer efficiency!
+            for batch in alpha_batch_container
+                .opaque_batches
+                .iter()
+                .rev()
+                {
+                    if should_skip_batch(&batch.key.kind, self.debug_flags) {
+                        continue;
+                    }
+
+                    self.shaders.borrow_mut()
+                        .get(&batch.key, batch.features, self.debug_flags, &self.device)
+                        .bind(
+                            &mut self.device, projection, None,
+                            &mut self.renderer_errors,
+                            &mut self.profile,
+                        );
+
+                    let _timer = self.gpu_profiler.start_timer(batch.key.kind.sampler_tag());
+                    self.draw_instanced_batch(
+                        &batch.instances,
+                        VertexArrayKind::Primitive,
+                        &batch.key.textures,
+                        stats
+                    );
+                }
+
+            // Depth testing stays enabled for the alpha pass below; only
+            // depth writes are switched off.
+            self.device.disable_depth_write();
+            self.gpu_profiler.finish_sampler(opaque_sampler);
+        } else {
+            // No opaque pass ran, so depth testing is switched off for the
+            // alpha pass.
+            self.device.disable_depth();
+        }
+
+        if !alpha_batch_container.alpha_batches.is_empty()
+            && !self.debug_flags.contains(DebugFlags::DISABLE_ALPHA_PASS) {
+            let _gl = self.gpu_profiler.start_marker("alpha batches");
+            let transparent_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
+            self.set_blend(true, framebuffer_kind);
+
+            // Tracks the blend mode currently configured on the device so
+            // that consecutive batches with the same mode skip the switch.
+            let mut prev_blend_mode = BlendMode::None;
+            // NOTE(review): presumably a cheap shared-pointer clone, taken so
+            // that the `shader` borrow below does not keep `self` borrowed
+            // while other `self` methods are called - confirm the type of
+            // `self.shaders`.
+            let shaders_rc = self.shaders.clone();
+
+            for batch in &alpha_batch_container.alpha_batches {
+                if should_skip_batch(&batch.key.kind, self.debug_flags) {
+                    continue;
+                }
+
+                let mut shaders = shaders_rc.borrow_mut();
+                let shader = shaders.get(
+                    &batch.key,
+                    batch.features | BatchFeatures::ALPHA_PASS,
+                    self.debug_flags,
+                    &self.device,
+                );
+
+                if batch.key.blend_mode != prev_blend_mode {
+                    match batch.key.blend_mode {
+                        // The overdraw visualization overrides every blend
+                        // mode, but only when drawing to the main framebuffer.
+                        _ if self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW) &&
+                            framebuffer_kind == FramebufferKind::Main => {
+                            self.device.set_blend_mode_show_overdraw();
+                        }
+                        BlendMode::None => {
+                            unreachable!("bug: opaque blend in alpha pass");
+                        }
+                        BlendMode::Alpha => {
+                            self.device.set_blend_mode_alpha();
+                        }
+                        BlendMode::PremultipliedAlpha => {
+                            self.device.set_blend_mode_premultiplied_alpha();
+                        }
+                        BlendMode::PremultipliedDestOut => {
+                            self.device.set_blend_mode_premultiplied_dest_out();
+                        }
+                        BlendMode::SubpixelDualSource => {
+                            self.device.set_blend_mode_subpixel_dual_source();
+                        }
+                        BlendMode::SubpixelWithBgColor => {
+                            // Using the three pass "component alpha with font smoothing
+                            // background color" rendering technique:
+                            //
+                            // /webrender/doc/text-rendering.md
+                            //
+                            self.device.set_blend_mode_subpixel_with_bg_color_pass0();
+                            // need to make sure the shader is bound
+                            shader.bind(
+                                &mut self.device,
+                                projection,
+                                None,
+                                &mut self.renderer_errors,
+                                &mut self.profile,
+                            );
+                            self.device.switch_mode(ShaderColorMode::SubpixelWithBgColorPass0 as _);
+                        }
+                        BlendMode::Advanced(mode) => {
+                            if self.enable_advanced_blend_barriers {
+                                self.device.gl().blend_barrier_khr();
+                            }
+                            self.device.set_blend_mode_advanced(mode);
+                        }
+                        BlendMode::MultiplyDualSource => {
+                            self.device.set_blend_mode_multiply_dual_source();
+                        }
+                        BlendMode::Screen => {
+                            self.device.set_blend_mode_screen();
+                        }
+                        BlendMode::Exclusion => {
+                            self.device.set_blend_mode_exclusion();
+                        }
+                        BlendMode::PlusLighter => {
+                            self.device.set_blend_mode_plus_lighter();
+                        }
+                    }
+                    prev_blend_mode = batch.key.blend_mode;
+                }
+
+                // Handle special case readback for composites.
+                if let BatchKind::Brush(BrushBatchKind::MixBlend { task_id, backdrop_id }) = batch.key.kind {
+                    // composites can't be grouped together because
+                    // they may overlap and affect each other.
+                    debug_assert_eq!(batch.instances.len(), 1);
+                    // NOTE(review): `task_id` is passed in the `backdrop`
+                    // position and `backdrop_id` in the `readback` position
+                    // of `handle_readback_composite` - confirm the naming is
+                    // intentional.
+                    self.handle_readback_composite(
+                        draw_target,
+                        uses_scissor,
+                        &render_tasks[task_id],
+                        &render_tasks[backdrop_id],
+                    );
+                }
+
+                let _timer = self.gpu_profiler.start_timer(batch.key.kind.sampler_tag());
+                shader.bind(
+                    &mut self.device,
+                    projection,
+                    None,
+                    &mut self.renderer_errors,
+                    &mut self.profile,
+                );
+
+                self.draw_instanced_batch(
+                    &batch.instances,
+                    VertexArrayKind::Primitive,
+                    &batch.key.textures,
+                    stats
+                );
+
+                // The draw above was pass 0 of the three pass technique; run
+                // the remaining two passes over the same instances.
+                if batch.key.blend_mode == BlendMode::SubpixelWithBgColor {
+                    self.set_blend_mode_subpixel_with_bg_color_pass1(framebuffer_kind);
+                    // re-binding the shader after the blend mode change
+                    shader.bind(
+                        &mut self.device,
+                        projection,
+                        None,
+                        &mut self.renderer_errors,
+                        &mut self.profile,
+                    );
+                    self.device.switch_mode(ShaderColorMode::SubpixelWithBgColorPass1 as _);
+
+                    // When drawing the 2nd and 3rd passes, we know that the VAO, textures etc
+                    // are all set up from the previous draw_instanced_batch call,
+                    // so just issue a draw call here to avoid re-uploading the
+                    // instances and re-binding textures etc.
+                    // NOTE(review): these two extra passes always use the
+                    // instanced draw call with the full instance count,
+                    // whereas `draw_instanced_batch` may chunk the data or
+                    // take the non-instanced path - confirm this is intended.
+                    self.device
+                        .draw_indexed_triangles_instanced_u16(6, batch.instances.len() as i32);
+
+                    self.set_blend_mode_subpixel_with_bg_color_pass2(framebuffer_kind);
+                    // re-binding the shader after the blend mode change
+                    shader.bind(
+                        &mut self.device,
+                        projection,
+                        None,
+                        &mut self.renderer_errors,
+                        &mut self.profile,
+                    );
+                    self.device.switch_mode(ShaderColorMode::SubpixelWithBgColorPass2 as _);
+
+                    self.device
+                        .draw_indexed_triangles_instanced_u16(6, batch.instances.len() as i32);
+                }
+
+                // The device is now left in the pass 2 blend state, so forget
+                // the tracked mode; the next batch then always reconfigures
+                // blending, even if it is another SubpixelWithBgColor batch.
+                if batch.key.blend_mode == BlendMode::SubpixelWithBgColor {
+                    prev_blend_mode = BlendMode::None;
+                }
+            }
+
+            self.set_blend(false, framebuffer_kind);
+            self.gpu_profiler.finish_sampler(transparent_sampler);
+        }
+
+        self.device.disable_depth();
+        if uses_scissor {
+            self.device.disable_scissor();
+        }
+    }
+
+    /// Re-rasterize the external compositor surfaces that have pending updates.
+    ///
+    /// Every entry of `external_surfaces` whose `update_params` is `Some` is bound
+    /// through the native compositor, drawn with a single composite instance that
+    /// covers the whole surface, and then unbound again. Entries without update
+    /// params are skipped. Draw statistics are accumulated into `results.stats`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a surface needs updating but `compositor_config.compositor()`
+    /// yields no compositor (see the `unwrap()` calls below).
+    fn update_external_native_surfaces(
+        &mut self,
+        external_surfaces: &[ResolvedExternalSurface],
+        results: &mut RenderResults,
+    ) {
+        if external_surfaces.is_empty() {
+            return;
+        }
+
+        let sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
+
+        self.device.disable_depth();
+        self.set_blend(false, FramebufferKind::Main);
+
+        // Only surfaces that carry update params have to be redrawn.
+        let pending = external_surfaces
+            .iter()
+            .filter_map(|surface| surface.update_params.map(|params| (surface, params)));
+
+        for (surface, (native_surface_id, surface_size)) in pending {
+            // The whole surface is redrawn, so a single rect serves as the
+            // draw, dirty, valid and clip rect.
+            let full_rect = surface_size.into();
+
+            // Ask the native compositor for the tile backing this surface.
+            let tile_id = NativeTileId {
+                surface_id: native_surface_id,
+                x: 0,
+                y: 0,
+            };
+            let bound = self.compositor_config
+                .compositor()
+                .unwrap()
+                .bind(&mut self.device, tile_id, full_rect, full_rect);
+
+            // Route the following draw into the FBO of the native surface.
+            let draw_target = DrawTarget::NativeSurface {
+                offset: bound.origin,
+                external_fbo_id: bound.fbo_id,
+                dimensions: surface_size,
+            };
+            self.device.bind_draw_target(draw_target);
+
+            let projection = Transform3D::ortho(
+                0.0,
+                surface_size.width as f32,
+                0.0,
+                surface_size.height as f32,
+                self.device.ortho_near_plane(),
+                self.device.ortho_far_plane(),
+            );
+
+            // Pick the composite shader matching the surface's color layout and
+            // bind it before any textures or instances are built.
+            let surface_format = match surface.color_data {
+                ResolvedExternalSurfaceColorData::Yuv { .. } => CompositeSurfaceFormat::Yuv,
+                ResolvedExternalSurfaceColorData::Rgb { .. } => CompositeSurfaceFormat::Rgba,
+            };
+            self.shaders
+                .borrow_mut()
+                .get_composite_shader(
+                    surface_format,
+                    surface.image_buffer_kind,
+                    CompositeFeatures::empty(),
+                )
+                .bind(
+                    &mut self.device,
+                    &projection,
+                    None,
+                    &mut self.renderer_errors,
+                    &mut self.profile,
+                );
+
+            let (textures, instance) = match surface.color_data {
+                ResolvedExternalSurfaceColorData::Yuv {
+                    ref planes,
+                    color_space,
+                    format,
+                    channel_bit_depth,
+                    ..
+                } => {
+                    let yuv_textures = BatchTextures::composite_yuv(
+                        planes[0].texture,
+                        planes[1].texture,
+                        planes[2].texture,
+                    );
+
+                    // External textures are only resolved once the lock() callback
+                    // has run at the start of the frame render, so their UV rects
+                    // are unknown when the surface descriptor is created. Ask the
+                    // texture resolver for each plane, passing the plane's own
+                    // rect as the default.
+                    let uv_rects = [
+                        self.texture_resolver
+                            .get_uv_rect(&yuv_textures.input.colors[0], planes[0].uv_rect),
+                        self.texture_resolver
+                            .get_uv_rect(&yuv_textures.input.colors[1], planes[1].uv_rect),
+                        self.texture_resolver
+                            .get_uv_rect(&yuv_textures.input.colors[2], planes[2].uv_rect),
+                    ];
+
+                    // TODO(gw): Support compositor surfaces without z-buffer, for memory / perf win here.
+                    let yuv_instance = CompositeInstance::new_yuv(
+                        full_rect.cast_unit().to_f32(),
+                        full_rect.to_f32(),
+                        color_space,
+                        format,
+                        channel_bit_depth,
+                        uv_rects,
+                        CompositorTransform::identity(),
+                    );
+
+                    (yuv_textures, yuv_instance)
+                }
+                ResolvedExternalSurfaceColorData::Rgb { ref plane, .. } => {
+                    let rgb_textures = BatchTextures::composite_rgb(plane.texture);
+                    let uv_rect = self.texture_resolver
+                        .get_uv_rect(&rgb_textures.input.colors[0], plane.uv_rect);
+                    let rgb_instance = CompositeInstance::new_rgb(
+                        full_rect.cast_unit().to_f32(),
+                        full_rect.to_f32(),
+                        PremultipliedColorF::WHITE,
+                        uv_rect,
+                        CompositorTransform::identity(),
+                    );
+
+                    (rgb_textures, rgb_instance)
+                }
+            };
+
+            // One instance covering the entire surface.
+            self.draw_instanced_batch(
+                &[instance],
+                VertexArrayKind::Composite,
+                &textures,
+                &mut results.stats,
+            );
+
+            self.compositor_config
+                .compositor()
+                .unwrap()
+                .unbind(&mut self.device);
+        }
+
+        self.gpu_profiler.finish_sampler(sampler);
+    }
+
+    /// Draw the given tiles to the currently bound target, batching consecutive
+    /// tiles that can share the same composite shader configuration and textures.
+    ///
+    /// * `tiles_iter` - occlusion items; `key` indexes `composite_state.tiles`
+    ///   and `rectangle` is used as the clip rect of the tile.
+    /// * `composite_state` - source of the tiles and of their device transforms.
+    /// * `external_surfaces` - looked up for tiles backed by an external surface.
+    /// * `projection` - projection handed to every shader bind.
+    /// * `stats` - receives the draw statistics of every flushed batch.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a tile is backed by a native compositor surface, which must not
+    /// occur in the simple composite path.
+    fn draw_tile_list<'a, I: Iterator<Item = &'a occlusion::Item>>(
+        &mut self,
+        tiles_iter: I,
+        composite_state: &CompositeState,
+        external_surfaces: &[ResolvedExternalSurface],
+        projection: &default::Transform3D<f32>,
+        stats: &mut RendererStats,
+    ) {
+        // Shader configuration that is currently bound, as
+        // (surface format, buffer kind, features, optional texture size).
+        let mut bound_params = (
+            CompositeSurfaceFormat::Rgba,
+            ImageBufferKind::Texture2D,
+            CompositeFeatures::empty(),
+            None,
+        );
+        let mut bound_textures = BatchTextures::empty();
+        let mut pending = Vec::new();
+
+        // Start out with the default configuration bound.
+        self.shaders
+            .borrow_mut()
+            .get_composite_shader(bound_params.0, bound_params.1, bound_params.2)
+            .bind(
+                &mut self.device,
+                projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+
+        for item in tiles_iter {
+            let tile = &composite_state.tiles[item.key];
+
+            let clip = item.rectangle;
+            let local_rect = tile.local_rect;
+            let transform = composite_state.get_device_transform(tile.transform_index).into();
+
+            // Derive the instance, its textures and the shader configuration
+            // from the kind of surface backing the tile.
+            let (instance, textures, params) = match tile.surface {
+                CompositeTileSurface::Texture { surface: ResolvedSurfaceTexture::TextureCache { texture } } => {
+                    let textured = CompositeInstance::new(
+                        local_rect,
+                        clip,
+                        PremultipliedColorF::WHITE,
+                        transform,
+                    );
+                    let params = (
+                        CompositeSurfaceFormat::Rgba,
+                        ImageBufferKind::Texture2D,
+                        textured.get_rgb_features(),
+                        None,
+                    );
+                    (textured, BatchTextures::composite_rgb(texture), params)
+                }
+                CompositeTileSurface::Texture { surface: ResolvedSurfaceTexture::Native { .. } } => {
+                    unreachable!("bug: found native surface in simple composite path");
+                }
+                CompositeTileSurface::Color { color } => {
+                    // Solid color tiles sample the dummy texture.
+                    let buffer_kind = TextureSource::Dummy.image_buffer_kind();
+                    let solid = CompositeInstance::new(
+                        local_rect,
+                        clip,
+                        color.premultiplied(),
+                        transform,
+                    );
+                    let params = (
+                        CompositeSurfaceFormat::Rgba,
+                        buffer_kind,
+                        solid.get_rgb_features(),
+                        None,
+                    );
+                    (solid, BatchTextures::composite_rgb(TextureSource::Dummy), params)
+                }
+                CompositeTileSurface::Clear => {
+                    // Clear tiles are drawn as black with the dummy texture.
+                    let buffer_kind = TextureSource::Dummy.image_buffer_kind();
+                    let cleared = CompositeInstance::new(
+                        local_rect,
+                        clip,
+                        PremultipliedColorF::BLACK,
+                        transform,
+                    );
+                    let params = (
+                        CompositeSurfaceFormat::Rgba,
+                        buffer_kind,
+                        cleared.get_rgb_features(),
+                        None,
+                    );
+                    (cleared, BatchTextures::composite_rgb(TextureSource::Dummy), params)
+                }
+                CompositeTileSurface::ExternalSurface { external_surface_index } => {
+                    let external = &external_surfaces[external_surface_index.0];
+
+                    match external.color_data {
+                        ResolvedExternalSurfaceColorData::Yuv {
+                            ref planes,
+                            color_space,
+                            format,
+                            channel_bit_depth,
+                            ..
+                        } => {
+                            let yuv_textures = BatchTextures::composite_yuv(
+                                planes[0].texture,
+                                planes[1].texture,
+                                planes[2].texture,
+                            );
+
+                            // External textures are only resolved once the lock()
+                            // callback has run at the start of the frame render, so
+                            // their UV rects are unknown when the surface descriptor
+                            // is created. Ask the texture resolver for each plane,
+                            // passing the plane's own rect as the default.
+                            let uv_rects = [
+                                self.texture_resolver
+                                    .get_uv_rect(&yuv_textures.input.colors[0], planes[0].uv_rect),
+                                self.texture_resolver
+                                    .get_uv_rect(&yuv_textures.input.colors[1], planes[1].uv_rect),
+                                self.texture_resolver
+                                    .get_uv_rect(&yuv_textures.input.colors[2], planes[2].uv_rect),
+                            ];
+
+                            let yuv_instance = CompositeInstance::new_yuv(
+                                local_rect,
+                                clip,
+                                color_space,
+                                format,
+                                channel_bit_depth,
+                                uv_rects,
+                                transform,
+                            );
+                            let params = (
+                                CompositeSurfaceFormat::Yuv,
+                                external.image_buffer_kind,
+                                CompositeFeatures::empty(),
+                                None,
+                            );
+                            (yuv_instance, yuv_textures, params)
+                        }
+                        ResolvedExternalSurfaceColorData::Rgb { ref plane, .. } => {
+                            let uv_rect = self.texture_resolver
+                                .get_uv_rect(&plane.texture, plane.uv_rect);
+                            let rgb_instance = CompositeInstance::new_rgb(
+                                local_rect,
+                                clip,
+                                PremultipliedColorF::WHITE,
+                                uv_rect,
+                                transform,
+                            );
+                            let features = rgb_instance.get_rgb_features();
+                            let rgb_textures = BatchTextures::composite_rgb(plane.texture);
+                            // This is the only case that hands a texture size to
+                            // the shader bind.
+                            let texture_size = self.texture_resolver
+                                .get_texture_size(&plane.texture)
+                                .to_f32();
+                            let params = (
+                                CompositeSurfaceFormat::Rgba,
+                                external.image_buffer_kind,
+                                features,
+                                Some(texture_size),
+                            );
+                            (rgb_instance, rgb_textures, params)
+                        }
+                    }
+                }
+            };
+
+            // The pending batch has to be submitted before either the textures
+            // or the shader configuration change.
+            let textures_changed = !bound_textures.is_compatible_with(&textures);
+            let params_changed = params != bound_params;
+
+            if (textures_changed || params_changed) && !pending.is_empty() {
+                self.draw_instanced_batch(
+                    &pending,
+                    VertexArrayKind::Composite,
+                    &bound_textures,
+                    stats,
+                );
+                pending.clear();
+            }
+
+            if params_changed {
+                self.shaders
+                    .borrow_mut()
+                    .get_composite_shader(params.0, params.1, params.2)
+                    .bind(
+                        &mut self.device,
+                        projection,
+                        params.3,
+                        &mut self.renderer_errors,
+                        &mut self.profile,
+                    );
+
+                bound_params = params;
+            }
+
+            bound_textures = textures;
+
+            // Queue the instance in the (possibly fresh) batch.
+            pending.push(instance);
+        }
+
+        // Submit whatever is still queued.
+        if !pending.is_empty() {
+            self.draw_instanced_batch(
+                &pending,
+                VertexArrayKind::Composite,
+                &bound_textures,
+                stats,
+            );
+        }
+    }
+
+    /// Composite picture cache tiles into the framebuffer with WebRender's own
+    /// draw calls (the "simple" composite path).
+    ///
+    /// The tiles of `composite_state` are clipped, sorted into opaque, clear and
+    /// alpha groups with a front-to-back occlusion builder, and then drawn in
+    /// that order through `draw_tile_list`.
+    ///
+    /// * `composite_state` - tiles and external surfaces to composite.
+    /// * `draw_target` - target that is bound and drawn into.
+    /// * `projection` - projection handed to the composite shaders.
+    /// * `results` - `results.stats` receives the draw statistics.
+    /// * `partial_present_mode` - when `Some`, drawing and clearing are
+    ///   restricted to the contained dirty rect.
+    ///
+    /// NOTE(review): an earlier version of this comment described this as the
+    /// only way picture cache tiles get drawn; this file also contains native
+    /// compositor surface handling, so that statement appears to be outdated.
+    fn composite_simple(
+        &mut self,
+        composite_state: &CompositeState,
+        draw_target: DrawTarget,
+        projection: &default::Transform3D<f32>,
+        results: &mut RenderResults,
+        partial_present_mode: Option<PartialPresentMode>,
+    ) {
+        let _gm = self.gpu_profiler.start_marker("framebuffer");
+        let _timer = self.gpu_profiler.start_timer(GPU_TAG_COMPOSITE);
+
+        self.device.bind_draw_target(draw_target);
+        self.device.disable_depth_write();
+        self.device.disable_depth();
+
+        // If using KHR_partial_update, call eglSetDamageRegion.
+        // This must be called exactly once per frame, and prior to any rendering to the main
+        // framebuffer. Additionally, on Mali-G77 we encountered rendering issues when calling
+        // this earlier in the frame, during offscreen render passes. So call it now, immediately
+        // before rendering to the main framebuffer. See bug 1685276 for details.
+        if let Some(partial_present) = self.compositor_config.partial_present() {
+            if let Some(PartialPresentMode::Single { dirty_rect }) = partial_present_mode {
+                partial_present.set_buffer_damage_region(&[dirty_rect.to_i32()]);
+            }
+        }
+
+        let cap = composite_state.tiles.len();
+
+        let mut occlusion = occlusion::FrontToBackBuilder::with_capacity(cap, cap);
+        let mut clear_tiles = Vec::new();
+
+        for (idx, tile) in composite_state.tiles.iter().enumerate() {
+            // Clear tiles overwrite whatever is under them, so they are treated as opaque.
+            let is_opaque = tile.kind != TileKind::Alpha;
+
+            let device_tile_box = composite_state.get_device_rect(
+                &tile.local_rect,
+                tile.transform_index
+            );
+
+            // Determine a clip rect to apply to this tile, depending on what
+            // the partial present mode is.
+            let partial_clip_rect = match partial_present_mode {
+                Some(PartialPresentMode::Single { dirty_rect }) => dirty_rect,
+                None => device_tile_box,
+            };
+
+            // Simple compositor needs the valid rect in device space to match clip rect
+            let device_valid_rect = composite_state
+                .get_device_rect(&tile.local_valid_rect, tile.transform_index);
+
+            // The visible part of the tile is the intersection of its own box,
+            // its clip rect, the partial present clip and its valid rect.
+            let rect = device_tile_box
+                .intersection_unchecked(&tile.device_clip_rect)
+                .intersection_unchecked(&partial_clip_rect)
+                .intersection_unchecked(&device_valid_rect);
+
+            // Nothing of this tile is visible, so it is neither drawn nor
+            // considered for occlusion.
+            if rect.is_empty() {
+                continue;
+            }
+
+            if tile.kind == TileKind::Clear {
+                // Clear tiles are specific to how we render the window buttons on
+                // Windows 8. They clobber what's under them so they can be treated as opaque,
+                // but require a different blend state so they will be rendered after the opaque
+                // tiles and before transparent ones.
+                clear_tiles.push(occlusion::Item { rectangle: rect, key: idx });
+                continue;
+            }
+
+            occlusion.add(&rect, is_opaque, idx);
+        }
+
+        // Clear the framebuffer
+        let clear_color = Some(self.clear_color.to_array());
+
+        match partial_present_mode {
+            Some(PartialPresentMode::Single { dirty_rect }) => {
+                // There is no need to clear if the dirty rect is occluded. Additionally,
+                // on Mali-G77 we have observed artefacts when calling glClear (even with
+                // the empty scissor rect set) after calling eglSetDamageRegion with an
+                // empty damage region. So avoid clearing in that case. See bug 1709548.
+                if !dirty_rect.is_empty() && occlusion.test(&dirty_rect) {
+                    // We have a single dirty rect, so clear only that
+                    self.device.clear_target(clear_color,
+                                             None,
+                                             Some(draw_target.to_framebuffer_rect(dirty_rect.to_i32())));
+                }
+            }
+            None => {
+                // Partial present is disabled, so clear the entire framebuffer
+                self.device.clear_target(clear_color,
+                                         None,
+                                         None);
+            }
+        }
+
+        // We are only interested in tiles backed with actual cached pixels so we don't
+        // count clear tiles here.
+        let num_tiles = composite_state.tiles
+            .iter()
+            .filter(|tile| tile.kind != TileKind::Clear).count();
+        self.profile.set(profiler::PICTURE_TILES, num_tiles);
+
+        // Draw opaque tiles first, with blending disabled.
+        if !occlusion.opaque_items().is_empty() {
+            let opaque_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
+            self.set_blend(false, FramebufferKind::Main);
+            self.draw_tile_list(
+                occlusion.opaque_items().iter(),
+                composite_state,
+                &composite_state.external_surfaces,
+                projection,
+                &mut results.stats,
+            );
+            self.gpu_profiler.finish_sampler(opaque_sampler);
+        }
+
+        // Draw clear tiles next; they use the premultiplied dest-out blend mode.
+        if !clear_tiles.is_empty() {
+            let transparent_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
+            self.set_blend(true, FramebufferKind::Main);
+            self.device.set_blend_mode_premultiplied_dest_out();
+            self.draw_tile_list(
+                clear_tiles.iter(),
+                composite_state,
+                &composite_state.external_surfaces,
+                projection,
+                &mut results.stats,
+            );
+            self.gpu_profiler.finish_sampler(transparent_sampler);
+        }
+
+        // Draw alpha tiles last, iterating the occlusion builder's alpha items
+        // in reverse order and blending with premultiplied alpha.
+        if !occlusion.alpha_items().is_empty() {
+            let transparent_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
+            self.set_blend(true, FramebufferKind::Main);
+            self.set_blend_mode_premultiplied_alpha(FramebufferKind::Main);
+            self.draw_tile_list(
+                occlusion.alpha_items().iter().rev(),
+                composite_state,
+                &composite_state.external_surfaces,
+                projection,
+                &mut results.stats,
+            );
+            self.gpu_profiler.finish_sampler(transparent_sampler);
+        }
+    }
+
+    /// Render everything scheduled for one color render target.
+    ///
+    /// Binds `draw_target`, clears it, and then in order handles resolves,
+    /// blits, blurs, scalings, SVG filters, alpha batch containers and finally
+    /// the target's primitive / mask instances.
+    ///
+    /// * `draw_target` - the target to bind and draw into.
+    /// * `target` - the batched work for this target.
+    /// * `clear_depth` - when `Some`, depth writes are enabled for the clear and
+    ///   the depth target is invalidated once drawing is finished.
+    /// * `render_tasks` - task graph passed on to the resolve, blit and alpha
+    ///   batch handlers.
+    /// * `projection` - projection handed to the shaders bound here.
+    /// * `stats` - receives draw statistics.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `draw_target` is a texture without depth while `target` needs
+    /// depth, or if `draw_target` is a native compositor surface.
+    fn draw_color_target(
+        &mut self,
+        draw_target: DrawTarget,
+        target: &ColorRenderTarget,
+        clear_depth: Option<f32>,
+        render_tasks: &RenderTaskGraph,
+        projection: &default::Transform3D<f32>,
+        stats: &mut RendererStats,
+    ) {
+        profile_scope!("draw_color_target");
+
+        self.profile.inc(profiler::COLOR_PASSES);
+        let _gm = self.gpu_profiler.start_marker("color target");
+
+        // sanity check for the depth buffer
+        // (assuming both sides are bools, `>=` only fails when the target needs
+        // depth but the texture was created without it)
+        if let DrawTarget::Texture { with_depth, .. } = draw_target {
+            assert!(with_depth >= target.needs_depth());
+        }
+
+        // Blend state helpers distinguish the default framebuffer from
+        // offscreen targets.
+        let framebuffer_kind = if draw_target.is_default() {
+            FramebufferKind::Main
+        } else {
+            FramebufferKind::Other
+        };
+
+        // Target setup: bind, optionally start QCOM tiling, then clear.
+        {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_TARGET);
+            self.device.bind_draw_target(draw_target);
+
+            if self.device.get_capabilities().supports_qcom_tiled_rendering {
+                // NOTE(review): presumably the existing color contents only need
+                // to be preserved when the target is not about to be cleared --
+                // confirm against the QCOM_tiled_rendering extension.
+                let preserve_mask = match target.clear_color {
+                    Some(_) => 0,
+                    None => gl::COLOR_BUFFER_BIT0_QCOM,
+                };
+                self.device.gl().start_tiling_qcom(
+                    target.used_rect.min.x.max(0) as _,
+                    target.used_rect.min.y.max(0) as _,
+                    target.used_rect.width() as _,
+                    target.used_rect.height() as _,
+                    preserve_mask,
+                );
+            }
+
+            self.device.disable_depth();
+            self.set_blend(false, framebuffer_kind);
+
+            // Depth writes are only enabled around the clear below.
+            if clear_depth.is_some() {
+                self.device.enable_depth_write();
+            }
+
+            let clear_color = target
+                .clear_color
+                .map(|color| color.to_array());
+
+            // Work out which region to clear; `None` means no clear rect is
+            // passed to `clear_target`.
+            let clear_rect = match draw_target {
+                DrawTarget::NativeSurface { .. } => {
+                    unreachable!("bug: native compositor surface in child target");
+                }
+                DrawTarget::Default { rect, total_size, .. } if rect.min == FramebufferIntPoint::zero() && rect.size() == total_size => {
+                    // whole screen is covered, no need for scissor
+                    None
+                }
+                DrawTarget::Default { rect, .. } => {
+                    Some(rect)
+                }
+                DrawTarget::Texture { .. } if self.enable_clear_scissor => {
+                    // TODO(gw): Applying a scissor rect and minimal clear here
+                    // is a very large performance win on the Intel and nVidia
+                    // GPUs that I have tested with. It's possible it may be a
+                    // performance penalty on other GPU types - we should test this
+                    // and consider different code paths.
+                    //
+                    // Note: The above measurements were taken when render
+                    // target slices were minimum 2048x2048. Now that we size
+                    // them adaptively, this may be less of a win (except perhaps
+                    // on a mostly-unused last slice of a large texture array).
+                    Some(draw_target.to_framebuffer_rect(target.used_rect))
+                }
+                DrawTarget::Texture { .. } | DrawTarget::External { .. } => {
+                    None
+                }
+            };
+
+            self.device.clear_target(
+                clear_color,
+                clear_depth,
+                clear_rect,
+            );
+
+            if clear_depth.is_some() {
+                self.device.disable_depth_write();
+            }
+        }
+
+        // Handle any resolves from parent pictures to this target
+        self.handle_resolves(
+            &target.resolve_ops,
+            render_tasks,
+            draw_target,
+        );
+
+        // Handle any blits from the texture cache to this target.
+        self.handle_blits(
+            &target.blits,
+            render_tasks,
+            draw_target,
+        );
+
+        // Draw any blurs for this target.
+        // Blurs are rendered as a standard 2-pass
+        // separable implementation.
+        // TODO(gw): In the future, consider having
+        //           fast path blur shaders for common
+        //           blur radii with fixed weights.
+        if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLUR);
+
+            // Both blur directions are drawn with the same shader, bound once.
+            self.set_blend(false, framebuffer_kind);
+            self.shaders.borrow_mut().cs_blur_rgba8
+                .bind(&mut self.device, projection, None, &mut self.renderer_errors, &mut self.profile);
+
+            if !target.vertical_blurs.is_empty() {
+                self.draw_blurs(
+                    &target.vertical_blurs,
+                    stats,
+                );
+            }
+
+            if !target.horizontal_blurs.is_empty() {
+                self.draw_blurs(
+                    &target.horizontal_blurs,
+                    stats,
+                );
+            }
+        }
+
+        // Handle any scaling tasks for this target.
+        self.handle_scaling(
+            &target.scalings,
+            projection,
+            stats,
+        );
+
+        // Each entry pairs the input textures with the filter instances that
+        // use them.
+        for (ref textures, ref filters) in &target.svg_filters {
+            self.handle_svg_filters(
+                textures,
+                filters,
+                projection,
+                stats,
+            );
+        }
+
+        // Draw the alpha batch containers of this target.
+        for alpha_batch_container in &target.alpha_batch_containers {
+            self.draw_alpha_batch_container(
+                alpha_batch_container,
+                draw_target,
+                framebuffer_kind,
+                projection,
+                render_tasks,
+                stats,
+            );
+        }
+
+        // Draw the primitive instances together with their fast / slow mask
+        // instances.
+        self.handle_prims(
+            &target.prim_instances,
+            &target.mask_instances_fast,
+            &target.mask_instances_slow,
+            projection,
+            stats,
+        );
+
+        // The depth target is invalidated once all drawing is done.
+        if clear_depth.is_some() {
+            self.device.invalidate_depth_target();
+        }
+        // Matches the `start_tiling_qcom` call made during target setup.
+        if self.device.get_capabilities().supports_qcom_tiled_rendering {
+            self.device.gl().end_tiling_qcom(gl::COLOR_BUFFER_BIT0_QCOM);
+        }
+    }
+
+    /// Issue one instanced blur draw per source texture.
+    ///
+    /// `blurs` maps each source texture to the blur instances that sample it.
+    /// The caller is expected to have bound the blur shader already; this
+    /// method only submits the batches and records their statistics in `stats`.
+    fn draw_blurs(
+        &mut self,
+        blurs: &FastHashMap<TextureSource, Vec<BlurInstance>>,
+        stats: &mut RendererStats,
+    ) {
+        for (source, instances) in blurs.iter() {
+            // Each batch samples exactly one source texture.
+            let batch_textures = BatchTextures::composite_rgb(*source);
+            self.draw_instanced_batch(instances, VertexArrayKind::Blur, &batch_textures, stats);
+        }
+    }
+
+    /// Issue the draw calls for every batch in a clip batch list, rendering
+    /// into whichever target is currently bound.
+    ///
+    /// Batches are drawn in a fixed order: slow rectangles, fast rectangles,
+    /// box-shadows and finally image masks. The whole call is a no-op when the
+    /// `DISABLE_CLIP_MASKS` debug flag is set.
+    fn draw_clip_batch_list(
+        &mut self,
+        list: &ClipBatchList,
+        draw_target: &DrawTarget,
+        projection: &default::Transform3D<f32>,
+        stats: &mut RendererStats,
+    ) {
+        let masks_disabled = self.debug_flags.contains(DebugFlags::DISABLE_CLIP_MASKS);
+        if masks_disabled {
+            return;
+        }
+
+        // Rounded-corner rectangles: slow-path shader first...
+        if !list.slow_rectangles.is_empty() {
+            let _marker = self.gpu_profiler.start_marker("slow clip rectangles");
+            self.shaders.borrow_mut().cs_clip_rectangle_slow.bind(
+                &mut self.device,
+                projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+            let no_textures = BatchTextures::empty();
+            self.draw_instanced_batch(
+                &list.slow_rectangles,
+                VertexArrayKind::ClipRect,
+                &no_textures,
+                stats,
+            );
+        }
+
+        // ...then the fast-path shader.
+        if !list.fast_rectangles.is_empty() {
+            let _marker = self.gpu_profiler.start_marker("fast clip rectangles");
+            self.shaders.borrow_mut().cs_clip_rectangle_fast.bind(
+                &mut self.device,
+                projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+            let no_textures = BatchTextures::empty();
+            self.draw_instanced_batch(
+                &list.fast_rectangles,
+                VertexArrayKind::ClipRect,
+                &no_textures,
+                stats,
+            );
+        }
+
+        // Box-shadow clips, one batch per mask texture.
+        for (mask_texture_id, instances) in list.box_shadows.iter() {
+            let _marker = self.gpu_profiler.start_marker("box-shadows");
+            let mask_textures = BatchTextures::composite_rgb(*mask_texture_id);
+            self.shaders.borrow_mut().cs_clip_box_shadow.bind(
+                &mut self.device,
+                projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+            self.draw_instanced_batch(
+                instances,
+                VertexArrayKind::ClipBoxShadow,
+                &mask_textures,
+                stats,
+            );
+        }
+
+        // Image masks, one batch per (mask texture, optional clip rect) pair.
+        let mut scissor_enabled = false;
+        for (key, instances) in list.images.iter() {
+            let (mask_texture_id, clip_rect) = key;
+            let _marker = self.gpu_profiler.start_marker("clip images");
+
+            // Some image masks need a scissor rect so that they cannot draw
+            // outside the bounds of their task's target. Axis-aligned
+            // primitives are clamped inside the shader and come without one.
+            // TODO: We currently assume scissor state is off by default for
+            // alpha targets here, but in the future we may want to track the
+            // current scissor state so that this can be properly saved and
+            // restored here.
+            match clip_rect {
+                Some(rect) => {
+                    if !scissor_enabled {
+                        self.device.enable_scissor();
+                        scissor_enabled = true;
+                    }
+                    let scissor_rect = draw_target.build_scissor_rect(Some(*rect));
+                    self.device.set_scissor_rect(scissor_rect);
+                }
+                None if scissor_enabled => {
+                    self.device.disable_scissor();
+                    scissor_enabled = false;
+                }
+                None => {}
+            }
+
+            let mask_textures = BatchTextures::composite_rgb(*mask_texture_id);
+            self.shaders.borrow_mut().cs_clip_image.bind(
+                &mut self.device,
+                projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+            self.draw_instanced_batch(
+                instances,
+                VertexArrayKind::ClipImage,
+                &mask_textures,
+                stats,
+            );
+        }
+
+        // Leave scissoring switched off, as it was assumed to be on entry.
+        if scissor_enabled {
+            self.device.disable_scissor();
+        }
+    }
+
+    /// Render a single alpha render target.
+    ///
+    /// The steps, in order, are:
+    /// 1. bind `draw_target`, switch off depth testing, depth writes and
+    ///    blending, and clear the regions listed in `target.zero_clears` /
+    ///    `target.one_clears`;
+    /// 2. draw the vertical and then the horizontal blurs with the A8 blur shader;
+    /// 3. process `target.scalings`;
+    /// 4. draw the primary clip masks without blending, followed by the
+    ///    secondary clip masks with multiplicative blending.
+    ///
+    /// Task rects are looked up in `render_tasks`; draw call statistics are
+    /// accumulated into `stats`.
+    fn draw_alpha_target(
+        &mut self,
+        draw_target: DrawTarget,
+        target: &AlphaRenderTarget,
+        projection: &default::Transform3D<f32>,
+        render_tasks: &RenderTaskGraph,
+        stats: &mut RendererStats,
+    ) {
+        profile_scope!("draw_alpha_target");
+
+        self.profile.inc(profiler::ALPHA_PASSES);
+        let _gm = self.gpu_profiler.start_marker("alpha target");
+        // The sampler is finished explicitly at the end of this function.
+        let alpha_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_ALPHA);
+
+        {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_TARGET);
+            self.device.bind_draw_target(draw_target);
+            self.device.disable_depth();
+            self.device.disable_depth_write();
+            self.set_blend(false, FramebufferKind::Other);
+
+            let zero_color = [0.0, 0.0, 0.0, 0.0];
+            let one_color = [1.0, 1.0, 1.0, 1.0];
+
+            // On some Adreno 4xx devices we have seen render tasks to alpha targets have no
+            // effect unless the target is fully cleared prior to rendering. See bug 1714227.
+            if self.device.get_capabilities().requires_alpha_target_full_clear {
+                self.device.clear_target(
+                    Some(zero_color),
+                    None,
+                    None,
+                );
+            }
+
+            // On some Mali-T devices we have observed crashes in subsequent draw calls
+            // immediately after clearing the alpha render target regions with glClear().
+            // Using the shader to clear the regions avoids the crash. See bug 1638593.
+            if self.clear_alpha_targets_with_quads
+                && !(target.zero_clears.is_empty() && target.one_clears.is_empty())
+            {
+                // One clear instance per task rect that must be reset to zero...
+                let zeroes = target.zero_clears
+                    .iter()
+                    .map(|task_id| {
+                        let rect = render_tasks[*task_id].get_target_rect().to_f32();
+                        ClearInstance {
+                            rect: [
+                                rect.min.x, rect.min.y,
+                                rect.max.x, rect.max.y,
+                            ],
+                            color: zero_color,
+                        }
+                    });
+
+                // ...and one per task rect that must be reset to one.
+                let ones = target.one_clears
+                    .iter()
+                    .map(|task_id| {
+                        let rect = render_tasks[*task_id].get_target_rect().to_f32();
+                        ClearInstance {
+                            rect: [
+                                rect.min.x, rect.min.y,
+                                rect.max.x, rect.max.y,
+                            ],
+                            color: one_color,
+                        }
+                    });
+
+                // Both kinds of clear are submitted together as a single instanced batch.
+                let instances = zeroes.chain(ones).collect::<Vec<_>>();
+                self.shaders.borrow_mut().ps_clear.bind(
+                    &mut self.device,
+                    &projection,
+                    None,
+                    &mut self.renderer_errors,
+                    &mut self.profile,
+                );
+                self.draw_instanced_batch(
+                    &instances,
+                    VertexArrayKind::Clear,
+                    &BatchTextures::empty(),
+                    stats,
+                );
+            } else {
+                // TODO(gw): Applying a scissor rect and minimal clear here
+                // is a very large performance win on the Intel and nVidia
+                // GPUs that I have tested with. It's possible it may be a
+                // performance penalty on other GPU types - we should test this
+                // and consider different code paths.
+                for &task_id in &target.zero_clears {
+                    let rect = render_tasks[task_id].get_target_rect();
+                    self.device.clear_target(
+                        Some(zero_color),
+                        None,
+                        Some(draw_target.to_framebuffer_rect(rect)),
+                    );
+                }
+
+                for &task_id in &target.one_clears {
+                    let rect = render_tasks[task_id].get_target_rect();
+                    self.device.clear_target(
+                        Some(one_color),
+                        None,
+                        Some(draw_target.to_framebuffer_rect(rect)),
+                    );
+                }
+            }
+        }
+
+        // Draw any blurs for this target.
+        // Blurs are rendered as a standard 2-pass
+        // separable implementation.
+        // TODO(gw): In the future, consider having
+        //           fast path blur shaders for common
+        //           blur radii with fixed weights.
+        if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLUR);
+
+            // The A8 blur shader is bound once and shared by both passes.
+            self.shaders.borrow_mut().cs_blur_a8
+                .bind(&mut self.device, projection, None, &mut self.renderer_errors, &mut self.profile);
+
+            if !target.vertical_blurs.is_empty() {
+                self.draw_blurs(
+                    &target.vertical_blurs,
+                    stats,
+                );
+            }
+
+            if !target.horizontal_blurs.is_empty() {
+                self.draw_blurs(
+                    &target.horizontal_blurs,
+                    stats,
+                );
+            }
+        }
+
+        self.handle_scaling(
+            &target.scalings,
+            projection,
+            stats,
+        );
+
+        // Draw the clip items into the tiled alpha mask.
+        {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_CLIP);
+
+            // TODO(gw): Consider grouping multiple clip masks per shader
+            //           invocation here to reduce memory bandwidth further?
+
+            // Draw the primary clip mask - since this is the first mask
+            // for the task, we can disable blending, knowing that it will
+            // overwrite every pixel in the mask area.
+            self.set_blend(false, FramebufferKind::Other);
+            self.draw_clip_batch_list(
+                &target.clip_batcher.primary_clips,
+                &draw_target,
+                projection,
+                stats,
+            );
+
+            // switch to multiplicative blending for secondary masks, using
+            // multiplicative blending to accumulate clips into the mask.
+            self.set_blend(true, FramebufferKind::Other);
+            self.set_blend_mode_multiply(FramebufferKind::Other);
+            self.draw_clip_batch_list(
+                &target.clip_batcher.secondary_clips,
+                &draw_target,
+                projection,
+                stats,
+            );
+        }
+
+        self.gpu_profiler.finish_sampler(alpha_sampler);
+    }
+
+    /// Render all work targeting a single texture cache texture.
+    ///
+    /// The texture identified by `texture` is looked up in the texture resolver's
+    /// `texture_cache_map`, bound as the draw target, and an orthographic
+    /// projection covering its full dimensions is built. The following are then
+    /// drawn, in order: clears, blits, borders (solid then complex), line
+    /// decorations, fast linear / linear / radial / conic gradients, and
+    /// horizontal blurs.
+    ///
+    /// Each category is skipped when its list on `target` is empty. Draw call
+    /// statistics are accumulated into `stats`.
+    fn draw_texture_cache_target(
+        &mut self,
+        texture: &CacheTextureId,
+        target: &TextureCacheRenderTarget,
+        render_tasks: &RenderTaskGraph,
+        stats: &mut RendererStats,
+    ) {
+        profile_scope!("draw_texture_cache_target");
+
+        self.device.disable_depth();
+        self.device.disable_depth_write();
+
+        self.set_blend(false, FramebufferKind::Other);
+
+        // From here on `texture` refers to the resolved GPU texture, shadowing the id parameter.
+        let texture = &self.texture_resolver.texture_cache_map[texture].texture;
+        let target_size = texture.get_dimensions();
+
+        let projection = Transform3D::ortho(
+            0.0,
+            target_size.width as f32,
+            0.0,
+            target_size.height as f32,
+            self.device.ortho_near_plane(),
+            self.device.ortho_far_plane(),
+        );
+
+        // NOTE(review): the `false` argument presumably means "no depth buffer" - confirm
+        // against `DrawTarget::from_texture`.
+        let draw_target = DrawTarget::from_texture(
+            texture,
+            false,
+        );
+        self.device.bind_draw_target(draw_target);
+
+        {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_CLEAR);
+
+            // NOTE(review): depth and blend state were already set this way at the top of
+            // the function; this repetition looks redundant - confirm before removing.
+            self.device.disable_depth();
+            self.device.disable_depth_write();
+            self.set_blend(false, FramebufferKind::Other);
+
+            let color = [0.0, 0.0, 0.0, 0.0];
+            // Clear either by drawing quads with the clear shader, or with one
+            // clear_target call per rect.
+            if self.clear_caches_with_quads && !target.clears.is_empty() {
+                let instances = target.clears
+                    .iter()
+                    .map(|r| ClearInstance {
+                        rect: [
+                            r.min.x as f32, r.min.y as f32,
+                            r.max.x as f32, r.max.y as f32,
+                        ],
+                        color,
+                    })
+                    .collect::<Vec<_>>();
+                self.shaders.borrow_mut().ps_clear.bind(
+                    &mut self.device,
+                    &projection,
+                    None,
+                    &mut self.renderer_errors,
+                    &mut self.profile,
+                );
+                self.draw_instanced_batch(
+                    &instances,
+                    VertexArrayKind::Clear,
+                    &BatchTextures::empty(),
+                    stats,
+                );
+            } else {
+                for rect in &target.clears {
+                    self.device.clear_target(
+                        Some(color),
+                        None,
+                        Some(draw_target.to_framebuffer_rect(*rect)),
+                    );
+                }
+            }
+
+            // Handle any blits to this texture from child tasks.
+            self.handle_blits(
+                &target.blits,
+                render_tasks,
+                draw_target,
+            );
+        }
+
+        // Draw any borders for this target.
+        if !target.border_segments_solid.is_empty() ||
+           !target.border_segments_complex.is_empty()
+        {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_BORDER);
+
+            // Borders are drawn with premultiplied-alpha blending.
+            self.set_blend(true, FramebufferKind::Other);
+            self.set_blend_mode_premultiplied_alpha(FramebufferKind::Other);
+
+            if !target.border_segments_solid.is_empty() {
+                self.shaders.borrow_mut().cs_border_solid.bind(
+                    &mut self.device,
+                    &projection,
+                    None,
+                    &mut self.renderer_errors,
+                    &mut self.profile,
+                );
+
+                self.draw_instanced_batch(
+                    &target.border_segments_solid,
+                    VertexArrayKind::Border,
+                    &BatchTextures::empty(),
+                    stats,
+                );
+            }
+
+            if !target.border_segments_complex.is_empty() {
+                self.shaders.borrow_mut().cs_border_segment.bind(
+                    &mut self.device,
+                    &projection,
+                    None,
+                    &mut self.renderer_errors,
+                    &mut self.profile,
+                );
+
+                self.draw_instanced_batch(
+                    &target.border_segments_complex,
+                    VertexArrayKind::Border,
+                    &BatchTextures::empty(),
+                    stats,
+                );
+            }
+
+            // Restore the blending-off state for the sections that follow.
+            self.set_blend(false, FramebufferKind::Other);
+        }
+
+        // Draw any line decorations for this target.
+        if !target.line_decorations.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_LINE_DECORATION);
+
+            // Line decorations are drawn with premultiplied-alpha blending.
+            self.set_blend(true, FramebufferKind::Other);
+            self.set_blend_mode_premultiplied_alpha(FramebufferKind::Other);
+
+            self.shaders.borrow_mut().cs_line_decoration.bind(
+                &mut self.device,
+                &projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+
+            self.draw_instanced_batch(
+                &target.line_decorations,
+                VertexArrayKind::LineDecoration,
+                &BatchTextures::empty(),
+                stats,
+            );
+
+            // Restore the blending-off state for the sections that follow.
+            self.set_blend(false, FramebufferKind::Other);
+        }
+
+        // Draw any fast path linear gradients for this target.
+        if !target.fast_linear_gradients.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_FAST_LINEAR_GRADIENT);
+
+            self.set_blend(false, FramebufferKind::Other);
+
+            self.shaders.borrow_mut().cs_fast_linear_gradient.bind(
+                &mut self.device,
+                &projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+
+            self.draw_instanced_batch(
+                &target.fast_linear_gradients,
+                VertexArrayKind::FastLinearGradient,
+                &BatchTextures::empty(),
+                stats,
+            );
+        }
+
+        // Draw any linear gradients for this target.
+        if !target.linear_gradients.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_LINEAR_GRADIENT);
+
+            self.set_blend(false, FramebufferKind::Other);
+
+            self.shaders.borrow_mut().cs_linear_gradient.bind(
+                &mut self.device,
+                &projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+
+            // Bind the dither matrix texture to the dither sampler, if one exists.
+            if let Some(ref texture) = self.dither_matrix_texture {
+                self.device.bind_texture(TextureSampler::Dither, texture, Swizzle::default());
+            }
+
+            self.draw_instanced_batch(
+                &target.linear_gradients,
+                VertexArrayKind::LinearGradient,
+                &BatchTextures::empty(),
+                stats,
+            );
+        }
+
+        // Draw any radial gradients for this target.
+        if !target.radial_gradients.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_RADIAL_GRADIENT);
+
+            self.set_blend(false, FramebufferKind::Other);
+
+            self.shaders.borrow_mut().cs_radial_gradient.bind(
+                &mut self.device,
+                &projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+
+            // Bind the dither matrix texture to the dither sampler, if one exists.
+            if let Some(ref texture) = self.dither_matrix_texture {
+                self.device.bind_texture(TextureSampler::Dither, texture, Swizzle::default());
+            }
+
+            self.draw_instanced_batch(
+                &target.radial_gradients,
+                VertexArrayKind::RadialGradient,
+                &BatchTextures::empty(),
+                stats,
+            );
+        }
+
+        // Draw any conic gradients for this target.
+        if !target.conic_gradients.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_CONIC_GRADIENT);
+
+            self.set_blend(false, FramebufferKind::Other);
+
+            self.shaders.borrow_mut().cs_conic_gradient.bind(
+                &mut self.device,
+                &projection,
+                None,
+                &mut self.renderer_errors,
+                &mut self.profile,
+            );
+
+            // Bind the dither matrix texture to the dither sampler, if one exists.
+            if let Some(ref texture) = self.dither_matrix_texture {
+                self.device.bind_texture(TextureSampler::Dither, texture, Swizzle::default());
+            }
+
+            self.draw_instanced_batch(
+                &target.conic_gradients,
+                VertexArrayKind::ConicGradient,
+                &BatchTextures::empty(),
+                stats,
+            );
+        }
+
+        // Draw any blurs for this target.
+        // NOTE(review): only horizontal blurs are handled here; presumably the vertical
+        // pass is rendered into a different target - confirm.
+        if !target.horizontal_blurs.is_empty() {
+            let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLUR);
+
+            {
+                // Scope the shader borrow so it is released before draw_blurs is called.
+                let mut shaders = self.shaders.borrow_mut();
+                // Pick the blur shader matching the target's kind.
+                match target.target_kind {
+                    RenderTargetKind::Alpha => &mut shaders.cs_blur_a8,
+                    RenderTargetKind::Color => &mut shaders.cs_blur_rgba8,
+                }.bind(&mut self.device, &projection, None, &mut self.renderer_errors, &mut self.profile);
+            }
+
+            self.draw_blurs(
+                &target.horizontal_blurs,
+                stats,
+            );
+        }
+    }
+
+    /// Lock every external image referenced by `deferred_resolves` and build the
+    /// GPU cache updates that patch in their UV rects.
+    ///
+    /// Each locked image is registered in `self.texture_resolver.external_images`
+    /// under its position in `deferred_resolves`. Returns `None` when there is
+    /// nothing to resolve.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no external image handler is set, if a resolve does not refer to
+    /// an external image, if the image type is `Buffer`, or if the handler
+    /// returns raw data.
+    fn update_deferred_resolves(&mut self, deferred_resolves: &[DeferredResolve]) -> Option<GpuCacheUpdateList> {
+        // Nothing to lock and nothing to patch.
+        if deferred_resolves.is_empty() {
+            return None;
+        }
+
+        let handler = self.external_image_handler
+            .as_mut()
+            .expect("Found external image, but no handler set!");
+
+        // The UV rect of each external image is only known once the handler has
+        // locked it, so the corresponding GPU cache blocks are collected here.
+        let mut update_list = GpuCacheUpdateList {
+            frame_id: FrameId::INVALID,
+            clear: false,
+            height: self.gpu_cache_texture.get_height(),
+            blocks: Vec::new(),
+            updates: Vec::new(),
+            debug_commands: Vec::new(),
+        };
+
+        for (resolve_index, resolve) in deferred_resolves.iter().enumerate() {
+            self.gpu_profiler.place_marker("deferred resolve");
+
+            let ext_image = resolve
+                .image_properties
+                .external_image
+                .expect("BUG: Deferred resolves must be external images!");
+
+            // Ask the handler for the rendering information of this image.
+            let image = handler.lock(ext_image.id, ext_image.channel_index);
+            let texture_target = match ext_image.image_type {
+                ExternalImageType::TextureHandle(target) => target,
+                ExternalImageType::Buffer => {
+                    panic!("not a suitable image type in update_deferred_resolves()");
+                }
+            };
+
+            // Locking may have called into the GL context and changed some
+            // state behind the device's back.
+            self.device.reset_state();
+
+            // Pick the GL handle first; everything else about the texture is shared.
+            let gl_handle = match image.source {
+                ExternalImageSource::NativeTexture(texture_id) => texture_id,
+                ExternalImageSource::Invalid => {
+                    warn!("Invalid ext-image");
+                    debug!(
+                        "For ext_id:{:?}, channel:{}.",
+                        ext_image.id,
+                        ext_image.channel_index
+                    );
+                    // A failed lock falls back to 0 as the gl handle.
+                    0
+                }
+                ExternalImageSource::RawData(_) => {
+                    panic!("Raw external data is not expected for deferred resolves!");
+                }
+            };
+            let texture = ExternalTexture::new(
+                gl_handle,
+                texture_target,
+                image.uv,
+                resolve.rendering,
+            );
+
+            self.texture_resolver
+                .external_images
+                .insert(DeferredResolveIndex(resolve_index as u32), texture);
+
+            // Queue a copy of the blocks pushed just below to the resolve's address.
+            let first_block = update_list.blocks.len();
+            update_list.updates.push(GpuCacheUpdate::Copy {
+                block_index: first_block,
+                block_count: BLOCKS_PER_UV_RECT,
+                address: resolve.address,
+            });
+            update_list.blocks.push(image.uv.into());
+            update_list.blocks.push([0f32; 4].into());
+        }
+
+        Some(update_list)
+    }
+
+    /// Unlock every external image currently registered with the texture
+    /// resolver, emptying the registry in the process.
+    ///
+    /// `deferred_resolves` must be the slice the images were locked from, since
+    /// registry keys are indices into it.
+    ///
+    /// # Panics
+    ///
+    /// Panics if images are registered but no external image handler is set, or
+    /// if a registered resolve does not refer to an external image.
+    fn unlock_external_images(
+        &mut self,
+        deferred_resolves: &[DeferredResolve],
+    ) {
+        // No locked images means no handler is required at all.
+        if self.texture_resolver.external_images.is_empty() {
+            return;
+        }
+
+        let handler = self.external_image_handler
+            .as_mut()
+            .expect("Found external image, but no handler set!");
+
+        for (resolve_index, _) in self.texture_resolver.external_images.drain() {
+            let ext_image = deferred_resolves[resolve_index.0 as usize]
+                .image_properties
+                .external_image
+                .expect("BUG: Deferred resolves must be external images!");
+            handler.unlock(ext_image.id, ext_image.channel_index);
+        }
+    }
+
+    /// Update the dirty rects based on current compositing mode and config.
+    ///
+    /// Dirty rects reported to the client are pushed onto `results.dirty_rects`:
+    /// the combined dirty rect of this frame when partial present can be used
+    /// (nothing at all if that rect is empty), or the whole framebuffer otherwise.
+    ///
+    /// Returns `Some(PartialPresentMode::Single { .. })` holding the region to
+    /// render when partial present can be used this frame, and `None` otherwise.
+    ///
+    /// `self.force_redraw` is cleared whenever the compositor supports at least
+    /// one partial present rect.
+    // TODO(gw): This can be tidied up significantly once the Draw compositor
+    //           is implemented in terms of the compositor trait.
+    fn calculate_dirty_rects(
+        &mut self,
+        buffer_age: usize,
+        composite_state: &CompositeState,
+        draw_target_dimensions: DeviceIntSize,
+        results: &mut RenderResults,
+    ) -> Option<PartialPresentMode> {
+        let mut partial_present_mode = None;
+
+        let (max_partial_present_rects, draw_previous_partial_present_regions) = match self.current_compositor_kind {
+            CompositorKind::Native { .. } => {
+                // Assume that we can return a single dirty rect for native
+                // compositor for now, and that there is no buffer-age functionality.
+                // These params can be exposed by the compositor capabilities struct
+                // as the Draw compositor is ported to use it.
+                (1, false)
+            }
+            CompositorKind::Draw { draw_previous_partial_present_regions, max_partial_present_rects } => {
+                (max_partial_present_rects, draw_previous_partial_present_regions)
+            }
+        };
+
+        if max_partial_present_rects > 0 {
+            // Damage accumulated by previous frames is only tracked when a partial
+            // present compositor is available. If the tracker has no answer for
+            // this buffer age, treat the whole framebuffer as damaged.
+            let prev_frames_damage_rect = if self.compositor_config.partial_present().is_some() {
+                self.buffer_damage_tracker
+                    .get_damage_rect(buffer_age)
+                    .or_else(|| Some(DeviceRect::from_size(draw_target_dimensions.to_f32())))
+            } else {
+                None
+            };
+
+            let can_use_partial_present =
+                composite_state.dirty_rects_are_valid &&
+                !self.force_redraw &&
+                !(prev_frames_damage_rect.is_none() && draw_previous_partial_present_regions) &&
+                !self.debug_overlay_state.is_enabled;
+
+            if can_use_partial_present {
+                let mut combined_dirty_rect = DeviceRect::zero();
+                let fb_rect = DeviceRect::from_size(draw_target_dimensions.to_f32());
+
+                // Accumulate the dirty rects of all non-clear tiles into a single
+                // combined rect.
+                for tile in &composite_state.tiles {
+                    if tile.kind == TileKind::Clear {
+                        continue;
+                    }
+                    let dirty_rect = composite_state.get_device_rect(
+                        &tile.local_dirty_rect,
+                        tile.transform_index,
+                    );
+
+                    // In pathological cases where a tile is extremely zoomed, it
+                    // may end up with device coords outside the range of an i32,
+                    // so clamp it to the frame buffer rect here, before it gets
+                    // casted to an i32 rect below.
+                    if let Some(dirty_rect) = dirty_rect.intersection(&fb_rect) {
+                        combined_dirty_rect = combined_dirty_rect.union(&dirty_rect);
+                    }
+                }
+
+                let combined_dirty_rect = combined_dirty_rect.round();
+                let combined_dirty_rect_i32 = combined_dirty_rect.to_i32();
+                // Return this frame's dirty region. If nothing has changed, don't return any dirty
+                // rects at all (the client can use this as a signal to skip present completely).
+                if !combined_dirty_rect.is_empty() {
+                    results.dirty_rects.push(combined_dirty_rect_i32);
+                }
+
+                // Track this frame's dirty region, for calculating subsequent frames' damage.
+                if draw_previous_partial_present_regions {
+                    self.buffer_damage_tracker.push_dirty_rect(&combined_dirty_rect);
+                }
+
+                // If the implementation requires manually keeping the buffer consistent,
+                // then we must combine this frame's dirty region with that of previous frames
+                // to determine the total_dirty_rect. This is used to determine what region we
+                // render to, and is what we send to the compositor as the buffer damage region
+                // (eg for KHR_partial_update).
+                let total_dirty_rect = if draw_previous_partial_present_regions {
+                    // `can_use_partial_present` guarantees the damage rect is present
+                    // whenever previous regions have to be drawn.
+                    combined_dirty_rect.union(&prev_frames_damage_rect.unwrap())
+                } else {
+                    combined_dirty_rect
+                };
+
+                partial_present_mode = Some(PartialPresentMode::Single {
+                    dirty_rect: total_dirty_rect,
+                });
+            } else {
+                // If we don't have a valid partial present scenario, return a single
+                // dirty rect to the client that covers the entire framebuffer.
+                let fb_rect = DeviceIntRect::from_size(
+                    draw_target_dimensions,
+                );
+                results.dirty_rects.push(fb_rect);
+
+                if draw_previous_partial_present_regions {
+                    self.buffer_damage_tracker.push_dirty_rect(&fb_rect.to_f32());
+                }
+            }
+
+            self.force_redraw = false;
+        }
+
+        partial_present_mode
+    }
+
+    /// Upload the frame's data into the current set of vertex data textures,
+    /// then advance to the next set, wrapping around after
+    /// `VERTEX_DATA_TEXTURE_COUNT` sets.
+    fn bind_frame_data(&mut self, frame: &mut Frame) {
+        profile_scope!("bind_frame_data");
+
+        let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_DATA);
+
+        let active_set = self.current_vertex_data_textures;
+        self.vertex_data_textures[active_set].update(
+            &mut self.device,
+            &mut self.texture_upload_pbo_pool,
+            frame,
+        );
+
+        // Move on to the next set for the following frame.
+        self.current_vertex_data_textures = (active_set + 1) % VERTEX_DATA_TEXTURE_COUNT;
+    }
+
+    /// Forward every queued native surface operation to the native compositor,
+    /// keeping `allocated_native_surfaces` in sync with surface creation and
+    /// destruction.
+    ///
+    /// With the `Draw` compositor config nothing is forwarded; the queue is only
+    /// debug-asserted to be empty.
+    fn update_native_surfaces(&mut self) {
+        profile_scope!("update_native_surfaces");
+
+        match self.compositor_config {
+            CompositorConfig::Draw { .. } => {
+                // The queue is never drained in simple composite mode, so anything
+                // pushed onto it would leak memory; make sure it stays empty.
+                debug_assert!(self.pending_native_surface_updates.is_empty());
+            }
+            CompositorConfig::Native { ref mut compositor, .. } => {
+                for pending in self.pending_native_surface_updates.drain(..) {
+                    match pending.details {
+                        NativeSurfaceOperationDetails::CreateSurface { id, virtual_offset, tile_size, is_opaque } => {
+                            let _was_new = self.allocated_native_surfaces.insert(id);
+                            debug_assert!(_was_new, "bug: creating existing surface");
+                            compositor.create_surface(
+                                &mut self.device,
+                                id,
+                                virtual_offset,
+                                tile_size,
+                                is_opaque,
+                            );
+                        }
+                        NativeSurfaceOperationDetails::CreateExternalSurface { id, is_opaque } => {
+                            let _was_new = self.allocated_native_surfaces.insert(id);
+                            debug_assert!(_was_new, "bug: creating existing surface");
+                            compositor.create_external_surface(
+                                &mut self.device,
+                                id,
+                                is_opaque,
+                            );
+                        }
+                        NativeSurfaceOperationDetails::CreateBackdropSurface { id, color } => {
+                            let _was_new = self.allocated_native_surfaces.insert(id);
+                            debug_assert!(_was_new, "bug: creating existing surface");
+                            compositor.create_backdrop_surface(
+                                &mut self.device,
+                                id,
+                                color,
+                            );
+                        }
+                        NativeSurfaceOperationDetails::DestroySurface { id } => {
+                            let _was_known = self.allocated_native_surfaces.remove(&id);
+                            debug_assert!(_was_known, "bug: removing unknown surface");
+                            compositor.destroy_surface(&mut self.device, id);
+                        }
+                        // Tile and external image operations do not affect the set
+                        // of allocated surfaces; they are simply passed through.
+                        NativeSurfaceOperationDetails::CreateTile { id } => {
+                            compositor.create_tile(&mut self.device, id);
+                        }
+                        NativeSurfaceOperationDetails::DestroyTile { id } => {
+                            compositor.destroy_tile(&mut self.device, id);
+                        }
+                        NativeSurfaceOperationDetails::AttachExternalImage { id, external_image } => {
+                            compositor.attach_external_image(&mut self.device, id, external_image);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /// Renders one built `Frame`: uploads the frame's GPU buffer (if any), draws
+    /// every render pass (texture cache, picture cache, alpha and color targets)
+    /// and finally hands off to `composite_frame`.
+    ///
+    /// * `device_size` - when `None`, no dirty rects are calculated and no native
+    ///   surfaces are queued for composition here; the value is also forwarded to
+    ///   `composite_frame`.
+    /// * `buffer_age` - forwarded to `calculate_dirty_rects`.
+    /// * `results` - receives the per-frame stats and the dirty rects.
+    ///
+    /// `frame.has_been_rendered` is set on exit, including the early exit taken
+    /// when the frame has no passes. When it is already set on entry, texture
+    /// cache and picture cache targets are not redrawn.
+    fn draw_frame(
+        &mut self,
+        frame: &mut Frame,
+        device_size: Option<DeviceIntSize>,
+        buffer_age: usize,
+        results: &mut RenderResults,
+    ) {
+        profile_scope!("draw_frame");
+
+        // These markers seem to crash a lot on Android, see bug 1559834
+        #[cfg(not(target_os = "android"))]
+        let _gm = self.gpu_profiler.start_marker("draw frame");
+
+        // A frame without passes has nothing to draw; just flag it as rendered.
+        if frame.passes.is_empty() {
+            frame.has_been_rendered = true;
+            return;
+        }
+
+        // Start from a known state: no depth writes, no blending, no stencil.
+        self.device.disable_depth_write();
+        self.set_blend(false, FramebufferKind::Other);
+        self.device.disable_stencil();
+
+        self.bind_frame_data(frame);
+
+        // Upload experimental GPU buffer texture if there is any data present
+        // TODO: Recycle these textures, upload via PBO or best approach for platform
+        let gpu_buffer_texture = if frame.gpu_buffer.is_empty() {
+            None
+        } else {
+            let gpu_buffer_texture = self.device.create_texture(
+                ImageBufferKind::Texture2D,
+                ImageFormat::RGBAF32,
+                frame.gpu_buffer.size.width,
+                frame.gpu_buffer.size.height,
+                TextureFilter::Nearest,
+                None,
+            );
+
+            self.device.bind_texture(
+                TextureSampler::GpuBuffer,
+                &gpu_buffer_texture,
+                Swizzle::default(),
+            );
+
+            self.device.upload_texture_immediate(
+                &gpu_buffer_texture,
+                &frame.gpu_buffer.data,
+            );
+
+            Some(gpu_buffer_texture)
+        };
+
+        // Determine the present mode and dirty rects, if device_size
+        // is Some(..). If it's None, no composite will occur and only
+        // picture cache and texture cache targets will be updated.
+        // TODO(gw): Split Frame so that it's clearer when a composite
+        //           is occurring.
+        let present_mode = device_size.and_then(|device_size| {
+            self.calculate_dirty_rects(
+                buffer_age,
+                &frame.composite_state,
+                device_size,
+                results,
+            )
+        });
+
+        // If we have a native OS compositor, then make use of that interface to
+        // specify how to composite each of the picture cache surfaces. First, we
+        // need to find each tile that may be bound and updated later in the frame
+        // and invalidate it so that the native render compositor knows that these
+        // tiles can't be composited early. Next, after all such tiles have been
+        // invalidated, then we queue surfaces for native composition by the render
+        // compositor before we actually update the tiles. This allows the render
+        // compositor to start early composition while the tiles are updating.
+        if let CompositorKind::Native { .. } = self.current_compositor_kind {
+            let compositor = self.compositor_config.compositor().unwrap();
+            // Invalidate any native surface tiles that might be updated by passes.
+            if !frame.has_been_rendered {
+                for tile in &frame.composite_state.tiles {
+                    if tile.kind == TileKind::Clear {
+                        continue;
+                    }
+                    if !tile.local_dirty_rect.is_empty() {
+                        if let CompositeTileSurface::Texture { surface: ResolvedSurfaceTexture::Native { id, .. } } = tile.surface {
+                            let valid_rect = frame.composite_state.get_surface_rect(
+                                &tile.local_valid_rect,
+                                &tile.local_rect,
+                                tile.transform_index,
+                            ).to_i32();
+
+                            compositor.invalidate_tile(&mut self.device, id, valid_rect);
+                        }
+                    }
+                }
+            }
+            // Ensure any external surfaces that might be used during early composition
+            // are invalidated first so that the native compositor can properly schedule
+            // composition to happen only when the external surface is updated.
+            // See update_external_native_surfaces for more details.
+            for surface in &frame.composite_state.external_surfaces {
+                if let Some((native_surface_id, size)) = surface.update_params {
+                    let surface_rect = size.into();
+                    compositor.invalidate_tile(&mut self.device, NativeTileId { surface_id: native_surface_id, x: 0, y: 0 }, surface_rect);
+                }
+            }
+            // Finally queue native surfaces for early composition, if applicable. By now,
+            // we have already invalidated any tiles that such surfaces may depend upon, so
+            // the native render compositor can keep track of when to actually schedule
+            // composition as surfaces are updated.
+            if device_size.is_some() {
+                frame.composite_state.composite_native(
+                    self.clear_color,
+                    &results.dirty_rects,
+                    &mut self.device,
+                    &mut **compositor,
+                );
+            }
+        }
+
+        // `_pass_index` is only read by the (non-Android) GPU marker below.
+        for (_pass_index, pass) in frame.passes.iter_mut().enumerate() {
+            #[cfg(not(target_os = "android"))]
+            let _gm = self.gpu_profiler.start_marker(&format!("pass {}", _pass_index));
+
+            profile_scope!("offscreen target");
+
+            // If this frame has already been drawn, then any texture
+            // cache targets have already been updated and can be
+            // skipped this time.
+            if !frame.has_been_rendered {
+                for (&texture_id, target) in &pass.texture_cache {
+                    self.draw_texture_cache_target(
+                        &texture_id,
+                        target,
+                        &frame.render_tasks,
+                        &mut results.stats,
+                    );
+                }
+
+                if !pass.picture_cache.is_empty() {
+                    self.profile.inc(profiler::COLOR_PASSES);
+                }
+
+                // Draw picture caching tiles for this pass.
+                for picture_target in &pass.picture_cache {
+                    results.stats.color_target_count += 1;
+
+                    let draw_target = match picture_target.surface {
+                        ResolvedSurfaceTexture::TextureCache { ref texture } => {
+                            let (texture, _) = self.texture_resolver
+                                .resolve(texture)
+                                .expect("bug");
+
+                            DrawTarget::from_texture(
+                                texture,
+                                true,
+                            )
+                        }
+                        ResolvedSurfaceTexture::Native { id, size } => {
+                            // Native surfaces are bound through the compositor, which
+                            // supplies the FBO and the origin to draw at.
+                            let surface_info = match self.current_compositor_kind {
+                                CompositorKind::Native { .. } => {
+                                    let compositor = self.compositor_config.compositor().unwrap();
+                                    compositor.bind(
+                                        &mut self.device,
+                                        id,
+                                        picture_target.dirty_rect,
+                                        picture_target.valid_rect,
+                                    )
+                                }
+                                CompositorKind::Draw { .. } => {
+                                    unreachable!();
+                                }
+                            };
+
+                            DrawTarget::NativeSurface {
+                                offset: surface_info.origin,
+                                external_fbo_id: surface_info.fbo_id,
+                                dimensions: size,
+                            }
+                        }
+                    };
+
+                    let projection = Transform3D::ortho(
+                        0.0,
+                        draw_target.dimensions().width as f32,
+                        0.0,
+                        draw_target.dimensions().height as f32,
+                        self.device.ortho_near_plane(),
+                        self.device.ortho_far_plane(),
+                    );
+
+                    self.draw_picture_cache_target(
+                        picture_target,
+                        draw_target,
+                        &projection,
+                        &frame.render_tasks,
+                        &mut results.stats,
+                    );
+
+                    // Native OS surfaces must be unbound at the end of drawing to them
+                    if let ResolvedSurfaceTexture::Native { .. } = picture_target.surface {
+                        match self.current_compositor_kind {
+                            CompositorKind::Native { .. } => {
+                                let compositor = self.compositor_config.compositor().unwrap();
+                                compositor.unbind(&mut self.device);
+                            }
+                            CompositorKind::Draw { .. } => {
+                                unreachable!();
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Alpha targets of this pass.
+            for target in &pass.alpha.targets {
+                results.stats.alpha_target_count += 1;
+
+                let texture_id = target.texture_id();
+
+                let alpha_tex = self.texture_resolver.get_cache_texture_mut(&texture_id);
+
+                let draw_target = DrawTarget::from_texture(
+                    alpha_tex,
+                    false,
+                );
+
+                let projection = Transform3D::ortho(
+                    0.0,
+                    draw_target.dimensions().width as f32,
+                    0.0,
+                    draw_target.dimensions().height as f32,
+                    self.device.ortho_near_plane(),
+                    self.device.ortho_far_plane(),
+                );
+
+                self.draw_alpha_target(
+                    draw_target,
+                    target,
+                    &projection,
+                    &frame.render_tasks,
+                    &mut results.stats,
+                );
+            }
+
+            // Color targets of this pass. The render target info requests depth
+            // whenever the pass as a whole needs it.
+            let color_rt_info = RenderTargetInfo { has_depth: pass.color.needs_depth() };
+
+            for target in &pass.color.targets {
+                results.stats.color_target_count += 1;
+
+                let texture_id = target.texture_id();
+
+                let color_tex = self.texture_resolver.get_cache_texture_mut(&texture_id);
+
+                self.device.reuse_render_target::<u8>(
+                    color_tex,
+                    color_rt_info,
+                );
+
+                let draw_target = DrawTarget::from_texture(
+                    color_tex,
+                    target.needs_depth(),
+                );
+
+                let projection = Transform3D::ortho(
+                    0.0,
+                    draw_target.dimensions().width as f32,
+                    0.0,
+                    draw_target.dimensions().height as f32,
+                    self.device.ortho_near_plane(),
+                    self.device.ortho_far_plane(),
+                );
+
+                // Depth is only cleared (to 1.0) for targets that use it.
+                let clear_depth = if target.needs_depth() {
+                    Some(1.0)
+                } else {
+                    None
+                };
+
+                self.draw_color_target(
+                    draw_target,
+                    target,
+                    clear_depth,
+                    &frame.render_tasks,
+                    &projection,
+                    &mut results.stats,
+                );
+            }
+
+            // Only end the pass here and invalidate previous textures for
+            // off-screen targets. Deferring return of the inputs to the
+            // frame buffer until the implicit end_pass in end_frame allows
+            // debug draw overlays to be added without triggering a copy
+            // resolve stage in mobile / tiled GPUs.
+            self.texture_resolver.end_pass(
+                &mut self.device,
+                &pass.textures_to_invalidate,
+            );
+            {
+                profile_scope!("gl.flush");
+                self.device.gl().flush();
+            }
+        }
+
+        self.composite_frame(
+            frame,
+            device_size,
+            results,
+            present_mode,
+        );
+
+        // The GPU buffer texture was created for this frame only; release it
+        // now that compositing is done.
+        if let Some(gpu_buffer_texture) = gpu_buffer_texture {
+            self.device.delete_texture(gpu_buffer_texture);
+        }
+
+        frame.has_been_rendered = true;
+    }
+
+    /// Composites the already-rendered `frame` to the default framebuffer.
+    ///
+    /// With `device_size == None` nothing is presented; a full redraw is forced
+    /// for the next frame instead. Otherwise the picture cache debug info is
+    /// moved from the frame into `results`, and the frame is either handed to
+    /// `composite_simple` (draw compositor) or its external native surfaces are
+    /// updated (native compositor).
+    fn composite_frame(
+        &mut self,
+        frame: &mut Frame,
+        device_size: Option<DeviceIntSize>,
+        results: &mut RenderResults,
+        present_mode: Option<PartialPresentMode>,
+    ) {
+        profile_scope!("main target");
+
+        let device_size = match device_size {
+            Some(device_size) => device_size,
+            None => {
+                // Rendering a frame without presenting it will confuse the partial
+                // present logic, so force a full present for the next frame.
+                self.force_redraw();
+                return;
+            }
+        };
+
+        results.stats.color_target_count += 1;
+        results.picture_cache_debug = mem::replace(
+            &mut frame.composite_state.picture_cache_debug,
+            PictureCacheDebugInfo::new(),
+        );
+
+        // The vertical extents of the projection are swapped depending on
+        // which corner the device reports as the surface origin.
+        let frame_size = frame.device_rect.size().to_f32();
+        let surface_origin_is_top_left = self.device.surface_origin_is_top_left();
+        let (bottom, top) = if surface_origin_is_top_left {
+            (0.0, frame_size.height)
+        } else {
+            (frame_size.height, 0.0)
+        };
+        let projection = Transform3D::ortho(
+            0.0,
+            frame_size.width,
+            bottom,
+            top,
+            self.device.ortho_near_plane(),
+            self.device.ortho_far_plane(),
+        );
+
+        // Device pixels map 1:1 onto framebuffer pixels.
+        let fb_scale = Scale::<_, _, FramebufferPixel>::new(1i32);
+        let unflipped_rect = frame.device_rect * fb_scale;
+        let fb_rect = if surface_origin_is_top_left {
+            unflipped_rect
+        } else {
+            // Mirror the rect vertically within the device, keeping its height.
+            let mut flipped_rect = unflipped_rect;
+            let rect_height = flipped_rect.height();
+            flipped_rect.min.y = device_size.height - flipped_rect.max.y;
+            flipped_rect.max.y = flipped_rect.min.y + rect_height;
+            flipped_rect
+        };
+
+        let draw_target = DrawTarget::Default {
+            rect: fb_rect,
+            total_size: device_size * fb_scale,
+            surface_origin_is_top_left,
+        };
+
+        // If we have a native OS compositor, then make use of that interface
+        // to specify how to composite each of the picture cache surfaces.
+        match self.current_compositor_kind {
+            CompositorKind::Draw { .. } => {
+                self.composite_simple(
+                    &frame.composite_state,
+                    draw_target,
+                    &projection,
+                    results,
+                    present_mode,
+                );
+            }
+            CompositorKind::Native { .. } => {
+                // Surfaces were already queued for early native composition;
+                // what remains is updating the external native surfaces that
+                // were invalidated so that composition can complete.
+                self.update_external_native_surfaces(
+                    &frame.composite_state.external_surfaces,
+                    results,
+                );
+            }
+        }
+    }
+
+    /// Returns the debug renderer obtained from `self.debug`, or `None` when
+    /// `get_mut` does not provide one.
+    pub fn debug_renderer(&mut self) -> Option<&mut DebugRenderer> {
+        self.debug.get_mut(&mut self.device)
+    }
+
+    /// Returns the debug flags currently stored on the renderer.
+    pub fn get_debug_flags(&self) -> DebugFlags {
+        self.debug_flags
+    }
+
+    /// Replaces the renderer's debug flags with `flags`.
+    ///
+    /// When the GPU time or GPU sample query flag changes, the matching GPU
+    /// profiler facility is enabled or disabled before the new flags are stored.
+    pub fn set_debug_flags(&mut self, flags: DebugFlags) {
+        match flag_changed(self.debug_flags, flags, DebugFlags::GPU_TIME_QUERIES) {
+            Some(true) => {
+                self.gpu_profiler.enable_timers();
+            }
+            Some(false) => {
+                self.gpu_profiler.disable_timers();
+            }
+            None => {}
+        }
+
+        match flag_changed(self.debug_flags, flags, DebugFlags::GPU_SAMPLE_QUERIES) {
+            Some(true) => {
+                self.gpu_profiler.enable_samplers();
+            }
+            Some(false) => {
+                self.gpu_profiler.disable_samplers();
+            }
+            None => {}
+        }
+
+        self.debug_flags = flags;
+    }
+
+    /// Forwards `ui_str` to the profiler's `set_ui`.
+    pub fn set_profiler_ui(&mut self, ui_str: &str) {
+        self.profiler.set_ui(ui_str);
+    }
+
+    /// Queues the given debug items (rectangles and text) on the debug renderer.
+    ///
+    /// Does nothing when `items` is empty or when no debug renderer is available.
+    fn draw_frame_debug_items(&mut self, items: &[DebugItem]) {
+        if items.is_empty() {
+            return;
+        }
+
+        if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) {
+            for item in items {
+                match item {
+                    DebugItem::Text { msg, position, color } => {
+                        debug_renderer.add_text(
+                            position.x,
+                            position.y,
+                            msg,
+                            (*color).into(),
+                            None,
+                        );
+                    }
+                    DebugItem::Rect { rect, outer_color, inner_color } => {
+                        // Quad with the inner color for both color arguments...
+                        debug_renderer.add_quad(
+                            rect.min.x,
+                            rect.min.y,
+                            rect.max.x,
+                            rect.max.y,
+                            (*inner_color).into(),
+                            (*inner_color).into(),
+                        );
+
+                        // ...then the rect itself in the outer color.
+                        debug_renderer.add_rect(
+                            &rect.to_i32(),
+                            (*outer_color).into(),
+                        );
+                    }
+                }
+            }
+        }
+    }
+
+    /// Blits every render-target texture of the texture cache onto
+    /// `draw_target`, tagged in green, when `RENDER_TARGET_DBG` is set.
+    fn draw_render_target_debug(&mut self, draw_target: &DrawTarget) {
+        let enabled = self.debug_flags.contains(DebugFlags::RENDER_TARGET_DBG);
+        if !enabled {
+            return;
+        }
+
+        let debug_renderer = if let Some(renderer) = self.debug.get_mut(&mut self.device) {
+            renderer
+        } else {
+            return;
+        };
+
+        // Only cache entries categorized as render targets are shown.
+        let render_targets: Vec<&Texture> = self.texture_resolver
+            .texture_cache_map
+            .values()
+            .filter_map(|item| {
+                if item.category == TextureCacheCategory::RenderTarget {
+                    Some(&item.texture)
+                } else {
+                    None
+                }
+            })
+            .collect();
+
+        Self::do_debug_blit(
+            &mut self.device,
+            debug_renderer,
+            render_targets,
+            draw_target,
+            0,
+            &|_| [0.0, 1.0, 0.0, 1.0], // Use green for all RTs.
+        );
+    }
+
+    /// Draws the zoom debug view when `ZOOM_DBG` is set: a 64x64 region of the
+    /// framebuffer around the cursor is copied into an intermediate texture
+    /// and blitted back, magnified to 1024x1024, with a red outline.
+    fn draw_zoom_debug(
+        &mut self,
+        device_size: DeviceIntSize,
+    ) {
+        if !self.debug_flags.contains(DebugFlags::ZOOM_DBG) {
+            return;
+        }
+
+        let debug_renderer = if let Some(renderer) = self.debug.get_mut(&mut self.device) {
+            renderer
+        } else {
+            return;
+        };
+
+        let source_size = DeviceIntSize::new(64, 64);
+        let target_size = DeviceIntSize::new(1024, 1024);
+
+        // Centers the source region on the cursor along one axis, then keeps
+        // it inside the device bounds.
+        let clamp_origin = |cursor: i32, source_extent: i32, device_extent: i32| {
+            (cursor - source_extent / 2)
+                .min(device_extent - source_extent)
+                .max(0)
+        };
+
+        let source_rect = DeviceIntRect::from_origin_and_size(
+            DeviceIntPoint::new(
+                clamp_origin(self.cursor_position.x, source_size.width, device_size.width),
+                clamp_origin(self.cursor_position.y, source_size.height, device_size.height),
+            ),
+            source_size,
+        );
+
+        // The magnified view is placed 64 pixels in from the device's maximum x/y.
+        let target_origin = DeviceIntPoint::new(
+            device_size.width - target_size.width - 64,
+            device_size.height - target_size.height - 64,
+        );
+        let target_rect = DeviceIntRect::from_origin_and_size(target_origin, target_size);
+
+        let texture_rect = FramebufferIntRect::from_size(
+            source_rect.size().cast_unit(),
+        );
+
+        debug_renderer.add_rect(
+            &target_rect.inflate(1, 1),
+            debug_colors::RED.into(),
+        );
+
+        // The intermediate texture is created on first use and then kept.
+        if self.zoom_debug_texture.is_none() {
+            self.zoom_debug_texture = Some(self.device.create_texture(
+                ImageBufferKind::Texture2D,
+                ImageFormat::BGRA8,
+                source_rect.width(),
+                source_rect.height(),
+                TextureFilter::Nearest,
+                Some(RenderTargetInfo { has_depth: false }),
+            ));
+        }
+        let zoom_texture = self.zoom_debug_texture.as_ref().unwrap();
+
+        let framebuffer = DrawTarget::new_default(
+            device_size,
+            self.device.surface_origin_is_top_left(),
+        );
+
+        // Copy frame buffer into the zoom texture
+        self.device.blit_render_target(
+            framebuffer.into(),
+            framebuffer.to_framebuffer_rect(source_rect),
+            DrawTarget::from_texture(zoom_texture, false),
+            texture_rect,
+            TextureFilter::Nearest,
+        );
+
+        // Draw the zoom texture back to the framebuffer
+        self.device.blit_render_target(
+            ReadTarget::from_texture(zoom_texture),
+            texture_rect,
+            framebuffer,
+            framebuffer.to_framebuffer_rect(target_rect),
+            TextureFilter::Nearest,
+        );
+    }
+
+    /// Blits every atlas texture of the texture cache onto `draw_target` when
+    /// `TEXTURE_CACHE_DBG` is set. Tags are orange for shared texture cache
+    /// textures and fuchsia for standalone ones.
+    fn draw_texture_cache_debug(&mut self, draw_target: &DrawTarget) {
+        /// Picks the tag color for a texture based on its flags.
+        fn tag_color_for(texture: &Texture) -> [f32; 4] {
+            let is_shared = texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE);
+            if is_shared {
+                [1.0, 0.5, 0.0, 1.0] // Orange for shared.
+            } else {
+                [1.0, 0.0, 1.0, 1.0] // Fuchsia for standalone.
+            }
+        }
+
+        if !self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) {
+            return;
+        }
+
+        let debug_renderer = if let Some(renderer) = self.debug.get_mut(&mut self.device) {
+            renderer
+        } else {
+            return;
+        };
+
+        let atlases: Vec<&Texture> = self.texture_resolver
+            .texture_cache_map
+            .values()
+            .filter_map(|item| {
+                if item.category == TextureCacheCategory::Atlas {
+                    Some(&item.texture)
+                } else {
+                    None
+                }
+            })
+            .collect();
+
+        // When the render target debug row is also enabled, start this row
+        // 544 pixels higher. NOTE(review): presumably 544 is that row's
+        // height (512 image + 16 spacing + 16 tag) - confirm.
+        let bottom = if self.debug_flags.contains(DebugFlags::RENDER_TARGET_DBG) {
+            544
+        } else {
+            0
+        };
+
+        Self::do_debug_blit(
+            &mut self.device,
+            debug_renderer,
+            atlases,
+            draw_target,
+            bottom,
+            &tag_color_for,
+        );
+    }
+
+    /// Draws a row of texture previews onto `draw_target`, laid out
+    /// right-to-left from the target's right edge.
+    ///
+    /// * `textures` - textures to preview; sorted in place by size in bytes so
+    ///   that the largest ends up leftmost.
+    /// * `bottom` - vertical offset of the row, subtracted (together with the
+    ///   spacing and tag height) from the target height.
+    /// * `select_color` - chooses the tag color for each texture.
+    ///
+    /// Each preview is a square image (512 pixels, shrunk when the row would
+    /// not fit the target width) with a colored tag showing the texture's
+    /// dimensions.
+    fn do_debug_blit(
+        device: &mut Device,
+        debug_renderer: &mut DebugRenderer,
+        mut textures: Vec<&Texture>,
+        draw_target: &DrawTarget,
+        bottom: i32,
+        select_color: &dyn Fn(&Texture) -> [f32; 4],
+    ) {
+        let mut spacing = 16;
+        let mut size = 512;
+
+        let device_size = draw_target.dimensions();
+        let fb_width = device_size.width;
+        let fb_height = device_size.height;
+        let surface_origin_is_top_left = draw_target.surface_origin_is_top_left();
+
+        let num_textures = textures.len() as i32;
+
+        // Shrink previews and spacing proportionally if the row is too wide.
+        if num_textures * (size + spacing) > fb_width {
+            let factor = fb_width as f32 / (num_textures * (size + spacing)) as f32;
+            size = (size as f32 * factor) as i32;
+            spacing = (spacing as f32 * factor) as i32;
+        }
+
+        let text_height = 14; // Visually approximated.
+        let text_margin = 1;
+        let tag_height = text_height + text_margin * 2;
+        let tag_y = fb_height - (bottom + spacing + tag_height);
+        let image_y = tag_y - size;
+
+        // Sort the display by size (in bytes), so that left-to-right is
+        // largest-to-smallest.
+        //
+        // Note that the vec here is in increasing order, because the elements
+        // get drawn right-to-left.
+        textures.sort_by_key(|t| t.size_in_bytes());
+
+        for (i, texture) in textures.iter().enumerate() {
+            let dimensions = texture.get_dimensions();
+            let src_rect = FramebufferIntRect::from_size(
+                FramebufferIntSize::new(dimensions.width as i32, dimensions.height as i32),
+            );
+
+            let x = fb_width - (spacing + size) * (i as i32 + 1);
+
+            // If we have more targets than fit on one row in screen, just early exit.
+            // NOTE(review): `x` is `fb_width` minus a non-negative offset, so this
+            // condition looks unreachable; kept as-is to preserve behavior.
+            if x > fb_width {
+                return;
+            }
+
+            // Draw the info tag.
+            let tag_rect = rect(x, tag_y, size, tag_height).to_box2d();
+            let tag_color = select_color(texture);
+            device.clear_target(
+                Some(tag_color),
+                None,
+                Some(draw_target.to_framebuffer_rect(tag_rect)),
+            );
+
+            // Draw the dimensions onto the tag.
+            let text_rect = tag_rect.inflate(-text_margin, -text_margin);
+            debug_renderer.add_text(
+                text_rect.min.x as f32,
+                text_rect.max.y as f32, // Top-relative.
+                &format!("{}x{}", dimensions.width, dimensions.height),
+                ColorU::new(0, 0, 0, 255),
+                Some(tag_rect.to_f32())
+            );
+
+            // Blit the contents of the texture.
+            let dest_rect = draw_target.to_framebuffer_rect(rect(x, image_y, size, size).to_box2d());
+            let read_target = ReadTarget::from_texture(texture);
+
+            if surface_origin_is_top_left {
+                device.blit_render_target(
+                    read_target,
+                    src_rect,
+                    *draw_target,
+                    dest_rect,
+                    TextureFilter::Linear,
+                );
+            } else {
+                // Invert y.
+                device.blit_render_target_invert_y(
+                    read_target,
+                    src_rect,
+                    *draw_target,
+                    dest_rect,
+                );
+            }
+        }
+    }
+
+    /// Lists every `(pipeline, document): epoch` entry of `pipeline_info` as
+    /// yellow text with a dark quad behind it, when `EPOCHS` is set.
+    fn draw_epoch_debug(&mut self) {
+        if !self.debug_flags.contains(DebugFlags::EPOCHS) {
+            return;
+        }
+
+        let debug_renderer = if let Some(renderer) = self.debug.get_mut(&mut self.device) {
+            renderer
+        } else {
+            return;
+        };
+
+        let line_height = debug_renderer.line_height();
+        let left: f32 = 30.0;
+        let top: f32 = 30.0;
+        let margin = 10.0;
+
+        // One line per entry; remember the widest line so the quad can
+        // enclose all of them.
+        let mut baseline = top;
+        let mut widest_line: f32 = 0.0;
+        for (key, epoch) in &self.pipeline_info.epochs {
+            baseline += line_height;
+            let label = format!("({:?}, {:?}): {:?}", key.0, key.1, epoch);
+            let line_rect = debug_renderer.add_text(
+                left,
+                baseline,
+                &label,
+                ColorU::new(255, 255, 0, 255),
+                None,
+            );
+            widest_line = widest_line.max(line_rect.size.width);
+        }
+
+        debug_renderer.add_quad(
+            left - margin,
+            top - margin,
+            left + widest_line + margin,
+            baseline + margin,
+            ColorU::new(25, 25, 25, 200),
+            ColorU::new(51, 51, 51, 200),
+        );
+    }
+
+    /// Prints the native compositor's window visibility as debug text when
+    /// `WINDOW_VISIBILITY_DBG` is set: red if the window is fully occluded,
+    /// blue otherwise. Nothing is drawn without a native compositor.
+    fn draw_window_visibility_debug(&mut self) {
+        if !self.debug_flags.contains(DebugFlags::WINDOW_VISIBILITY_DBG) {
+            return;
+        }
+
+        let debug_renderer = if let Some(renderer) = self.debug.get_mut(&mut self.device) {
+            renderer
+        } else {
+            return;
+        };
+
+        let compositor = match self.compositor_config {
+            CompositorConfig::Native { ref mut compositor, .. } => compositor,
+            _ => return,
+        };
+
+        let visibility = compositor.get_window_visibility(&mut self.device);
+        let (red, blue) = if visibility.is_fully_occluded {
+            (255, 0)
+        } else {
+            (0, 255)
+        };
+
+        let text_x: f32 = 30.0;
+        let text_y: f32 = 40.0;
+        debug_renderer.add_text(
+            text_x,
+            text_y,
+            &format!("{:?}", visibility),
+            ColorU::new(red, 0, blue, 255),
+            None,
+        );
+    }
+
+    /// Draws an overlay of the GPU cache when `GPU_CACHE_DBG` is set: a grey
+    /// quad covering the visible part of the cache texture, plus one red
+    /// one-pixel-high quad per recorded debug chunk.
+    ///
+    /// The overlay is inset 30 pixels from the top-left corner, and its height
+    /// is limited to the device height minus twice that inset.
+    fn draw_gpu_cache_debug(&mut self, device_size: DeviceIntSize) {
+        if !self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
+            return;
+        }
+
+        let debug_renderer = match self.debug.get_mut(&mut self.device) {
+            Some(render) => render,
+            None => return,
+        };
+
+        let (x_off, y_off) = (30f32, 30f32);
+        // Clamp to zero before the cast: for devices shorter than twice the
+        // inset the subtraction is negative, and `as usize` would otherwise
+        // wrap it into a huge height.
+        let height = self.gpu_cache_texture.get_height()
+            .min(device_size.height - (y_off as i32) * 2)
+            .max(0) as usize;
+        debug_renderer.add_quad(
+            x_off,
+            y_off,
+            x_off + MAX_VERTEX_TEXTURE_WIDTH as f32,
+            y_off + height as f32,
+            ColorU::new(80, 80, 80, 80),
+            ColorU::new(80, 80, 80, 80),
+        );
+
+        // Only the chunk rows that fall inside the visible height are drawn.
+        let upper = self.gpu_cache_debug_chunks.len().min(height);
+        for chunk in self.gpu_cache_debug_chunks[0..upper].iter().flatten() {
+            let color = ColorU::new(250, 0, 0, 200);
+            debug_renderer.add_quad(
+                x_off + chunk.address.u as f32,
+                y_off + chunk.address.v as f32,
+                x_off + chunk.address.u as f32 + chunk.size as f32,
+                y_off + chunk.address.v as f32 + 1.0,
+                color,
+                color,
+            );
+        }
+    }
+
+    /// Pass-through to `Device::read_pixels_into`, used by Gecko's WR bindings.
+    ///
+    /// `rect`, `format` and `output` are forwarded unchanged.
+    pub fn read_pixels_into(&mut self, rect: FramebufferIntRect, format: ImageFormat, output: &mut [u8]) {
+        self.device.read_pixels_into(rect, format, output);
+    }
+
+    /// Reads `rect` back as RGBA8 and returns the pixels in a freshly
+    /// allocated buffer of four bytes per pixel.
+    pub fn read_pixels_rgba8(&mut self, rect: FramebufferIntRect) -> Vec<u8> {
+        let byte_len = (rect.area() * 4) as usize;
+        let mut buffer = vec![0; byte_len];
+        self.device.read_pixels_into(rect, ImageFormat::RGBA8, &mut buffer);
+        buffer
+    }
+
+    /// De-initialize the Renderer safely, assuming the GL is still alive and active.
+    ///
+    /// Consumes the renderer and releases the resources it owns: native
+    /// compositor surfaces, textures, pools, VAOs, the debug renderer, shaders
+    /// and screenshot / frame recorder state.
+    pub fn deinit(mut self) {
+        // Note: this is a fake frame, only needed because texture deletion is required to happen inside a frame
+        self.device.begin_frame();
+        // If we are using a native compositor, ensure that any remaining native
+        // surfaces are freed.
+        if let CompositorConfig::Native { mut compositor, .. } = self.compositor_config {
+            for id in self.allocated_native_surfaces.drain() {
+                compositor.destroy_surface(&mut self.device, id);
+            }
+            // Destroy the debug overlay surface, if currently allocated.
+            if self.debug_overlay_state.current_size.is_some() {
+                compositor.destroy_surface(&mut self.device, NativeSurfaceId::DEBUG_OVERLAY);
+            }
+            compositor.deinit(&mut self.device);
+        }
+        self.gpu_cache_texture.deinit(&mut self.device);
+        if let Some(dither_matrix_texture) = self.dither_matrix_texture {
+            self.device.delete_texture(dither_matrix_texture);
+        }
+        if let Some(zoom_debug_texture) = self.zoom_debug_texture {
+            self.device.delete_texture(zoom_debug_texture);
+        }
+        for textures in self.vertex_data_textures.drain(..) {
+            textures.deinit(&mut self.device);
+        }
+        self.texture_upload_pbo_pool.deinit(&mut self.device);
+        self.staging_texture_pool.delete_textures(&mut self.device);
+        self.texture_resolver.deinit(&mut self.device);
+        self.vaos.deinit(&mut self.device);
+        self.debug.deinit(&mut self.device);
+
+        // The shaders are only deinitialized here when this renderer holds the
+        // last `Rc` reference to them; otherwise `try_unwrap` fails and they
+        // are left alone.
+        if let Ok(shaders) = Rc::try_unwrap(self.shaders) {
+            shaders.into_inner().deinit(&mut self.device);
+        }
+
+        if let Some(async_screenshots) = self.async_screenshots.take() {
+            async_screenshots.deinit(&mut self.device);
+        }
+
+        if let Some(async_frame_recorder) = self.async_frame_recorder.take() {
+            async_frame_recorder.deinit(&mut self.device);
+        }
+
+        #[cfg(feature = "capture")]
+        self.device.delete_fbo(self.read_fbo);
+        #[cfg(feature = "replay")]
+        for (_, ext) in self.owned_external_images {
+            self.device.delete_external_texture(ext);
+        }
+        // Close the fake frame opened at the top.
+        self.device.end_frame();
+    }
+
+    fn size_of<T>(&self, ptr: *const T) -> usize {
+        let ops = self.size_of_ops.as_ref().unwrap(); // panics if `size_of_ops` is `None`
+        unsafe { ops.malloc_size_of(ptr) } // NOTE(review): presumably `ptr` must point at a live heap allocation - confirm
+    }
+
+    /// Collects a memory report.
+    ///
+    /// Panics if `size_of_ops` is `None`; `swgl` is only forwarded to `Device::report_memory`.
+    pub fn report_memory(&self, swgl: *mut c_void) -> MemoryReport {
+        let ops = self.size_of_ops.as_ref().unwrap();
+        let mut report = MemoryReport::default();
+
+        // GPU cache CPU memory, followed by the staging texture pool.
+        self.gpu_cache_texture.report_memory_to(&mut report, ops);
+        self.staging_texture_pool.report_memory_to(&mut report, ops);
+
+        // Render task CPU memory.
+        for (_id, doc) in &self.active_documents {
+            let render_tasks = &doc.frame.render_tasks;
+            report.render_tasks += self.size_of(render_tasks.tasks.as_ptr());
+            report.render_tasks += self.size_of(render_tasks.task_data.as_ptr());
+        }
+
+        // Vertex data GPU memory.
+        self.vertex_data_textures
+            .iter()
+            .for_each(|textures| report.vertex_data_textures += textures.size_in_bytes());
+
+        // Texture cache and render target GPU memory, texture upload PBO memory,
+        // and finally textures held internally within the device layer.
+        report += self.texture_resolver.report_memory();
+        report += self.texture_upload_pbo_pool.report_memory();
+        report += self.device.report_memory(ops, swgl);
+
+        report
+    }
+
+    /// Turns blending on or off on the device.
+    ///
+    /// `blend` is overridden to `true` when the "show overdraw" debugging mode is
+    /// enabled and `framebuffer_kind` is the main framebuffer.
+    fn set_blend(&mut self, blend: bool, framebuffer_kind: FramebufferKind) {
+        let overdraw_on_main = framebuffer_kind == FramebufferKind::Main
+            && self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW);
+        self.device.set_blend(blend || overdraw_on_main)
+    }
+
+    /// Multiply blending, unless overdraw is being shown on the main framebuffer.
+    fn set_blend_mode_multiply(&mut self, framebuffer_kind: FramebufferKind) {
+        let show_overdraw = self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW);
+        match (framebuffer_kind, show_overdraw) {
+            (FramebufferKind::Main, true) => self.device.set_blend_mode_show_overdraw(),
+            _ => self.device.set_blend_mode_multiply(),
+        }
+    }
+
+    /// Premultiplied-alpha blending, unless overdraw is being shown on the main framebuffer.
+    fn set_blend_mode_premultiplied_alpha(&mut self, framebuffer_kind: FramebufferKind) {
+        let show_overdraw = self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW);
+        match (framebuffer_kind, show_overdraw) {
+            (FramebufferKind::Main, true) => self.device.set_blend_mode_show_overdraw(),
+            _ => self.device.set_blend_mode_premultiplied_alpha(),
+        }
+    }
+
+    /// Subpixel-with-background-color blending (pass 1), unless overdraw is being shown on the main framebuffer.
+    fn set_blend_mode_subpixel_with_bg_color_pass1(&mut self, framebuffer_kind: FramebufferKind) {
+        let show_overdraw = self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW);
+        match (framebuffer_kind, show_overdraw) {
+            (FramebufferKind::Main, true) => self.device.set_blend_mode_show_overdraw(),
+            _ => self.device.set_blend_mode_subpixel_with_bg_color_pass1(),
+        }
+    }
+
+    /// Subpixel-with-background-color blending (pass 2), unless overdraw is being shown on the main framebuffer.
+    fn set_blend_mode_subpixel_with_bg_color_pass2(&mut self, framebuffer_kind: FramebufferKind) {
+        let show_overdraw = self.debug_flags.contains(DebugFlags::SHOW_OVERDRAW);
+        match (framebuffer_kind, show_overdraw) {
+            (FramebufferKind::Main, true) => self.device.set_blend_mode_show_overdraw(),
+            _ => self.device.set_blend_mode_subpixel_with_bg_color_pass2(),
+        }
+    }
+
+    /// Clears the texture with a given color.
+    fn clear_texture(&mut self, texture: &Texture, color: [f32; 4]) {
+        // Bind `texture` as the draw target (second argument `false`), then clear
+        // with `color` only; the other two clear arguments stay `None`.
+        let target = DrawTarget::from_texture(texture, false);
+        self.device.bind_draw_target(target);
+        self.device.clear_target(Some(color), None, None);
+    }
+}
+
+bitflags! {
+    /// Flags that control how shaders are pre-cached, if at all.
+    #[derive(Default)]
+    pub struct ShaderPrecacheFlags: u32 {
+        /// No flags set. Needed for const initialization.
+        const EMPTY                 = 0;
+
+        /// Only start async compile (bit 2).
+        const ASYNC_COMPILE         = 1 << 2;
+
+        /// Do a full compile/link during startup (bit 3).
+        const FULL_COMPILE          = 1 << 3;
+    }
+}
+
+/// The cumulative times spent in each painting phase to generate this frame.
+#[derive(Debug, Default)]
+pub struct FullFrameStats {
+    pub full_display_list: bool, // OR-ed across inputs by `merge`
+    pub gecko_display_list_time: f64, // this and the three times below are added up by `total`
+    pub wr_display_list_time: f64,
+    pub scene_build_time: f64,
+    pub frame_build_time: f64,
+}
+
+impl FullFrameStats {
+    pub fn merge(&self, other: &FullFrameStats) -> Self {
+        Self { // a new value: the flag is OR-ed, each time is the sum of both inputs
+            full_display_list: self.full_display_list || other.full_display_list,
+            gecko_display_list_time: self.gecko_display_list_time + other.gecko_display_list_time,
+            wr_display_list_time: self.wr_display_list_time + other.wr_display_list_time,
+            scene_build_time: self.scene_build_time + other.scene_build_time,
+            frame_build_time: self.frame_build_time + other.frame_build_time
+        }
+    }
+    /// Sum of the four phase times stored in `self`.
+    pub fn total(&self) -> f64 {
+      self.gecko_display_list_time + self.wr_display_list_time + self.scene_build_time + self.frame_build_time
+    }
+}
+
+/// Some basic statistics about the rendered scene, used in Gecko, as
+/// well as in wrench reftests to ensure that tests are batching and/or
+/// allocating on render targets as we expect them to.
+#[repr(C)]
+#[derive(Debug, Default)]
+pub struct RendererStats {
+    pub total_draw_calls: usize,
+    pub alpha_target_count: usize,
+    pub color_target_count: usize,
+    pub texture_upload_mb: f64, // NOTE(review): presumably megabytes - confirm
+    pub resource_upload_time: f64,
+    pub gpu_cache_upload_time: f64,
+    pub gecko_display_list_time: f64, // copied from `FullFrameStats` by `merge`, like the next four fields
+    pub wr_display_list_time: f64,
+    pub scene_build_time: f64,
+    pub frame_build_time: f64,
+    pub full_display_list: bool,
+    pub full_paint: bool, // set to `true` by `merge`
+}
+
+impl RendererStats {
+    pub fn merge(&mut self, stats: &FullFrameStats) {
+        self.gecko_display_list_time = stats.gecko_display_list_time; // overwrites (does not add to) the existing values
+        self.wr_display_list_time = stats.wr_display_list_time;
+        self.scene_build_time = stats.scene_build_time;
+        self.frame_build_time = stats.frame_build_time;
+        self.full_display_list = stats.full_display_list;
+        self.full_paint = true; // unconditionally flagged whenever full-frame stats are merged in
+    }
+}
+
+/// Return type from render(), which contains some repr(C) statistics as well as
+/// some non-repr(C) data.
+#[derive(Debug, Default)]
+pub struct RenderResults {
+    /// Statistics about the frame that was rendered (the `repr(C)` part).
+    pub stats: RendererStats,
+
+    /// A list of the device dirty rects that were updated
+    /// this frame.
+    /// TODO(gw): This is an initial interface, likely to change in future.
+    /// TODO(gw): The dirty rects here are currently only useful when scrolling
+    ///           is not occurring. They are still correct in the case of
+    ///           scrolling, but will be very large (until we expose proper
+    ///           OS compositor support where the dirty rects apply to a
+    ///           specific picture cache slice / OS compositor surface).
+    pub dirty_rects: Vec<DeviceIntRect>,
+
+    /// Information about the state of picture cache tiles. This is only
+    /// allocated and stored if config.testing is true (such as in wrench).
+    pub picture_cache_debug: PictureCacheDebugInfo,
+}
+
+#[cfg(any(feature = "capture", feature = "replay"))]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct PlainTexture {
+    data: String, // path of the raw texel file, joined onto the capture resource root when read or written
+    size: DeviceIntSize,
+    format: ImageFormat,
+    filter: TextureFilter,
+    has_depth: bool, // `save_texture` stores `texture.supports_depth()` here
+    category: Option<TextureCacheCategory>, // `load_capture` falls back to `Standalone` when `None`
+}
+
+/// Serialized renderer state, written by `save_capture` and read by `load_capture` as the "renderer" resource.
+#[cfg(any(feature = "capture", feature = "replay"))]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct PlainRenderer {
+    device_size: Option<DeviceIntSize>,
+    gpu_cache: PlainTexture,
+    gpu_cache_frame_id: FrameId,
+    textures: FastHashMap<CacheTextureId, PlainTexture>,
+}
+
+#[cfg(any(feature = "capture", feature = "replay"))]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct PlainExternalResources {
+    images: Vec<ExternalCaptureImage> // serialized as the "external_resources" resource
+}
+
+#[cfg(feature = "replay")]
+enum CapturedExternalImageData {
+    NativeTexture(gl::GLuint), // GL id; `lock` exposes it as `ExternalImageSource::NativeTexture`
+    Buffer(Arc<Vec<u8>>), // shared bytes; `lock` exposes them as `ExternalImageSource::RawData`
+}
+
+#[cfg(feature = "replay")]
+struct DummyExternalImageHandler {
+    data: FastHashMap<(ExternalImageId, u8), (CapturedExternalImageData, TexelRect)>, // keyed by (image id, channel index)
+}
+
+#[cfg(feature = "replay")]
+impl ExternalImageHandler for DummyExternalImageHandler {
+    fn lock(&mut self, key: ExternalImageId, channel_index: u8) -> ExternalImage {
+        // Indexing panics if nothing was captured for `(key, channel_index)`.
+        let (captured, uv) = &self.data[&(key, channel_index)];
+        let source = match captured {
+            CapturedExternalImageData::NativeTexture(tid) => ExternalImageSource::NativeTexture(*tid),
+            CapturedExternalImageData::Buffer(arc) => ExternalImageSource::RawData(&*arc),
+        };
+        ExternalImage { uv: *uv, source }
+    }
+    /// Does nothing: `lock` only hands out data that stays stored in `self.data`.
+    fn unlock(&mut self, _key: ExternalImageId, _channel_index: u8) {}
+}
+
+#[derive(Default)]
+pub struct PipelineInfo {
+    pub epochs: FastHashMap<(PipelineId, DocumentId), Epoch>, // NOTE(review): presumably the current epoch per (pipeline, document) - confirm
+    pub removed_pipelines: Vec<(PipelineId, DocumentId)>,
+}
+
+impl Renderer {
+    /// Dumps `texture` to `<root>/textures/<name>.raw` and returns the `PlainTexture` describing it.
+    /// With the "png" feature a `textures/<name>-0.png` preview is written as well.
+    #[cfg(feature = "capture")]
+    fn save_texture(
+        texture: &Texture, category: Option<TextureCacheCategory>, name: &str, root: &PathBuf, device: &mut Device
+    ) -> PlainTexture {
+        use std::fs;
+        use std::io::Write;
+
+        let short_path = format!("textures/{}.raw", name);
+        let bytes_per_pixel = texture.get_format().bytes_per_pixel();
+        let read_format = texture.get_format();
+        let rect_size = texture.get_dimensions();
+
+        let mut file = fs::File::create(root.join(&short_path))
+            .expect(&format!("Unable to create {}", short_path));
+        let bytes_per_texture = (rect_size.width * rect_size.height * bytes_per_pixel) as usize;
+        let mut data = vec![0; bytes_per_texture];
+
+        //TODO: instead of reading from an FBO with `read_pixels*`, we could
+        // read from textures directly with `get_tex_image*`.
+        let rect = device_size_as_framebuffer_size(rect_size).into();
+        device.attach_read_texture(texture);
+        // Read the texels back *before* the PNG preview is encoded: for every format other
+        // than RGBAF32 the preview is built from `data`, which would otherwise still be zeros.
+        device.read_pixels_into(rect, read_format, &mut data);
+        #[cfg(feature = "png")]
+        {
+            let mut png_data;
+            let (data_ref, format) = match texture.get_format() {
+                ImageFormat::RGBAF32 => {
+                    png_data = vec![0; (rect_size.width * rect_size.height * 4) as usize];
+                    device.read_pixels_into(rect, ImageFormat::RGBA8, &mut png_data);
+                    (&png_data, ImageFormat::RGBA8)
+                }
+                fm => (&data, fm),
+            };
+            CaptureConfig::save_png(
+                root.join(format!("textures/{}-{}.png", name, 0)),
+                rect_size, format,
+                None,
+                data_ref,
+            );
+        }
+        file.write_all(&data).unwrap();
+
+        PlainTexture {
+            data: short_path,
+            size: rect_size,
+            format: texture.get_format(),
+            filter: texture.get_filter(),
+            has_depth: texture.supports_depth(),
+            category,
+        }
+    }
+
+    /// Recreates a captured texture: reads the raw texel file `plain.data` under `root`, creates a
+    /// device texture from `plain`'s format, size and filter, and uploads the texels into it.
+    #[cfg(feature = "replay")]
+    fn load_texture(
+        target: ImageBufferKind,
+        plain: &PlainTexture,
+        rt_info: Option<RenderTargetInfo>,
+        root: &PathBuf,
+        device: &mut Device
+    ) -> (Texture, Vec<u8>)
+    {
+        use std::fs::File;
+        use std::io::Read;
+
+        let texels = {
+            let mut bytes = Vec::new();
+            let mut raw_file = File::open(root.join(&plain.data))
+                .expect(&format!("Unable to open texture at {}", plain.data));
+            raw_file.read_to_end(&mut bytes).unwrap();
+            bytes
+        };
+
+        let size = &plain.size;
+        let texture = device.create_texture(
+            target, plain.format, size.width, size.height, plain.filter, rt_info,
+        );
+        device.upload_texture_immediate(&texture, &texels);
+
+        (texture, texels)
+    }
+
+    #[cfg(feature = "capture")]
+    fn save_capture(
+        &mut self,
+        config: CaptureConfig,
+        deferred_images: Vec<ExternalCaptureImage>,
+    ) {
+        use std::fs;
+        use std::io::Write;
+        use api::ExternalImageData;
+        use crate::render_api::CaptureBits;
+        // Saves external images and/or the GPU cache and cached textures (per `config.bits`), then profiler stats.
+        let root = config.resource_root();
+
+        self.device.begin_frame();
+        let _gm = self.gpu_profiler.start_marker("read GPU data");
+        self.device.bind_read_target_impl(self.read_fbo, DeviceIntPoint::zero());
+
+        if config.bits.contains(CaptureBits::EXTERNAL_RESOURCES) && !deferred_images.is_empty() {
+            info!("saving external images");
+            let mut arc_map = FastHashMap::<*const u8, String>::default();
+            let mut tex_map = FastHashMap::<u32, String>::default();
+            let handler = self.external_image_handler
+                .as_mut()
+                .expect("Unable to lock the external image handler!");
+            for def in &deferred_images {
+                info!("\t{}", def.short_path);
+                let ExternalImageData { id, channel_index, image_type } = def.external;
+                // Lock the image to read its data; every image is unlocked again once this loop has finished.
+                let ext_image = handler.lock(id, channel_index);
+                let (data, short_path) = match ext_image.source {
+                    ExternalImageSource::RawData(data) => {
+                        let arc_id = arc_map.len() + 1;
+                        match arc_map.entry(data.as_ptr()) { // keyed by data pointer: a buffer seen before is not written again
+                            Entry::Occupied(e) => {
+                                (None, e.get().clone())
+                            }
+                            Entry::Vacant(e) => {
+                                let short_path = format!("externals/d{}.raw", arc_id);
+                                (Some(data.to_vec()), e.insert(short_path).clone())
+                            }
+                        }
+                    }
+                    ExternalImageSource::NativeTexture(gl_id) => {
+                        let tex_id = tex_map.len() + 1;
+                        match tex_map.entry(gl_id) { // keyed by GL id: a texture seen before is not read back again
+                            Entry::Occupied(e) => {
+                                (None, e.get().clone())
+                            }
+                            Entry::Vacant(e) => {
+                                let target = match image_type {
+                                    ExternalImageType::TextureHandle(target) => target,
+                                    ExternalImageType::Buffer => unreachable!(),
+                                };
+                                info!("\t\tnative texture of target {:?}", target);
+                                self.device.attach_read_texture_external(gl_id, target);
+                                let data = self.device.read_pixels(&def.descriptor);
+                                let short_path = format!("externals/t{}.raw", tex_id);
+                                (Some(data), e.insert(short_path).clone())
+                            }
+                        }
+                    }
+                    ExternalImageSource::Invalid => {
+                        info!("\t\tinvalid source!");
+                        (None, String::new())
+                    }
+                };
+                if let Some(bytes) = data {
+                    fs::File::create(root.join(&short_path))
+                        .expect(&format!("Unable to create {}", short_path))
+                        .write_all(&bytes)
+                        .unwrap();
+                    #[cfg(feature = "png")]
+                    CaptureConfig::save_png(
+                        root.join(&short_path).with_extension("png"),
+                        def.descriptor.size,
+                        def.descriptor.format,
+                        def.descriptor.stride,
+                        &bytes,
+                    );
+                }
+                let plain = PlainExternalImage {
+                    data: short_path,
+                    external: def.external,
+                    uv: ext_image.uv,
+                };
+                config.serialize_for_resource(&plain, &def.short_path);
+            }
+            for def in &deferred_images {
+                handler.unlock(def.external.id, def.external.channel_index);
+            }
+            let plain_external = PlainExternalResources {
+                images: deferred_images,
+            };
+            config.serialize_for_resource(&plain_external, "external_resources");
+        }
+
+        if config.bits.contains(CaptureBits::FRAME) {
+            let path_textures = root.join("textures");
+            if !path_textures.is_dir() {
+                fs::create_dir(&path_textures).unwrap();
+            }
+
+            info!("saving GPU cache");
+            self.update_gpu_cache(); // flush pending updates
+            let mut plain_self = PlainRenderer {
+                device_size: self.device_size,
+                gpu_cache: Self::save_texture(
+                    self.gpu_cache_texture.get_texture(),
+                    None, "gpu", &root, &mut self.device,
+                ),
+                gpu_cache_frame_id: self.gpu_cache_frame_id,
+                textures: FastHashMap::default(),
+            };
+
+            info!("saving cached textures");
+            for (id, item) in &self.texture_resolver.texture_cache_map {
+                let file_name = format!("cache-{}", plain_self.textures.len() + 1);
+                info!("\t{}", file_name);
+                let plain = Self::save_texture(&item.texture, Some(item.category), &file_name, &root, &mut self.device);
+                plain_self.textures.insert(*id, plain);
+            }
+
+            config.serialize_for_resource(&plain_self, "renderer");
+        }
+
+        self.device.reset_read_target();
+        self.device.end_frame();
+
+        let mut stats_file = fs::File::create(config.root.join("profiler-stats.txt"))
+            .expect(&format!("Unable to create profiler-stats.txt"));
+        if self.debug_flags.intersects(DebugFlags::PROFILER_DBG | DebugFlags::PROFILER_CAPTURE) {
+            self.profiler.dump_stats(&mut stats_file).unwrap();
+        } else {
+            writeln!(stats_file, "Turn on PROFILER_DBG or PROFILER_CAPTURE to get stats here!").unwrap();
+        }
+
+        info!("done.");
+    }
+
+    #[cfg(feature = "replay")]
+    fn load_capture(
+        &mut self,
+        config: CaptureConfig,
+        plain_externals: Vec<PlainExternalImage>,
+    ) {
+        use std::{fs::File, io::Read};
+        // Restores external images, then cached textures and the GPU cache if a "renderer" resource exists.
+        info!("loading external buffer-backed images");
+        assert!(self.texture_resolver.external_images.is_empty());
+        let mut raw_map = FastHashMap::<String, Arc<Vec<u8>>>::default();
+        let mut image_handler = DummyExternalImageHandler {
+            data: FastHashMap::default(),
+        };
+
+        let root = config.resource_root();
+
+        // Note: this is a `SCENE` level population of the external image handlers
+        // It would put both external buffers and texture into the map.
+        // But latter are going to be overwritten later in this function
+        // if we are in the `FRAME` level.
+        for plain_ext in plain_externals {
+            let data = match raw_map.entry(plain_ext.data) {
+                Entry::Occupied(e) => e.get().clone(),
+                Entry::Vacant(e) => {
+                    let mut buffer = Vec::new();
+                    File::open(root.join(e.key()))
+                        .expect(&format!("Unable to open {}", e.key()))
+                        .read_to_end(&mut buffer)
+                        .unwrap();
+                    e.insert(Arc::new(buffer)).clone()
+                }
+            };
+            let ext = plain_ext.external;
+            let value = (CapturedExternalImageData::Buffer(data), plain_ext.uv);
+            image_handler.data.insert((ext.id, ext.channel_index), value);
+        }
+
+        if let Some(external_resources) = config.deserialize_for_resource::<PlainExternalResources, _>("external_resources") {
+            info!("loading external texture-backed images");
+            let mut native_map = FastHashMap::<String, gl::GLuint>::default();
+            for ExternalCaptureImage { short_path, external, descriptor } in external_resources.images {
+                let target = match external.image_type {
+                    ExternalImageType::TextureHandle(target) => target,
+                    ExternalImageType::Buffer => continue,
+                };
+                let plain_ext = config.deserialize_for_resource::<PlainExternalImage, _>(&short_path)
+                    .expect(&format!("Unable to read {}.ron", short_path));
+                let key = (external.id, external.channel_index);
+
+                let tid = match native_map.entry(plain_ext.data) {
+                    Entry::Occupied(e) => e.get().clone(),
+                    Entry::Vacant(e) => {
+                        let plain_tex = PlainTexture {
+                            data: e.key().clone(),
+                            size: descriptor.size,
+                            format: descriptor.format,
+                            filter: TextureFilter::Linear,
+                            has_depth: false,
+                            category: None,
+                        };
+                        let t = Self::load_texture(
+                            target,
+                            &plain_tex,
+                            None,
+                            &root,
+                            &mut self.device
+                        );
+                        let extex = t.0.into_external();
+                        self.owned_external_images.insert(key, extex.clone());
+                        e.insert(extex.internal_id()).clone()
+                    }
+                };
+
+                let value = (CapturedExternalImageData::NativeTexture(tid), plain_ext.uv);
+                image_handler.data.insert(key, value);
+            }
+        }
+
+        self.device.begin_frame();
+        self.gpu_cache_texture.remove_texture(&mut self.device);
+
+        if let Some(renderer) = config.deserialize_for_resource::<PlainRenderer, _>("renderer") {
+            info!("loading cached textures");
+            self.device_size = renderer.device_size;
+
+            for (_id, item) in self.texture_resolver.texture_cache_map.drain() {
+                self.device.delete_texture(item.texture);
+            }
+            for (id, texture) in renderer.textures {
+                info!("\t{}", texture.data);
+                let target = ImageBufferKind::Texture2D;
+                let t = Self::load_texture(
+                    target,
+                    &texture,
+                    Some(RenderTargetInfo { has_depth: texture.has_depth }),
+                    &root,
+                    &mut self.device
+                );
+                self.texture_resolver.texture_cache_map.insert(id, CacheTexture {
+                    texture: t.0,
+                    category: texture.category.unwrap_or(TextureCacheCategory::Standalone),
+                });
+            }
+
+            info!("loading gpu cache");
+            let (t, gpu_cache_data) = Self::load_texture(
+                ImageBufferKind::Texture2D,
+                &renderer.gpu_cache,
+                Some(RenderTargetInfo { has_depth: false }),
+                &root,
+                &mut self.device,
+            );
+            self.gpu_cache_texture.load_from_data(t, gpu_cache_data);
+            self.gpu_cache_frame_id = renderer.gpu_cache_frame_id;
+        } else {
+            info!("loading cached textures");
+            // The frame begun above is still open (do not begin another one); just drop the stale cached textures.
+            for (_id, item) in self.texture_resolver.texture_cache_map.drain() {
+                self.device.delete_texture(item.texture);
+            }
+        }
+        self.device.end_frame();
+
+        self.external_image_handler = Some(Box::new(image_handler) as Box<_>);
+        info!("done.");
+    }
+}
+
+#[derive(Clone, Copy, PartialEq)]
+enum FramebufferKind {
+    Main, // the only kind for which the blend helpers honour `DebugFlags::SHOW_OVERDRAW`
+    Other,
+}
+
+/// Returns true when a debug flag asks for batches of this `kind` to be skipped.
+fn should_skip_batch(kind: &BatchKind, flags: DebugFlags) -> bool {
+    // Text runs and linear-gradient brushes each have their own disable flag;
+    // no other batch kind is ever skipped by this function.
+    let disable_flag = match kind {
+        BatchKind::TextRun(_) => DebugFlags::DISABLE_TEXT_PRIMS,
+        BatchKind::Brush(BrushBatchKind::LinearGradient) => DebugFlags::DISABLE_GRADIENT_PRIMS,
+        _ => return false,
+    };
+    flags.contains(disable_flag)
+}
+
+impl CompositeState {
+    /// Use the client provided native compositor interface to add all picture
+    /// cache tiles to the OS compositor.
+    ///
+    /// Panics if a surface in the descriptor has no native surface id.
+    fn composite_native(
+        &self,
+        clear_color: ColorF,
+        dirty_rects: &[DeviceIntRect],
+        device: &mut Device,
+        compositor: &mut dyn Compositor,
+    ) {
+        // Add each surface to the visual tree. z-order is implicit based on
+        // order added. Offset and clip rect apply to all tiles within this
+        // surface.
+        for surface in &self.descriptor.surfaces {
+            let id = surface.surface_id.expect("bug: no native surface allocated");
+            let clip = surface.clip_rect.to_i32();
+            compositor.add_surface(device, id, surface.transform, clip, surface.image_rendering);
+        }
+
+        // All surfaces are registered; the last argument is an empty slice.
+        compositor.start_compositing(device, clear_color, dirty_rects, &[]);
+    }
+}
+
+mod tests {
+    #[test]
+    fn test_buffer_damage_tracker() {
+        use super::BufferDamageTracker;
+        use api::units::{DevicePoint, DeviceRect, DeviceSize};
+        // Expectations encoded below: arguments 0 and 4 always yield `None`, 1 always yields a zero rect.
+        let mut tracker = BufferDamageTracker::default();
+        assert_eq!(tracker.get_damage_rect(0), None);
+        assert_eq!(tracker.get_damage_rect(1), Some(DeviceRect::zero()));
+        assert_eq!(tracker.get_damage_rect(2), Some(DeviceRect::zero()));
+        assert_eq!(tracker.get_damage_rect(3), Some(DeviceRect::zero()));
+        assert_eq!(tracker.get_damage_rect(4), None);
+
+        let damage1 = DeviceRect::from_origin_and_size(DevicePoint::new(10.0, 10.0), DeviceSize::new(10.0, 10.0));
+        let damage2 = DeviceRect::from_origin_and_size(DevicePoint::new(20.0, 20.0), DeviceSize::new(10.0, 10.0));
+        let combined = damage1.union(&damage2);
+
+        tracker.push_dirty_rect(&damage1);
+        assert_eq!(tracker.get_damage_rect(0), None);
+        assert_eq!(tracker.get_damage_rect(1), Some(DeviceRect::zero()));
+        assert_eq!(tracker.get_damage_rect(2), Some(damage1)); // after one push, 2 and 3 both report that rect
+        assert_eq!(tracker.get_damage_rect(3), Some(damage1));
+        assert_eq!(tracker.get_damage_rect(4), None);
+
+        tracker.push_dirty_rect(&damage2);
+        assert_eq!(tracker.get_damage_rect(0), None);
+        assert_eq!(tracker.get_damage_rect(1), Some(DeviceRect::zero()));
+        assert_eq!(tracker.get_damage_rect(2), Some(damage2)); // 2 reports only the most recently pushed rect
+        assert_eq!(tracker.get_damage_rect(3), Some(combined)); // 3 reports the union of the last two pushed rects
+        assert_eq!(tracker.get_damage_rect(4), None);
+    }
+}
diff --git a/gfx/wr/webrender/src/renderer/shade.rs b/gfx/wr/webrender/src/renderer/shade.rs
new file mode 100644
index 0000000000..72b6578a4f
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/shade.rs
@@ -0,0 +1,1496 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{ImageBufferKind, units::DeviceSize};
+use crate::batch::{BatchKey, BatchKind, BrushBatchKind, BatchFeatures};
+use crate::composite::{CompositeFeatures, CompositeSurfaceFormat};
+use crate::device::{Device, Program, ShaderError};
+use euclid::default::Transform3D;
+use glyph_rasterizer::GlyphFormat;
+use crate::renderer::{
+    desc,
+    BlendMode, DebugFlags, RendererError, WebRenderOptions,
+    TextureSampler, VertexArrayKind, ShaderPrecacheFlags,
+};
+use crate::profiler::{self, TransactionProfile, ns_to_ms};
+
+use gleam::gl::GlType;
+use time::precise_time_ns;
+
+use std::cell::RefCell;
+use std::rc::Rc;
+
+use webrender_build::shader::{ShaderFeatures, ShaderFeatureFlags, get_shader_features};
+
+/// Which extension version to use for texture external support.
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum TextureExternalVersion {
+    /// GL_OES_EGL_image_external_essl3 (Compatible with ESSL 3.0 and
+    /// later shaders, but not supported on all GLES 3 devices.)
+    ESSL3,
+    /// GL_OES_EGL_image_external (Compatible with ESSL 1.0 shaders)
+    ESSL1,
+}
+
+fn get_feature_string(kind: ImageBufferKind, texture_external_version: TextureExternalVersion) -> &'static str {
+    match kind { // only the external-texture define depends on the extension version
+        ImageBufferKind::Texture2D => "TEXTURE_2D",
+        ImageBufferKind::TextureRect => "TEXTURE_RECT",
+        ImageBufferKind::TextureExternal if texture_external_version == TextureExternalVersion::ESSL3 => "TEXTURE_EXTERNAL",
+        ImageBufferKind::TextureExternal => "TEXTURE_EXTERNAL_ESSL1", // remaining case: ESSL1
+    }
+}
+
+/// Whether `kind` is supported on the given GL flavour: 2D textures always, rectangle
+/// textures only on `GlType::Gl`, external textures only on `GlType::Gles`.
+fn has_platform_support(kind: ImageBufferKind, gl_type: &GlType) -> bool {
+    match kind {
+        ImageBufferKind::Texture2D => true,
+        ImageBufferKind::TextureRect => match *gl_type { GlType::Gl => true, GlType::Gles => false },
+        ImageBufferKind::TextureExternal => match *gl_type { GlType::Gles => true, GlType::Gl => false },
+    }
+}
+
+pub const IMAGE_BUFFER_KINDS: [ImageBufferKind; 3] = [ // kinds `Shaders::new` iterates to build per-kind shaders
+    ImageBufferKind::Texture2D,
+    ImageBufferKind::TextureRect,
+    ImageBufferKind::TextureExternal,
+];
+
+const ADVANCED_BLEND_FEATURE: &str = "ADVANCED_BLEND"; // extra feature of the advanced-blend brush variant
+const ALPHA_FEATURE: &str = "ALPHA_PASS"; // extra feature of the alpha brush variant
+const DEBUG_OVERDRAW_FEATURE: &str = "DEBUG_OVERDRAW"; // extra feature of the overdraw-debug brush variant
+const DITHERING_FEATURE: &str = "DITHERING"; // passed to `brush_linear_gradient` when dithering is enabled
+const DUAL_SOURCE_FEATURE: &str = "DUAL_SOURCE_BLENDING"; // extra feature of the dual-source variants
+const FAST_PATH_FEATURE: &str = "FAST_PATH"; // used by the `*_fast` mask and clip-rectangle shaders
+
+pub(crate) enum ShaderKind { // selects how `LazilyCompiledShader` creates and links its program
+    Primitive,
+    Cache(VertexArrayKind), // carries the vertex array kind used to pick the vertex descriptor
+    ClipCache(VertexArrayKind), // as `Cache`, but created through `create_clip_shader`
+    Brush,
+    Text,
+    #[allow(dead_code)]
+    VectorStencil,
+    #[allow(dead_code)]
+    VectorCover,
+    #[allow(dead_code)]
+    Resolve,
+    Composite,
+    Clear,
+    Copy,
+}
+
+pub struct LazilyCompiledShader { // a shader variant whose program is only created when first needed
+    program: Option<Program>, // `None` until `get_internal` creates the program
+    name: &'static str, // shader name handed to `create_program`
+    kind: ShaderKind, // decides the creation path, vertex descriptor and sampler set
+    cached_projection: Transform3D<f32>, // projection last passed to `set_uniforms` by `bind`
+    features: Vec<&'static str>, // sorted feature list of this variant
+}
+
+impl LazilyCompiledShader {
+    /// Registers a shader variant, building it right away when `precache_flags` asks for it.
+    ///
+    /// `unsorted_features` is sorted and joined with `,` to form the configuration string
+    /// that is looked up under `name` in `shader_list`.
+    ///
+    /// # Panics
+    /// Panics if that configuration is not listed for `name` in `shader_list`.
+    ///
+    /// # Errors
+    /// Propagates any `ShaderError` raised while precaching the program.
+    pub(crate) fn new(
+        kind: ShaderKind,
+        name: &'static str,
+        unsorted_features: &[&'static str],
+        device: &mut Device,
+        precache_flags: ShaderPrecacheFlags,
+        shader_list: &ShaderFeatures,
+        profile: &mut TransactionProfile,
+    ) -> Result<Self, ShaderError> {
+
+        let mut features = unsorted_features.to_vec();
+        features.sort();
+
+        // Ensure this shader config is in the available shader list so that we get
+        // alerted if the list gets out-of-date when shaders or features are added.
+        let config = features.join(",");
+        let is_listed = shader_list.get(name).map_or(false, |f| f.contains(&config));
+        assert!(
+            is_listed,
+            "shader \"{}\" with features \"{}\" not in available shader list",
+            name,
+            config,
+        );
+
+        let mut shader = LazilyCompiledShader {
+            program: None,
+            name,
+            kind,
+            // Note: identity isn't really the default state, but there is no chance
+            // an actual projection passed to `bind` would accidentally match it.
+            cached_projection: Transform3D::identity(),
+            features,
+        };
+
+        let precache_requested = precache_flags
+            .intersects(ShaderPrecacheFlags::ASYNC_COMPILE | ShaderPrecacheFlags::FULL_COMPILE);
+        if precache_requested {
+            let precache_start = precise_time_ns();
+            shader.get_internal(device, precache_flags, profile)?;
+            let precache_end = precise_time_ns();
+            debug!("[C: {:.1} ms ] Precache {} {:?}",
+                (precache_end - precache_start) as f64 / 1000000.0,
+                name,
+                unsorted_features
+            );
+        }
+
+        Ok(shader)
+    }
+
+    /// Binds this shader's program on `device`, building it first if necessary.
+    ///
+    /// If the program cannot be built, the error is pushed onto `renderer_errors` and
+    /// nothing is bound. `projection` is only handed to `set_uniforms` when it differs
+    /// from the projection cached by a previous call.
+    pub fn bind(
+        &mut self,
+        device: &mut Device,
+        projection: &Transform3D<f32>,
+        texture_size: Option<DeviceSize>,
+        renderer_errors: &mut Vec<RendererError>,
+        profile: &mut TransactionProfile,
+    ) {
+        let projection_changed = self.cached_projection != *projection;
+        let program = match self.get_internal(device, ShaderPrecacheFlags::FULL_COMPILE, profile) {
+            Err(e) => {
+                renderer_errors.push(RendererError::from(e));
+                return;
+            }
+            Ok(program) => program,
+        };
+
+        device.bind_program(program);
+        if let Some(size) = texture_size {
+            device.set_shader_texture_size(program, size);
+        }
+        if projection_changed {
+            device.set_uniforms(program, projection);
+            // thanks NLL for this (`program` technically borrows `self`)
+            self.cached_projection = *projection;
+        }
+    }
+
+    /// Returns the program, creating it on first use.
+    ///
+    /// When `precache_flags` contains `FULL_COMPILE` and the program is not yet
+    /// initialized, it is also linked against the vertex descriptor matching `self.kind`
+    /// and its samplers are bound. Time spent in either step is added to
+    /// `profiler::SHADER_BUILD_TIME` on `profile`.
+    ///
+    /// # Errors
+    /// Returns the `ShaderError` from program creation or linking.
+    fn get_internal(
+        &mut self,
+        device: &mut Device,
+        precache_flags: ShaderPrecacheFlags,
+        profile: &mut TransactionProfile,
+    ) -> Result<&mut Program, ShaderError> {
+        if self.program.is_none() {
+            let create_start = precise_time_ns();
+            // Only clip-cache shaders use `create_clip_shader`; every other kind is
+            // created through `create_prim_shader`.
+            let created = match self.kind {
+                ShaderKind::ClipCache(..) => {
+                    create_clip_shader(self.name, device, &self.features)
+                }
+                ShaderKind::Primitive |
+                ShaderKind::Cache(..) |
+                ShaderKind::Brush |
+                ShaderKind::Text |
+                ShaderKind::VectorStencil |
+                ShaderKind::VectorCover |
+                ShaderKind::Resolve |
+                ShaderKind::Composite |
+                ShaderKind::Clear |
+                ShaderKind::Copy => {
+                    create_prim_shader(self.name, device, &self.features)
+                }
+            };
+            self.program = Some(created?);
+
+            let create_end = precise_time_ns();
+            profile.add(profiler::SHADER_BUILD_TIME, ns_to_ms(create_end - create_start));
+        }
+
+        // The branch above guarantees that a program exists by this point.
+        let program = self.program.as_mut().unwrap();
+
+        if precache_flags.contains(ShaderPrecacheFlags::FULL_COMPILE) && !program.is_initialized() {
+            let link_start = precise_time_ns();
+
+            // Cache and clip-cache kinds carry their own vertex array kind.
+            let vertex_format = match self.kind {
+                ShaderKind::Cache(format) | ShaderKind::ClipCache(format) => format,
+                ShaderKind::Primitive |
+                ShaderKind::Brush |
+                ShaderKind::Text => VertexArrayKind::Primitive,
+                ShaderKind::VectorStencil => VertexArrayKind::VectorStencil,
+                ShaderKind::VectorCover => VertexArrayKind::VectorCover,
+                ShaderKind::Resolve => VertexArrayKind::Resolve,
+                ShaderKind::Composite => VertexArrayKind::Composite,
+                ShaderKind::Clear => VertexArrayKind::Clear,
+                ShaderKind::Copy => VertexArrayKind::Copy,
+            };
+
+            let vertex_descriptor = match vertex_format {
+                VertexArrayKind::Primitive => &desc::PRIM_INSTANCES,
+                VertexArrayKind::LineDecoration => &desc::LINE,
+                VertexArrayKind::FastLinearGradient => &desc::FAST_LINEAR_GRADIENT,
+                VertexArrayKind::LinearGradient => &desc::LINEAR_GRADIENT,
+                VertexArrayKind::RadialGradient => &desc::RADIAL_GRADIENT,
+                VertexArrayKind::ConicGradient => &desc::CONIC_GRADIENT,
+                VertexArrayKind::Blur => &desc::BLUR,
+                VertexArrayKind::ClipImage => &desc::CLIP_IMAGE,
+                VertexArrayKind::ClipRect => &desc::CLIP_RECT,
+                VertexArrayKind::ClipBoxShadow => &desc::CLIP_BOX_SHADOW,
+                VertexArrayKind::VectorStencil => &desc::VECTOR_STENCIL,
+                VertexArrayKind::VectorCover => &desc::VECTOR_COVER,
+                VertexArrayKind::Border => &desc::BORDER,
+                VertexArrayKind::Scale => &desc::SCALE,
+                VertexArrayKind::Resolve => &desc::RESOLVE,
+                VertexArrayKind::SvgFilter => &desc::SVG_FILTER,
+                VertexArrayKind::Composite => &desc::COMPOSITE,
+                VertexArrayKind::Clear => &desc::CLEAR,
+                VertexArrayKind::Copy => &desc::COPY,
+                VertexArrayKind::Mask => &desc::MASK,
+            };
+
+            device.link_program(program, vertex_descriptor)?;
+            device.bind_program(program);
+
+            // Clip-cache shaders bind a shorter sampler list than all other kinds;
+            // pick the list first so the bind call itself is written only once.
+            let samplers: &[(&'static str, TextureSampler)] = match self.kind {
+                ShaderKind::ClipCache(..) => &[
+                    ("sColor0", TextureSampler::Color0),
+                    ("sTransformPalette", TextureSampler::TransformPalette),
+                    ("sRenderTasks", TextureSampler::RenderTasks),
+                    ("sGpuCache", TextureSampler::GpuCache),
+                    ("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF),
+                    ("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI),
+                    ("sGpuBuffer", TextureSampler::GpuBuffer),
+                ],
+                _ => &[
+                    ("sColor0", TextureSampler::Color0),
+                    ("sColor1", TextureSampler::Color1),
+                    ("sColor2", TextureSampler::Color2),
+                    ("sDither", TextureSampler::Dither),
+                    ("sTransformPalette", TextureSampler::TransformPalette),
+                    ("sRenderTasks", TextureSampler::RenderTasks),
+                    ("sGpuCache", TextureSampler::GpuCache),
+                    ("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF),
+                    ("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI),
+                    ("sClipMask", TextureSampler::ClipMask),
+                    ("sGpuBuffer", TextureSampler::GpuBuffer),
+                ],
+            };
+            device.bind_shader_samplers(&program, samplers);
+
+            let link_end = precise_time_ns();
+            profile.add(profiler::SHADER_BUILD_TIME, ns_to_ms(link_end - link_start));
+        }
+
+        Ok(program)
+    }
+
+    /// Consumes the shader and, if a program was ever created, hands it to
+    /// `device.delete_program`.
+    fn deinit(self, device: &mut Device) {
+        if let Some(program) = self.program {
+            device.delete_program(program);
+        }
+    }
+}
+
+/// A brush shader supports two main modes (plus optional and debug variants):
+/// - opaque:
+///   Used for completely opaque primitives,
+///   or inside segments of partially
+///   opaque primitives. Assumes no need
+///   for clip masks, AA etc.
+/// - alpha:
+///   Used for brush primitives in the alpha
+///   pass. Assumes that AA should be applied
+///   along the primitive edge, and also that
+///   clip mask is present.
+struct BrushShader {
+    opaque: LazilyCompiledShader,
+    alpha: LazilyCompiledShader,
+    advanced_blend: Option<LazilyCompiledShader>, // `Some` only when built with `use_advanced_blend`
+    dual_source: Option<LazilyCompiledShader>, // `Some` only when built with `use_dual_source`
+    debug_overdraw: LazilyCompiledShader, // returned by `get` whenever `SHOW_OVERDRAW` is set
+}
+
+impl BrushShader {
+    /// Builds every variant of the brush shader `name`.
+    ///
+    /// Variants are created in this order: opaque (`features` as given), alpha (opaque
+    /// plus `ALPHA_PASS`), the optional advanced-blend and dual-source variants (alpha
+    /// plus one extra feature each) and finally the overdraw-debug variant (`features`
+    /// plus `DEBUG_OVERDRAW`).
+    ///
+    /// * `use_advanced_blend` - also create the `ADVANCED_BLEND` variant.
+    /// * `use_dual_source` - also create the `DUAL_SOURCE_BLENDING` variant.
+    ///
+    /// # Errors
+    /// Returns the first `ShaderError` reported by `LazilyCompiledShader::new`.
+    fn new(
+        name: &'static str,
+        device: &mut Device,
+        features: &[&'static str],
+        precache_flags: ShaderPrecacheFlags,
+        shader_list: &ShaderFeatures,
+        use_advanced_blend: bool,
+        use_dual_source: bool,
+        profile: &mut TransactionProfile,
+    ) -> Result<Self, ShaderError> {
+        // All variants share every argument except the feature list.
+        let mut build = |variant_features: &[&'static str]| {
+            LazilyCompiledShader::new(
+                ShaderKind::Brush,
+                name,
+                variant_features,
+                device,
+                precache_flags,
+                shader_list,
+                profile,
+            )
+        };
+
+        // Opaque: the caller's features, unchanged.
+        let opaque = build(features)?;
+
+        // Alpha: opaque plus ALPHA_PASS.
+        let mut alpha_features = features.to_vec();
+        alpha_features.push(ALPHA_FEATURE);
+        let alpha = build(&alpha_features)?;
+
+        // Advanced blend (optional): alpha plus ADVANCED_BLEND.
+        let advanced_blend = if use_advanced_blend {
+            let mut blend_features = alpha_features.clone();
+            blend_features.push(ADVANCED_BLEND_FEATURE);
+            Some(build(&blend_features)?)
+        } else {
+            None
+        };
+
+        // Dual source (optional): alpha plus DUAL_SOURCE_BLENDING.
+        let dual_source = if use_dual_source {
+            let mut dual_features = alpha_features.clone();
+            dual_features.push(DUAL_SOURCE_FEATURE);
+            Some(build(&dual_features)?)
+        } else {
+            None
+        };
+
+        // Overdraw debugging: the caller's features plus DEBUG_OVERDRAW.
+        let mut overdraw_features = features.to_vec();
+        overdraw_features.push(DEBUG_OVERDRAW_FEATURE);
+        let debug_overdraw = build(&overdraw_features)?;
+
+        Ok(BrushShader {
+            opaque,
+            alpha,
+            advanced_blend,
+            dual_source,
+            debug_overdraw,
+        })
+    }
+
+    /// Selects the variant to use for the given blend mode and batch features.
+    ///
+    /// `SHOW_OVERDRAW` in `debug_flags` takes precedence over the blend mode.
+    ///
+    /// # Panics
+    /// Panics if an advanced-blend or dual-source mode is requested but that variant
+    /// was not created by `new`.
+    fn get(&mut self, blend_mode: BlendMode, features: BatchFeatures, debug_flags: DebugFlags)
+           -> &mut LazilyCompiledShader {
+        if debug_flags.contains(DebugFlags::SHOW_OVERDRAW) {
+            return &mut self.debug_overdraw;
+        }
+
+        match blend_mode {
+            BlendMode::None => &mut self.opaque,
+            BlendMode::Alpha |
+            BlendMode::PremultipliedAlpha |
+            BlendMode::PremultipliedDestOut |
+            BlendMode::SubpixelWithBgColor |
+            BlendMode::Screen |
+            BlendMode::PlusLighter |
+            BlendMode::Exclusion => {
+                // Without the ALPHA_PASS batch feature the opaque variant is used.
+                if features.contains(BatchFeatures::ALPHA_PASS) {
+                    &mut self.alpha
+                } else {
+                    &mut self.opaque
+                }
+            }
+            BlendMode::Advanced(_) => {
+                self.advanced_blend
+                    .as_mut()
+                    .expect("bug: no advanced blend shader loaded")
+            }
+            BlendMode::SubpixelDualSource |
+            BlendMode::MultiplyDualSource => {
+                self.dual_source
+                    .as_mut()
+                    .expect("bug: no dual source shader loaded")
+            }
+        }
+    }
+
+    /// Consumes the brush shader and deinitializes every variant that was created.
+    ///
+    /// The optional variants are skipped when they were never built.
+    fn deinit(self, device: &mut Device) {
+        let BrushShader {
+            opaque,
+            alpha,
+            advanced_blend,
+            dual_source,
+            debug_overdraw,
+        } = self;
+
+        opaque.deinit(device);
+        alpha.deinit(device);
+        for optional in advanced_blend.into_iter().chain(dual_source) {
+            optional.deinit(device);
+        }
+        debug_overdraw.deinit(device);
+    }
+}
+
+pub struct TextShader { // the text shader variants that `TextShader::get` chooses between
+    simple: LazilyCompiledShader, // returned for the `Alpha`, `Subpixel`, `Bitmap`, `ColorBitmap` formats
+    glyph_transform: LazilyCompiledShader, // returned for the `Transformed*` glyph formats
+    debug_overdraw: LazilyCompiledShader, // returned whenever `SHOW_OVERDRAW` is set
+}
+
+impl TextShader {
+    /// Builds the three variants of the text shader `name`, in this order: simple,
+    /// glyph-transform and overdraw-debug.
+    ///
+    /// Every variant's feature list is `features` followed by a fixed set of extra
+    /// features; all of them include `TEXTURE_2D`.
+    ///
+    /// # Errors
+    /// Returns the first `ShaderError` reported by `LazilyCompiledShader::new`.
+    fn new(
+        name: &'static str,
+        device: &mut Device,
+        features: &[&'static str],
+        precache_flags: ShaderPrecacheFlags,
+        shader_list: &ShaderFeatures,
+        profile: &mut TransactionProfile,
+    ) -> Result<Self, ShaderError> {
+        // Appends `extra_features` to the caller's features (order preserved) and
+        // builds that variant; all other arguments are shared.
+        let mut build = |extra_features: &[&'static str]| {
+            let variant_features = [features, extra_features].concat();
+            LazilyCompiledShader::new(
+                ShaderKind::Text,
+                name,
+                &variant_features,
+                device,
+                precache_flags,
+                shader_list,
+                profile,
+            )
+        };
+
+        // Simple variant.
+        let simple = build(&["ALPHA_PASS", "TEXTURE_2D"])?;
+
+        // Glyph-transform variant.
+        let glyph_transform = build(&["GLYPH_TRANSFORM", "ALPHA_PASS", "TEXTURE_2D"])?;
+
+        // Overdraw-debug variant.
+        let debug_overdraw = build(&["DEBUG_OVERDRAW", "TEXTURE_2D"])?;
+
+        Ok(TextShader { simple, glyph_transform, debug_overdraw })
+    }
+
+    /// Selects the variant to use for `glyph_format`.
+    ///
+    /// `SHOW_OVERDRAW` in `debug_flags` takes precedence; otherwise the transformed
+    /// glyph formats use the glyph-transform variant and all others the simple one.
+    pub fn get(
+        &mut self,
+        glyph_format: GlyphFormat,
+        debug_flags: DebugFlags,
+    ) -> &mut LazilyCompiledShader {
+        if debug_flags.contains(DebugFlags::SHOW_OVERDRAW) {
+            return &mut self.debug_overdraw;
+        }
+
+        match glyph_format {
+            GlyphFormat::TransformedAlpha |
+            GlyphFormat::TransformedSubpixel => &mut self.glyph_transform,
+            GlyphFormat::Alpha |
+            GlyphFormat::Subpixel |
+            GlyphFormat::Bitmap |
+            GlyphFormat::ColorBitmap => &mut self.simple,
+        }
+    }
+
+    /// Consumes the text shader and deinitializes its three variants in the order
+    /// simple, glyph-transform, overdraw-debug.
+    fn deinit(self, device: &mut Device) {
+        let TextShader { simple, glyph_transform, debug_overdraw } = self;
+
+        simple.deinit(device);
+        glyph_transform.deinit(device);
+        debug_overdraw.deinit(device);
+    }
+}
+
+/// Logs the shader name at debug level, then creates the program via `device.create_program`.
+fn create_prim_shader(
+    name: &'static str,
+    device: &mut Device,
+    features: &[&'static str],
+) -> Result<Program, ShaderError> {
+    debug!("PrimShader {}", name);
+    device.create_program(name, features)
+}
+
+/// Clip-cache counterpart of `create_prim_shader`; differs only in the debug log prefix.
+fn create_clip_shader(
+    name: &'static str,
+    device: &mut Device,
+    features: &[&'static str],
+) -> Result<Program, ShaderError> {
+    debug!("ClipShader {}", name);
+    device.create_program(name, features)
+}
+
+// NB: If you add a new shader here, make sure to deinitialize it
+// in `Shaders::deinit()` below.
+pub struct Shaders {
+    // These are "cache shaders". They draw intermediate results to cache
+    // targets; the results are then used by the primitive shaders.
+    // `cs_scale` has one slot per image buffer kind (`None` if unsupported).
+    pub cs_blur_a8: LazilyCompiledShader,
+    pub cs_blur_rgba8: LazilyCompiledShader,
+    pub cs_border_segment: LazilyCompiledShader,
+    pub cs_border_solid: LazilyCompiledShader,
+    pub cs_scale: Vec<Option<LazilyCompiledShader>>,
+    pub cs_line_decoration: LazilyCompiledShader,
+    pub cs_fast_linear_gradient: LazilyCompiledShader,
+    pub cs_linear_gradient: LazilyCompiledShader,
+    pub cs_radial_gradient: LazilyCompiledShader,
+    pub cs_conic_gradient: LazilyCompiledShader,
+    pub cs_svg_filter: LazilyCompiledShader,
+
+    // Brush shaders. `brush_image` / `brush_fast_image` have one slot per image buffer kind.
+    brush_solid: BrushShader,
+    brush_image: Vec<Option<BrushShader>>,
+    brush_fast_image: Vec<Option<BrushShader>>,
+    brush_blend: BrushShader,
+    brush_mix_blend: BrushShader,
+    brush_yuv_image: Vec<Option<BrushShader>>,
+    brush_linear_gradient: BrushShader,
+    brush_opacity: BrushShader,
+    brush_opacity_aa: BrushShader,
+
+    // These are "cache clip shaders". These shaders are used to
+    // draw clip instances into the cached clip mask. The results
+    // of these shaders are also used by the primitive shaders.
+    pub cs_clip_rectangle_slow: LazilyCompiledShader,
+    pub cs_clip_rectangle_fast: LazilyCompiledShader,
+    pub cs_clip_box_shadow: LazilyCompiledShader,
+    pub cs_clip_image: LazilyCompiledShader,
+
+    // These are "primitive shaders". These shaders draw and blend
+    // final results on screen. They are aware of tile boundaries.
+    // Most draw directly to the framebuffer, but some use inputs
+    // from the cache shaders to draw. Specifically, the box
+    // shadow primitive shader stretches the box shadow cache
+    // output, and the cache_image shader blits the results of
+    // a cache shader (e.g. blur) to the screen.
+    pub ps_text_run: TextShader,
+    pub ps_text_run_dual_source: Option<TextShader>, // `Some` only when dual-source blending is used
+
+    ps_split_composite: LazilyCompiledShader,
+    pub ps_quad_textured: LazilyCompiledShader,
+    pub ps_mask: LazilyCompiledShader,
+    pub ps_mask_fast: LazilyCompiledShader,
+    pub ps_clear: LazilyCompiledShader,
+    pub ps_copy: LazilyCompiledShader,
+
+    pub composite: CompositorShaders,
+}
+
+impl Shaders {
+    pub fn new(
+        device: &mut Device,
+        gl_type: GlType,
+        options: &WebRenderOptions,
+    ) -> Result<Self, ShaderError> {
+        // We have to pass a profile around a bunch but we aren't recording the initialization
+        // so use a dummy one.
+        let profile = &mut TransactionProfile::new();
+
+        let use_dual_source_blending =
+            device.get_capabilities().supports_dual_source_blending &&
+            options.allow_dual_source_blending;
+        let use_advanced_blend_equation =
+            device.get_capabilities().supports_advanced_blend_equation &&
+            options.allow_advanced_blend_equation;
+
+        let texture_external_version = if device.get_capabilities().supports_image_external_essl3 {
+            TextureExternalVersion::ESSL3
+        } else {
+            TextureExternalVersion::ESSL1
+        };
+        let mut shader_flags = get_shader_feature_flags(gl_type, texture_external_version);
+        shader_flags.set(ShaderFeatureFlags::ADVANCED_BLEND_EQUATION, use_advanced_blend_equation);
+        shader_flags.set(ShaderFeatureFlags::DUAL_SOURCE_BLENDING, use_dual_source_blending);
+        shader_flags.set(ShaderFeatureFlags::DITHERING, options.enable_dithering);
+        let shader_list = get_shader_features(shader_flags);
+
+        let brush_solid = BrushShader::new(
+            "brush_solid",
+            device,
+            &[],
+            options.precache_flags,
+            &shader_list,
+            false /* advanced blend */,
+            false /* dual source */,
+            profile,
+        )?;
+
+        let brush_blend = BrushShader::new(
+            "brush_blend",
+            device,
+            &[],
+            options.precache_flags,
+            &shader_list,
+            false /* advanced blend */,
+            false /* dual source */,
+            profile,
+        )?;
+
+        let brush_mix_blend = BrushShader::new(
+            "brush_mix_blend",
+            device,
+            &[],
+            options.precache_flags,
+            &shader_list,
+            false /* advanced blend */,
+            false /* dual source */,
+            profile,
+        )?;
+
+        let brush_linear_gradient = BrushShader::new(
+            "brush_linear_gradient",
+            device,
+            if options.enable_dithering {
+               &[DITHERING_FEATURE]
+            } else {
+               &[]
+            },
+            options.precache_flags,
+            &shader_list,
+            false /* advanced blend */,
+            false /* dual source */,
+            profile,
+        )?;
+
+        let brush_opacity_aa = BrushShader::new(
+            "brush_opacity",
+            device,
+            &["ANTIALIASING"],
+            options.precache_flags,
+            &shader_list,
+            false /* advanced blend */,
+            false /* dual source */,
+            profile,
+        )?;
+
+        let brush_opacity = BrushShader::new(
+            "brush_opacity",
+            device,
+            &[],
+            options.precache_flags,
+            &shader_list,
+            false /* advanced blend */,
+            false /* dual source */,
+            profile,
+        )?;
+
+        let cs_blur_a8 = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::Blur),
+            "cs_blur",
+            &["ALPHA_TARGET"],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_blur_rgba8 = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::Blur),
+            "cs_blur",
+            &["COLOR_TARGET"],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_svg_filter = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::SvgFilter),
+            "cs_svg_filter",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let ps_mask = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::Mask),
+            "ps_quad_mask",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let ps_mask_fast = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::Mask),
+            "ps_quad_mask",
+            &[FAST_PATH_FEATURE],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_clip_rectangle_slow = LazilyCompiledShader::new(
+            ShaderKind::ClipCache(VertexArrayKind::ClipRect),
+            "cs_clip_rectangle",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_clip_rectangle_fast = LazilyCompiledShader::new(
+            ShaderKind::ClipCache(VertexArrayKind::ClipRect),
+            "cs_clip_rectangle",
+            &[FAST_PATH_FEATURE],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_clip_box_shadow = LazilyCompiledShader::new(
+            ShaderKind::ClipCache(VertexArrayKind::ClipBoxShadow),
+            "cs_clip_box_shadow",
+            &["TEXTURE_2D"],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_clip_image = LazilyCompiledShader::new(
+            ShaderKind::ClipCache(VertexArrayKind::ClipImage),
+            "cs_clip_image",
+            &["TEXTURE_2D"],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let mut cs_scale = Vec::new();
+        let scale_shader_num = IMAGE_BUFFER_KINDS.len();
+        // PrimitiveShader is not clonable. Use push() to initialize the vec.
+        for _ in 0 .. scale_shader_num {
+            cs_scale.push(None);
+        }
+        for image_buffer_kind in &IMAGE_BUFFER_KINDS {
+            if has_platform_support(*image_buffer_kind, &gl_type) {
+                let feature_string = get_feature_string(
+                    *image_buffer_kind,
+                    texture_external_version,
+                );
+
+                let mut features = Vec::new();
+                if feature_string != "" {
+                    features.push(feature_string);
+                }
+
+                let shader = LazilyCompiledShader::new(
+                    ShaderKind::Cache(VertexArrayKind::Scale),
+                    "cs_scale",
+                    &features,
+                    device,
+                    options.precache_flags,
+                    &shader_list,
+                    profile,
+                 )?;
+
+                 let index = Self::get_compositing_shader_index(
+                    *image_buffer_kind,
+                 );
+                 cs_scale[index] = Some(shader);
+            }
+        }
+
+        // TODO(gw): The split composite + text shader are special cases - the only
+        //           shaders used during normal scene rendering that aren't a brush
+        //           shader. Perhaps we can unify these in future?
+
+        let ps_text_run = TextShader::new("ps_text_run",
+            device,
+            &[],
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let ps_text_run_dual_source = if use_dual_source_blending {
+            let dual_source_features = vec![DUAL_SOURCE_FEATURE];
+            Some(TextShader::new("ps_text_run",
+                device,
+                &dual_source_features,
+                options.precache_flags,
+                &shader_list,
+                profile,
+            )?)
+        } else {
+            None
+        };
+
+        let ps_quad_textured = LazilyCompiledShader::new(
+            ShaderKind::Primitive,
+            "ps_quad_textured",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let ps_split_composite = LazilyCompiledShader::new(
+            ShaderKind::Primitive,
+            "ps_split_composite",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let ps_clear = LazilyCompiledShader::new(
+            ShaderKind::Clear,
+            "ps_clear",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let ps_copy = LazilyCompiledShader::new(
+            ShaderKind::Copy,
+            "ps_copy",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        // All image configuration.
+        let mut image_features = Vec::new();
+        let mut brush_image = Vec::new();
+        let mut brush_fast_image = Vec::new();
+        // PrimitiveShader is not clonable. Use push() to initialize the vec.
+        for _ in 0 .. IMAGE_BUFFER_KINDS.len() {
+            brush_image.push(None);
+            brush_fast_image.push(None);
+        }
+        for buffer_kind in 0 .. IMAGE_BUFFER_KINDS.len() {
+            if !has_platform_support(IMAGE_BUFFER_KINDS[buffer_kind], &gl_type)
+                // Brush shaders are not ESSL1 compatible
+                || (IMAGE_BUFFER_KINDS[buffer_kind] == ImageBufferKind::TextureExternal
+                    && texture_external_version == TextureExternalVersion::ESSL1)
+            {
+                continue;
+            }
+
+            let feature_string = get_feature_string(
+                IMAGE_BUFFER_KINDS[buffer_kind],
+                texture_external_version,
+            );
+            if feature_string != "" {
+                image_features.push(feature_string);
+            }
+
+            brush_fast_image[buffer_kind] = Some(BrushShader::new(
+                "brush_image",
+                device,
+                &image_features,
+                options.precache_flags,
+                &shader_list,
+                use_advanced_blend_equation,
+                use_dual_source_blending,
+                profile,
+            )?);
+
+            image_features.push("REPETITION");
+            image_features.push("ANTIALIASING");
+
+            brush_image[buffer_kind] = Some(BrushShader::new(
+                "brush_image",
+                device,
+                &image_features,
+                options.precache_flags,
+                &shader_list,
+                use_advanced_blend_equation,
+                use_dual_source_blending,
+                profile,
+            )?);
+
+            image_features.clear();
+        }
+
+        // All yuv_image configuration.
+        let mut yuv_features = Vec::new();
+        let mut rgba_features = Vec::new();
+        let mut fast_path_features = Vec::new();
+        let yuv_shader_num = IMAGE_BUFFER_KINDS.len();
+        let mut brush_yuv_image = Vec::new();
+        // PrimitiveShader is not clonable. Use push() to initialize the vec.
+        for _ in 0 .. yuv_shader_num {
+            brush_yuv_image.push(None);
+        }
+        for image_buffer_kind in &IMAGE_BUFFER_KINDS {
+            if has_platform_support(*image_buffer_kind, &gl_type) {
+                yuv_features.push("YUV");
+                fast_path_features.push("FAST_PATH");
+
+                let index = Self::get_compositing_shader_index(
+                    *image_buffer_kind,
+                );
+
+                let feature_string = get_feature_string(
+                    *image_buffer_kind,
+                    texture_external_version,
+                );
+                if feature_string != "" {
+                    yuv_features.push(feature_string);
+                    rgba_features.push(feature_string);
+                    fast_path_features.push(feature_string);
+                }
+
+                // YUV shaders are not compatible with ESSL1
+                if *image_buffer_kind != ImageBufferKind::TextureExternal ||
+                    texture_external_version == TextureExternalVersion::ESSL3 {
+                    let brush_shader = BrushShader::new(
+                        "brush_yuv_image",
+                        device,
+                        &yuv_features,
+                        options.precache_flags,
+                        &shader_list,
+                        false /* advanced blend */,
+                        false /* dual source */,
+                        profile,
+                    )?;
+                    brush_yuv_image[index] = Some(brush_shader);
+                }
+
+                yuv_features.clear();
+                rgba_features.clear();
+                fast_path_features.clear();
+            }
+        }
+
+        let cs_line_decoration = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::LineDecoration),
+            "cs_line_decoration",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_fast_linear_gradient = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::FastLinearGradient),
+            "cs_fast_linear_gradient",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_linear_gradient = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::LinearGradient),
+            "cs_linear_gradient",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_radial_gradient = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::RadialGradient),
+            "cs_radial_gradient",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_conic_gradient = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::ConicGradient),
+            "cs_conic_gradient",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_border_segment = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::Border),
+            "cs_border_segment",
+             &[],
+             device,
+             options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let cs_border_solid = LazilyCompiledShader::new(
+            ShaderKind::Cache(VertexArrayKind::Border),
+            "cs_border_solid",
+            &[],
+            device,
+            options.precache_flags,
+            &shader_list,
+            profile,
+        )?;
+
+        let composite = CompositorShaders::new(device, options.precache_flags, gl_type)?;
+
+        Ok(Shaders {
+            cs_blur_a8,
+            cs_blur_rgba8,
+            cs_border_segment,
+            cs_line_decoration,
+            cs_fast_linear_gradient,
+            cs_linear_gradient,
+            cs_radial_gradient,
+            cs_conic_gradient,
+            cs_border_solid,
+            cs_scale,
+            cs_svg_filter,
+            brush_solid,
+            brush_image,
+            brush_fast_image,
+            brush_blend,
+            brush_mix_blend,
+            brush_yuv_image,
+            brush_linear_gradient,
+            brush_opacity,
+            brush_opacity_aa,
+            cs_clip_rectangle_slow,
+            cs_clip_rectangle_fast,
+            cs_clip_box_shadow,
+            cs_clip_image,
+            ps_text_run,
+            ps_text_run_dual_source,
+            ps_quad_textured,
+            ps_mask,
+            ps_mask_fast,
+            ps_split_composite,
+            ps_clear,
+            ps_copy,
+            composite,
+        })
+    }
+
+    /// Maps an image buffer kind to its slot in the per-buffer-kind shader
+    /// tables (such as `cs_scale` and `brush_yuv_image`) by casting the enum
+    /// value to `usize`.
+    fn get_compositing_shader_index(buffer_kind: ImageBufferKind) -> usize {
+        buffer_kind as usize
+    }
+
+    /// Returns the composite shader for the given surface format, image buffer
+    /// kind and feature set by forwarding the lookup to `self.composite`.
+    pub fn get_composite_shader(
+        &mut self,
+        format: CompositeSurfaceFormat,
+        buffer_kind: ImageBufferKind,
+        features: CompositeFeatures,
+    ) -> &mut LazilyCompiledShader {
+        self.composite.get(format, buffer_kind, features)
+    }
+
+    /// Looks up the scale shader stored in `cs_scale` for `buffer_kind`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no scale shader is stored for `buffer_kind`.
+    pub fn get_scale_shader(
+        &mut self,
+        buffer_kind: ImageBufferKind,
+    ) -> &mut LazilyCompiledShader {
+        let slot = &mut self.cs_scale[Self::get_compositing_shader_index(buffer_kind)];
+        slot.as_mut().expect("bug: unsupported scale shader requested")
+    }
+
+    /// Picks the shader variant that draws the batch described by `key`.
+    ///
+    /// `key.kind` selects the shader family. For brush batches, `features` is
+    /// first adjusted for what the device implements natively (see the
+    /// comments below) and the variant is then chosen from `key.blend_mode`,
+    /// `features` and `debug_flags`. For text runs, the blend mode selects
+    /// between the regular and the dual source text shader.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the image or YUV shader for the requested buffer kind was
+    /// not created, or if a `SubpixelDualSource` text batch is requested while
+    /// no dual source text shader exists.
+    pub fn get(
+        &mut self,
+        key: &BatchKey,
+        mut features: BatchFeatures,
+        debug_flags: DebugFlags,
+        device: &Device,
+    ) -> &mut LazilyCompiledShader {
+        match key.kind {
+            BatchKind::Primitive => {
+                &mut self.ps_quad_textured
+            }
+            BatchKind::SplitComposite => {
+                &mut self.ps_split_composite
+            }
+            BatchKind::Brush(brush_kind) => {
+                // SWGL uses a native anti-aliasing implementation that bypasses the shader.
+                // Don't consider it in that case when deciding whether or not to use
+                // an alpha-pass shader.
+                if device.get_capabilities().uses_native_antialiasing {
+                    features.remove(BatchFeatures::ANTIALIASING);
+                }
+                let brush_shader = match brush_kind {
+                    BrushBatchKind::Solid => {
+                        &mut self.brush_solid
+                    }
+                    BrushBatchKind::Image(image_buffer_kind) => {
+                        // The fast image shader is only usable when neither
+                        // anti-aliasing nor repetition is required.
+                        if features.contains(BatchFeatures::ANTIALIASING) ||
+                            features.contains(BatchFeatures::REPETITION) {
+
+                            self.brush_image[image_buffer_kind as usize]
+                                .as_mut()
+                                .expect("Unsupported image shader kind")
+                        } else {
+                            self.brush_fast_image[image_buffer_kind as usize]
+                                .as_mut()
+                                .expect("Unsupported image shader kind")
+                        }
+                    }
+                    BrushBatchKind::Blend => {
+                        &mut self.brush_blend
+                    }
+                    BrushBatchKind::MixBlend { .. } => {
+                        &mut self.brush_mix_blend
+                    }
+                    BrushBatchKind::LinearGradient => {
+                        // SWGL uses a native clip mask implementation that bypasses the shader.
+                        // Don't consider it in that case when deciding whether or not to use
+                        // an alpha-pass shader.
+                        if device.get_capabilities().uses_native_clip_mask {
+                            features.remove(BatchFeatures::CLIP_MASK);
+                        }
+                        // Gradient brushes can optimistically use the opaque shader even
+                        // with a blend mode if they don't require any features.
+                        if !features.intersects(
+                            BatchFeatures::ANTIALIASING
+                                | BatchFeatures::REPETITION
+                                | BatchFeatures::CLIP_MASK,
+                        ) {
+                            features.remove(BatchFeatures::ALPHA_PASS);
+                        }
+                        &mut self.brush_linear_gradient
+                    }
+                    BrushBatchKind::YuvImage(image_buffer_kind, ..) => {
+                        let shader_index =
+                            Self::get_compositing_shader_index(image_buffer_kind);
+                        self.brush_yuv_image[shader_index]
+                            .as_mut()
+                            .expect("Unsupported YUV shader kind")
+                    }
+                    BrushBatchKind::Opacity => {
+                        if features.contains(BatchFeatures::ANTIALIASING) {
+                            &mut self.brush_opacity_aa
+                        } else {
+                            &mut self.brush_opacity
+                        }
+                    }
+                };
+                brush_shader.get(key.blend_mode, features, debug_flags)
+            }
+            BatchKind::TextRun(glyph_format) => {
+                let text_shader = match key.blend_mode {
+                    BlendMode::SubpixelDualSource => self
+                        .ps_text_run_dual_source
+                        .as_mut()
+                        .expect("bug: dual source text shader requested but not available"),
+                    _ => &mut self.ps_text_run,
+                };
+                text_shader.get(glyph_format, debug_flags)
+            }
+        }
+    }
+
+    /// Consumes the shader collection and releases every shader through
+    /// `device`. Optional shaders are only released when they were created.
+    pub fn deinit(mut self, device: &mut Device) {
+        // Per-buffer-kind scale shaders: skip the slots that were never filled.
+        for shader in self.cs_scale.into_iter().flatten() {
+            shader.deinit(device);
+        }
+
+        self.cs_blur_a8.deinit(device);
+        self.cs_blur_rgba8.deinit(device);
+        self.cs_svg_filter.deinit(device);
+
+        self.brush_solid.deinit(device);
+        self.brush_blend.deinit(device);
+        self.brush_mix_blend.deinit(device);
+        self.brush_linear_gradient.deinit(device);
+        self.brush_opacity.deinit(device);
+        self.brush_opacity_aa.deinit(device);
+
+        self.cs_clip_rectangle_slow.deinit(device);
+        self.cs_clip_rectangle_fast.deinit(device);
+        self.cs_clip_box_shadow.deinit(device);
+        self.cs_clip_image.deinit(device);
+
+        self.ps_text_run.deinit(device);
+        if let Some(dual_source) = self.ps_text_run_dual_source {
+            dual_source.deinit(device);
+        }
+
+        // Image, fast image and YUV brush shaders, in that order.
+        let image_brushes = self.brush_image
+            .into_iter()
+            .chain(self.brush_fast_image)
+            .chain(self.brush_yuv_image)
+            .flatten();
+        for shader in image_brushes {
+            shader.deinit(device);
+        }
+
+        self.cs_border_solid.deinit(device);
+        self.cs_fast_linear_gradient.deinit(device);
+        self.cs_linear_gradient.deinit(device);
+        self.cs_radial_gradient.deinit(device);
+        self.cs_conic_gradient.deinit(device);
+        self.cs_line_decoration.deinit(device);
+        self.cs_border_segment.deinit(device);
+
+        self.ps_split_composite.deinit(device);
+        self.ps_quad_textured.deinit(device);
+        self.ps_mask.deinit(device);
+        self.ps_mask_fast.deinit(device);
+        self.ps_clear.deinit(device);
+        self.ps_copy.deinit(device);
+
+        self.composite.deinit(device);
+    }
+}
+
+/// A `Shaders` collection wrapped in `Rc<RefCell<..>>`, so that it can have
+/// several owners and still be borrowed mutably at runtime.
+pub type SharedShaders = Rc<RefCell<Shaders>>;
+
+/// Composite shaders. These are very simple shaders used to composite picture
+/// cache tiles into the framebuffer on platforms that do not have an OS
+/// Compositor (or where we cannot use it). An OS Compositor (such as
+/// DirectComposition or CoreAnimation) handles the composition of the picture
+/// cache tiles at a lower level (e.g. in DWM for Windows); in that case we hand
+/// the picture cache surfaces directly over to the OS Compositor, and our own
+/// composite shaders below never run.
+pub struct CompositorShaders {
+    /// To composite external (RGB) surfaces we need various permutations of
+    /// shaders with WR_FEATURE flags on or off based on the type of image
+    /// buffer we're sourcing from (see IMAGE_BUFFER_KINDS).
+    rgba: Vec<Option<LazilyCompiledShader>>,
+    /// A faster set of rgba composite shaders that do not support UV clamping
+    /// or color modulation.
+    rgba_fast_path: Vec<Option<LazilyCompiledShader>>,
+    /// The same set of composite shaders but with WR_FEATURE_YUV added.
+    yuv: Vec<Option<LazilyCompiledShader>>,
+}
+
+impl CompositorShaders {
+    /// Creates the composite shaders for every image buffer kind that is
+    /// supported on the current platform.
+    ///
+    /// Each of the three tables gets one slot per entry of
+    /// `IMAGE_BUFFER_KINDS`; the slots of buffer kinds without platform
+    /// support stay `None`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the `ShaderError` of the first shader that fails to be created.
+    pub fn new(
+        device: &mut Device,
+        precache_flags: ShaderPrecacheFlags,
+        gl_type: GlType,
+    ) -> Result<Self, ShaderError> {
+        // The initialization isn't recorded, but a profile still has to be
+        // passed around, so use a dummy one.
+        let mut profile = TransactionProfile::new();
+
+        let texture_external_version = if device.get_capabilities().supports_image_external_essl3 {
+            TextureExternalVersion::ESSL3
+        } else {
+            TextureExternalVersion::ESSL1
+        };
+        let shader_list = get_shader_features(
+            get_shader_feature_flags(gl_type, texture_external_version),
+        );
+
+        // One initially empty slot per image buffer kind.
+        let empty_table = || -> Vec<Option<LazilyCompiledShader>> {
+            (0..IMAGE_BUFFER_KINDS.len()).map(|_| None).collect()
+        };
+        let mut rgba = empty_table();
+        let mut rgba_fast_path = empty_table();
+        let mut yuv = empty_table();
+
+        for &image_buffer_kind in &IMAGE_BUFFER_KINDS {
+            if !has_platform_support(image_buffer_kind, &gl_type) {
+                continue;
+            }
+
+            // Feature lists of the three variants; the buffer-kind specific
+            // feature (if any) is appended to all of them.
+            let mut yuv_features = vec!["YUV"];
+            let mut rgba_features = Vec::new();
+            let mut fast_path_features = vec!["FAST_PATH"];
+
+            let feature_string = get_feature_string(
+                image_buffer_kind,
+                texture_external_version,
+            );
+            if !feature_string.is_empty() {
+                yuv_features.push(feature_string);
+                rgba_features.push(feature_string);
+                fast_path_features.push(feature_string);
+            }
+
+            let index = Self::get_shader_index(image_buffer_kind);
+
+            // YUV shaders are not compatible with ESSL1
+            let yuv_supported = image_buffer_kind != ImageBufferKind::TextureExternal
+                || texture_external_version == TextureExternalVersion::ESSL3;
+            if yuv_supported {
+                let shader = LazilyCompiledShader::new(
+                    ShaderKind::Composite,
+                    "composite",
+                    &yuv_features,
+                    device,
+                    precache_flags,
+                    &shader_list,
+                    &mut profile,
+                )?;
+                yuv[index] = Some(shader);
+            }
+
+            let shader = LazilyCompiledShader::new(
+                ShaderKind::Composite,
+                "composite",
+                &rgba_features,
+                device,
+                precache_flags,
+                &shader_list,
+                &mut profile,
+            )?;
+            rgba[index] = Some(shader);
+
+            let shader = LazilyCompiledShader::new(
+                ShaderKind::Composite,
+                "composite",
+                &fast_path_features,
+                device,
+                precache_flags,
+                &shader_list,
+                &mut profile,
+            )?;
+            rgba_fast_path[index] = Some(shader);
+        }
+
+        Ok(CompositorShaders { rgba, rgba_fast_path, yuv })
+    }
+
+    /// Returns the composite shader for `format` and `buffer_kind`.
+    ///
+    /// For RGBA surfaces the fast path shader is used when `features` contains
+    /// both `NO_UV_CLAMP` and `NO_COLOR_MODULATION`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the selected table holds no shader for `buffer_kind`.
+    pub fn get(
+        &mut self,
+        format: CompositeSurfaceFormat,
+        buffer_kind: ImageBufferKind,
+        features: CompositeFeatures,
+    ) -> &mut LazilyCompiledShader {
+        let fast_path_features =
+            CompositeFeatures::NO_UV_CLAMP | CompositeFeatures::NO_COLOR_MODULATION;
+
+        // Pick the table to look into together with the message to panic with
+        // if the requested slot is empty.
+        let (table, missing_msg) = match format {
+            CompositeSurfaceFormat::Rgba if features.contains(fast_path_features) => (
+                &mut self.rgba_fast_path,
+                "bug: unsupported rgba fast path shader requested",
+            ),
+            CompositeSurfaceFormat::Rgba => (
+                &mut self.rgba,
+                "bug: unsupported rgba shader requested",
+            ),
+            CompositeSurfaceFormat::Yuv => (
+                &mut self.yuv,
+                "bug: unsupported yuv shader requested",
+            ),
+        };
+
+        table[Self::get_shader_index(buffer_kind)]
+            .as_mut()
+            .expect(missing_msg)
+    }
+
+    /// Maps an image buffer kind to its slot in the `rgba`, `rgba_fast_path`
+    /// and `yuv` tables by casting the enum value to `usize`.
+    fn get_shader_index(buffer_kind: ImageBufferKind) -> usize {
+        buffer_kind as usize
+    }
+
+    /// Releases every composite shader that was created through `device`,
+    /// leaving the three tables empty.
+    pub fn deinit(&mut self, device: &mut Device) {
+        // Same order as separate loops would give: rgba, fast path, then yuv.
+        let created_shaders = self.rgba
+            .drain(..)
+            .chain(self.rgba_fast_path.drain(..))
+            .chain(self.yuv.drain(..))
+            .flatten();
+
+        for shader in created_shaders {
+            shader.deinit(device);
+        }
+    }
+}
+
+/// Computes the shader feature flags for a GL flavor.
+///
+/// Desktop GL maps to the `GL` flag alone; GLES maps to `GLES` combined with
+/// the external texture flag that matches `texture_external_version`.
+fn get_shader_feature_flags(gl_type: GlType, texture_external_version: TextureExternalVersion) -> ShaderFeatureFlags {
+    match (gl_type, texture_external_version) {
+        (GlType::Gl, _) => ShaderFeatureFlags::GL,
+        (GlType::Gles, TextureExternalVersion::ESSL3) => {
+            ShaderFeatureFlags::GLES | ShaderFeatureFlags::TEXTURE_EXTERNAL
+        }
+        (GlType::Gles, TextureExternalVersion::ESSL1) => {
+            ShaderFeatureFlags::GLES | ShaderFeatureFlags::TEXTURE_EXTERNAL_ESSL1
+        }
+    }
+}
diff --git a/gfx/wr/webrender/src/renderer/upload.rs b/gfx/wr/webrender/src/renderer/upload.rs
new file mode 100644
index 0000000000..0ba053cd76
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/upload.rs
@@ -0,0 +1,847 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! This module contains the convoluted logic that goes into uploading content into
+//! the texture cache's textures.
+//!
+//! We need to support various combinations of code paths depending on the quirks of
+//! each hardware/driver configuration:
+//! - direct upload,
+//! - staged upload via a pixel buffer object,
+//! - staged upload via a direct upload to a staging texture where PBOs aren't supported,
+//! - copy from the staging to destination textures, either via blits or batched draw calls.
+//!
+//! Conceptually a lot of this logic should probably be in the device module, but some code
+//! here relies on submitting draw calls via the renderer.
+
+
+use std::mem;
+use std::collections::VecDeque;
+use std::sync::Arc;
+use std::time::Duration;
+use euclid::{Transform3D, point2};
+use time::precise_time_ns;
+use malloc_size_of::MallocSizeOfOps;
+use api::units::*;
+use api::{ExternalImageSource, ImageBufferKind, ImageFormat};
+use crate::renderer::{
+    Renderer, VertexArrayKind, RendererStats, TextureSampler, TEXTURE_CACHE_DBG_CLEAR_COLOR
+};
+use crate::internal_types::{
+    FastHashMap, TextureUpdateSource, Swizzle, TextureCacheUpdate,
+    CacheTextureId, RenderTargetInfo,
+};
+use crate::device::{
+    Device, UploadMethod, Texture, DrawTarget, UploadStagingBuffer, TextureFlags, TextureUploader,
+    TextureFilter,
+};
+use crate::gpu_types::CopyInstance;
+use crate::batch::BatchTextures;
+use crate::texture_pack::{GuillotineAllocator, FreeRectSlice};
+use crate::profiler;
+use crate::render_api::MemoryReport;
+
+/// Size limit for batched uploads: an update is only considered for batching
+/// when its rect is at most this wide and this tall. The width also determines
+/// the row stride used when uploading CPU staging buffers.
+pub const BATCH_UPLOAD_TEXTURE_SIZE: DeviceIntSize = DeviceIntSize::new(512, 512);
+
+/// Upload a number of items to texture cache textures.
+///
+/// This is the main entry point of the texture cache upload code.
+/// See also the module documentation for more information.
+pub fn upload_to_texture_cache(
+    renderer: &mut Renderer,
+    update_list: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>>,
+) {
+
+    let mut stats = UploadStats {
+        num_draw_calls: 0,
+        upload_time: 0,
+        cpu_buffer_alloc_time: 0,
+        texture_alloc_time: 0,
+        cpu_copy_time: 0,
+        gpu_copy_commands_time: 0,
+        bytes_uploaded: 0,
+        items_uploaded: 0,
+    };
+
+    let upload_total_start = precise_time_ns();
+
+    let mut batch_upload_textures = Vec::new();
+
+    // A list of copies that must be performed from the temporary textures to the texture cache.
+    let mut batch_upload_copies = Vec::new();
+
+    // For each texture format, this stores a list of staging buffers
+    // and a texture allocator for packing the buffers.
+    let mut batch_upload_buffers = FastHashMap::default();
+
+    // For best performance we use a single TextureUploader for all uploads.
+    // This allows us to fill PBOs more efficiently and therefore allocate fewer PBOs.
+    let mut uploader = renderer.device.upload_texture(
+        &mut renderer.texture_upload_pbo_pool,
+    );
+
+    let num_updates = update_list.len();
+
+    for (texture_id, updates) in update_list {
+        let texture = &renderer.texture_resolver.texture_cache_map[&texture_id].texture;
+        for update in updates {
+            let TextureCacheUpdate { rect, stride, offset, format_override, source } = update;
+            let mut arc_data = None; 
+            let dummy_data;
+            let data = match source {
+                TextureUpdateSource::Bytes { ref data } => {
+                    arc_data = Some(data.clone());
+                    &data[offset as usize ..]
+                }
+                TextureUpdateSource::External { id, channel_index } => {
+                    let handler = renderer.external_image_handler
+                        .as_mut()
+                        .expect("Found external image, but no handler set!");
+                    // The filter is only relevant for NativeTexture external images.
+                    match handler.lock(id, channel_index).source {
+                        ExternalImageSource::RawData(data) => {
+                            &data[offset as usize ..]
+                        }
+                        ExternalImageSource::Invalid => {
+                            // Create a local buffer to fill the pbo.
+                            let bpp = texture.get_format().bytes_per_pixel();
+                            let width = stride.unwrap_or(rect.width() * bpp);
+                            let total_size = width * rect.height();
+                            // WR doesn't support the RGBAF32 format in the texture cache yet,
+                            // so we use the u8 type here.
+                            dummy_data = vec![0xFFu8; total_size as usize];
+                            &dummy_data
+                        }
+                        ExternalImageSource::NativeTexture(eid) => {
+                            panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id);
+                        }
+                    }
+                }
+                TextureUpdateSource::DebugClear => {
+                    let draw_target = DrawTarget::from_texture(
+                        texture,
+                        false,
+                    );
+                    renderer.device.bind_draw_target(draw_target);
+                    renderer.device.clear_target(
+                        Some(TEXTURE_CACHE_DBG_CLEAR_COLOR),
+                        None,
+                        Some(draw_target.to_framebuffer_rect(update.rect.to_i32()))
+                    );
+
+                    continue;
+                }
+            };
+
+            stats.items_uploaded += 1;
+
+            let use_batch_upload = renderer.device.use_batched_texture_uploads() &&
+                texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE) &&
+                rect.width() <= BATCH_UPLOAD_TEXTURE_SIZE.width &&
+                rect.height() <= BATCH_UPLOAD_TEXTURE_SIZE.height &&
+                rect.area() < renderer.device.batched_upload_threshold();
+
+            if use_batch_upload
+                && arc_data.is_some()
+                && matches!(renderer.device.upload_method(), &UploadMethod::Immediate)
+                && rect.area() > BATCH_UPLOAD_TEXTURE_SIZE.area() / 2 {
+                skip_staging_buffer(
+                    &mut renderer.device,
+                    &mut renderer.staging_texture_pool,
+                    rect,
+                    stride,
+                    arc_data.unwrap(),
+                    texture_id,
+                    texture,
+                    &mut batch_upload_buffers,
+                    &mut batch_upload_textures,
+                    &mut batch_upload_copies,
+                    &mut stats,
+                );
+            } else if use_batch_upload {
+                copy_into_staging_buffer(
+                    &mut renderer.device,
+                    &mut uploader,
+                    &mut renderer.staging_texture_pool,
+                    rect,
+                    stride,
+                    data,
+                    texture_id,
+                    texture,
+                    &mut batch_upload_buffers,
+                    &mut batch_upload_textures,
+                    &mut batch_upload_copies,
+                    &mut stats,
+                );
+            } else {
+                let upload_start_time = precise_time_ns();
+
+                stats.bytes_uploaded += uploader.upload(
+                    &mut renderer.device,
+                    texture,
+                    rect,
+                    stride,
+                    format_override,
+                    data.as_ptr(),
+                    data.len()
+                );
+
+                stats.upload_time += precise_time_ns() - upload_start_time;
+            }
+
+            if let TextureUpdateSource::External { id, channel_index } = source {
+                let handler = renderer.external_image_handler
+                    .as_mut()
+                    .expect("Found external image, but no handler set!");
+                handler.unlock(id, channel_index);
+            }
+        }
+    }
+
+    let upload_start_time = precise_time_ns();
+    // Upload batched texture updates to their temporary textures.
+    for batch_buffer in batch_upload_buffers.into_iter().map(|(_, (_, buffers))| buffers).flatten() {
+        let texture = &batch_upload_textures[batch_buffer.texture_index];
+        match batch_buffer.staging_buffer {
+            StagingBufferKind::Pbo(pbo) => {
+                stats.bytes_uploaded += uploader.upload_staged(
+                    &mut renderer.device,
+                    texture,
+                    DeviceIntRect::from_size(texture.get_dimensions()),
+                    None,
+                    pbo,
+                );
+            }
+            StagingBufferKind::CpuBuffer { bytes, .. } => {
+                let bpp = texture.get_format().bytes_per_pixel();
+                stats.bytes_uploaded += uploader.upload(
+                    &mut renderer.device,
+                    texture,
+                    batch_buffer.upload_rect,
+                    Some(BATCH_UPLOAD_TEXTURE_SIZE.width * bpp),
+                    None,
+                    bytes.as_ptr(),
+                    bytes.len()
+                );
+                renderer.staging_texture_pool.return_temporary_buffer(bytes);
+            }
+            StagingBufferKind::Image { bytes, stride } => {
+                stats.bytes_uploaded += uploader.upload(
+                    &mut renderer.device,
+                    texture,
+                    batch_buffer.upload_rect,
+                    stride,
+                    None,
+                    bytes.as_ptr(),
+                    bytes.len()
+                );
+            }
+        }
+    }
+    stats.upload_time += precise_time_ns() - upload_start_time;
+
+
+    // Flush all uploads, batched or otherwise.
+    let flush_start_time = precise_time_ns();
+    uploader.flush(&mut renderer.device);
+    stats.upload_time += precise_time_ns() - flush_start_time;
+
+    if !batch_upload_copies.is_empty() {
+        // Copy updates that were batch uploaded to their correct destination in the texture cache.
+        // Sort them by destination and source to minimize framebuffer binding changes.
+        batch_upload_copies.sort_unstable_by_key(|b| (b.dest_texture_id.0, b.src_texture_index));
+
+        let gpu_copy_start = precise_time_ns();
+
+        if renderer.device.use_draw_calls_for_texture_copy() {
+            // Some drivers have a very high CPU overhead when submitting hundreds of small blit
+            // commands (low end intel drivers on Windows for example can take 100+ ms submitting a
+            // few hundred blits). In this case we do the copy with batched draw calls.
+            copy_from_staging_to_cache_using_draw_calls(
+                renderer,
+                &mut stats,
+                &batch_upload_textures,
+                batch_upload_copies,
+            );
+        } else {
+            copy_from_staging_to_cache(
+                renderer,
+                &batch_upload_textures,
+                batch_upload_copies,
+            );
+        }
+
+        stats.gpu_copy_commands_time += precise_time_ns() - gpu_copy_start;
+    }
+
+    for texture in batch_upload_textures.drain(..) {
+        renderer.staging_texture_pool.return_texture(texture);
+    }
+
+    // Update the profile counters. We use add instead of set because
+    // this function can be called several times per frame.
+    // We don't update the counters when their value is zero, so that
+    // the profiler can treat them as events and we can get notified
+    // when they happen.
+
+    let upload_total = precise_time_ns() - upload_total_start;
+    renderer.profile.add(
+        profiler::TOTAL_UPLOAD_TIME,
+        profiler::ns_to_ms(upload_total)
+    );
+
+    if num_updates > 0 {
+        renderer.profile.add(profiler::TEXTURE_UPLOADS, num_updates);
+    }
+
+    if stats.bytes_uploaded > 0 {
+        renderer.profile.add(
+            profiler::TEXTURE_UPLOADS_MEM,
+            profiler::bytes_to_mb(stats.bytes_uploaded)
+        );
+    }
+
+    if stats.cpu_copy_time > 0 {
+        renderer.profile.add(
+            profiler::UPLOAD_CPU_COPY_TIME,
+            profiler::ns_to_ms(stats.cpu_copy_time)
+        );
+    }
+    if stats.upload_time > 0 {
+        renderer.profile.add(
+            profiler::UPLOAD_TIME,
+            profiler::ns_to_ms(stats.upload_time)
+        );
+    }
+    if stats.texture_alloc_time > 0 {
+        renderer.profile.add(
+            profiler::STAGING_TEXTURE_ALLOCATION_TIME,
+            profiler::ns_to_ms(stats.texture_alloc_time)
+        );
+    }
+    if stats.cpu_buffer_alloc_time > 0 {
+        renderer.profile.add(
+            profiler::CPU_TEXTURE_ALLOCATION_TIME,
+            profiler::ns_to_ms(stats.cpu_buffer_alloc_time)
+        );
+    }
+    if stats.num_draw_calls > 0{
+        renderer.profile.add(
+            profiler::UPLOAD_NUM_COPY_BATCHES,
+            stats.num_draw_calls
+        );
+    }
+
+    if stats.gpu_copy_commands_time > 0 {
+        renderer.profile.add(
+            profiler::UPLOAD_GPU_COPY_TIME,
+            profiler::ns_to_ms(stats.gpu_copy_commands_time)
+        );
+    }
+
+    let add_markers = profiler::thread_is_being_profiled();
+    if add_markers && stats.bytes_uploaded > 0 {
+    	let details = format!("{} bytes uploaded, {} items", stats.bytes_uploaded, stats.items_uploaded);
+    	profiler::add_text_marker(&"Texture uploads", &details, Duration::from_nanos(upload_total));
+    }
+}
+
+/// Copy an item into a batched upload staging buffer.
+///
+/// A region of `update_rect`'s size is reserved in one of the staging buffers
+/// associated with the format of `texture`. If no existing buffer has room, a
+/// new staging texture and a new staging buffer (a PBO or a temporary CPU
+/// buffer, depending on the device's upload method) are allocated. The pixels
+/// in `data` are then copied row by row into the reserved region, and a
+/// `BatchUploadCopy` is queued describing how to move that region from the
+/// staging texture to `update_rect` in the texture identified by
+/// `dest_texture_id`.
+///
+/// `update_stride` is the distance in bytes between the starts of consecutive
+/// rows in `data`; `None` means the rows are tightly packed.
+///
+/// Allocation and copy timings are accumulated into `stats`.
+///
+/// # Panics
+///
+/// Panics if `update_stride` is negative, if `data` is too small to hold the
+/// pixels of `update_rect`, or if staging a PBO fails.
+fn copy_into_staging_buffer<'a>(
+    device: &mut Device,
+    uploader: &mut TextureUploader<'a>,
+    staging_texture_pool: &mut UploadTexturePool,
+    update_rect: DeviceIntRect,
+    update_stride: Option<i32>,
+    data: &[u8],
+    dest_texture_id: CacheTextureId,
+    texture: &Texture,
+    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
+    batch_upload_textures: &mut Vec<Texture>,
+    batch_upload_copies: &mut Vec<BatchUploadCopy>,
+    stats: &mut UploadStats
+) {
+    let (allocator, buffers) = batch_upload_buffers.entry(texture.get_format())
+        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));
+
+    // Allocate a region within the staging buffer for this update. If there is
+    // no room in an existing buffer then allocate another texture and buffer.
+    let (slice, origin) = match allocator.allocate(&update_rect.size()) {
+        Some(allocation) => allocation,
+        None => {
+            // The new buffer is pushed at the end of `buffers` below, so its
+            // index doubles as the identifier of the new allocator slice.
+            let new_slice = FreeRectSlice(buffers.len() as u32);
+            allocator.extend(new_slice, BATCH_UPLOAD_TEXTURE_SIZE, update_rect.size());
+
+            let texture_alloc_time_start = precise_time_ns();
+            let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
+            // Accumulate rather than overwrite: this function runs once per
+            // item and may allocate several staging textures while filling
+            // the same stats.
+            stats.texture_alloc_time += precise_time_ns() - texture_alloc_time_start;
+
+            let texture_index = batch_upload_textures.len();
+            batch_upload_textures.push(staging_texture);
+
+            let cpu_buffer_alloc_start_time = precise_time_ns();
+            let staging_buffer = match device.upload_method() {
+                UploadMethod::Immediate => StagingBufferKind::CpuBuffer {
+                    bytes: staging_texture_pool.get_temporary_buffer(),
+                },
+                UploadMethod::PixelBuffer(_) => {
+                    let pbo = uploader.stage(
+                        device,
+                        texture.get_format(),
+                        BATCH_UPLOAD_TEXTURE_SIZE,
+                    ).unwrap();
+
+                    StagingBufferKind::Pbo(pbo)
+                }
+            };
+            stats.cpu_buffer_alloc_time += precise_time_ns() - cpu_buffer_alloc_start_time;
+
+            buffers.push(BatchUploadBuffer {
+                staging_buffer,
+                texture_index,
+                upload_rect: DeviceIntRect::zero()
+            });
+
+            // The first allocation in a fresh slice sits at its origin.
+            (new_slice, DeviceIntPoint::zero())
+        }
+    };
+    let buffer = &mut buffers[slice.0 as usize];
+    let allocated_rect = DeviceIntRect::from_origin_and_size(origin, update_rect.size());
+    // Grow the rectangle that will eventually be uploaded from this buffer.
+    buffer.upload_rect = buffer.upload_rect.union(&allocated_rect);
+
+    batch_upload_copies.push(BatchUploadCopy {
+        src_texture_index: buffer.texture_index,
+        src_offset: allocated_rect.min,
+        dest_texture_id,
+        dest_offset: update_rect.min,
+        size: update_rect.size(),
+    });
+
+    // SAFETY: the only unsafe operation below is `slice::from_raw_parts`, which
+    // reinterprets the first `src_size` bytes of `data` as `MaybeUninit<u8>`.
+    // `src_size <= data.len()` is asserted right before the call, and the
+    // resulting slice is only read while `data` is still borrowed.
+    unsafe {
+        let memcpy_start_time = precise_time_ns();
+        let bpp = texture.get_format().bytes_per_pixel() as usize;
+        let width_bytes = update_rect.width() as usize * bpp;
+        let src_stride = update_stride.map_or(width_bytes, |stride| {
+            assert!(stride >= 0);
+            stride as usize
+        });
+        // The last row only needs `width_bytes`, not a full stride.
+        let src_size = (update_rect.height() as usize - 1) * src_stride + width_bytes;
+        assert!(src_size <= data.len());
+
+        let src: &[mem::MaybeUninit<u8>] = std::slice::from_raw_parts(data.as_ptr() as *const _, src_size);
+        let (dst_stride, dst) = match &mut buffer.staging_buffer {
+            StagingBufferKind::Pbo(buffer) => (
+                buffer.get_stride(),
+                buffer.get_mapping(),
+            ),
+            StagingBufferKind::CpuBuffer { bytes } => (
+                BATCH_UPLOAD_TEXTURE_SIZE.width as usize * bpp,
+                &mut bytes[..],
+            ),
+            // Buffers of this kind are never handed out by the allocator
+            // path above.
+            StagingBufferKind::Image { .. } => unreachable!(),
+        };
+
+        // copy the data line-by-line in to the buffer so that we do not overwrite
+        // any other region of the buffer.
+        for y in 0..allocated_rect.height() as usize {
+            let src_start = y * src_stride;
+            let src_end = src_start + width_bytes;
+            let dst_start = (allocated_rect.min.y as usize + y) * dst_stride +
+                allocated_rect.min.x as usize * bpp;
+            let dst_end = dst_start + width_bytes;
+
+            dst[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])
+        }
+
+        stats.cpu_copy_time += precise_time_ns() - memcpy_start_time;
+    }
+}
+
+/// Take this code path instead of copying into a staging CPU buffer when the image
+/// we would copy is large enough that it's unlikely anything else would fit in the
+/// buffer, therefore we might as well copy directly from the source image's pixels.
+///
+/// A dedicated staging texture is taken from the pool and a `BatchUploadBuffer`
+/// referencing `data` itself is queued for it, so the pixels are uploaded
+/// straight from the source image; `stride` is stored alongside the pixels for
+/// that upload. The queued `BatchUploadCopy` then moves the image from the
+/// origin of the staging texture to `update_rect` in the texture identified by
+/// `dest_texture_id`.
+///
+/// The time spent obtaining the staging texture is accumulated into `stats`.
+fn skip_staging_buffer<'a>(
+    device: &mut Device,
+    staging_texture_pool: &mut UploadTexturePool,
+    update_rect: DeviceIntRect,
+    stride: Option<i32>,
+    data: Arc<Vec<u8>>,
+    dest_texture_id: CacheTextureId,
+    texture: &Texture,
+    batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
+    batch_upload_textures: &mut Vec<Texture>,
+    batch_upload_copies: &mut Vec<BatchUploadCopy>,
+    stats: &mut UploadStats
+) {
+    // Only the buffer list is needed here: the image gets a staging texture of
+    // its own, so no region is reserved through the allocator.
+    let (_, buffers) = batch_upload_buffers.entry(texture.get_format())
+        .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));
+
+    let texture_alloc_time_start = precise_time_ns();
+    let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
+    // Accumulate rather than overwrite: several staging textures may be
+    // allocated while filling the same stats.
+    stats.texture_alloc_time += precise_time_ns() - texture_alloc_time_start;
+
+    let texture_index = batch_upload_textures.len();
+    batch_upload_textures.push(staging_texture);
+
+    buffers.push(BatchUploadBuffer {
+        staging_buffer: StagingBufferKind::Image { bytes: data, stride },
+        texture_index,
+        // The image occupies the top-left corner of its staging texture.
+        upload_rect: DeviceIntRect::from_size(update_rect.size())
+    });
+
+    batch_upload_copies.push(BatchUploadCopy {
+        src_texture_index: texture_index,
+        src_offset: point2(0, 0),
+        dest_texture_id,
+        dest_offset: update_rect.min,
+        size: update_rect.size(),
+    });
+}
+
+
+/// Move every batched update from its staging texture to its destination in
+/// the texture cache, issuing one blit command per update.
+///
+/// Blits are supposedly more efficient than draw calls, but some drivers have
+/// a very high per-command overhead, so in some configurations
+/// copy_from_staging_to_cache_using_draw_calls is used instead.
+fn copy_from_staging_to_cache(
+    renderer: &mut Renderer,
+    batch_upload_textures: &[Texture],
+    batch_upload_copies: Vec<BatchUploadCopy>,
+) {
+    for BatchUploadCopy {
+        src_texture_index,
+        src_offset,
+        dest_texture_id,
+        dest_offset,
+        size,
+    } in batch_upload_copies {
+        // Resolve the destination first, then the staging texture.
+        let cache_entry = &renderer.texture_resolver.texture_cache_map[&dest_texture_id];
+        let staging_texture = &batch_upload_textures[src_texture_index];
+
+        renderer.device.copy_texture_sub_region(
+            staging_texture,
+            src_offset.x as _,
+            src_offset.y as _,
+            &cache_entry.texture,
+            dest_offset.x as _,
+            dest_offset.y as _,
+            size.width as _,
+            size.height as _,
+        );
+    }
+}
+
+/// Copy batched updates from the staging textures to the destination cache
+/// textures by generating and submitting composite shader batches.
+///
+/// Consecutive copies that share both their staging texture and their
+/// destination texture are submitted as a single instanced draw call; the
+/// number of draw calls is recorded in `stats`.
+///
+/// If this shows up in GPU time profiles we could replace it with
+/// a simpler shader (composite.glsl is already quite simple).
+fn copy_from_staging_to_cache_using_draw_calls(
+    renderer: &mut Renderer,
+    stats: &mut UploadStats,
+    batch_upload_textures: &[Texture],
+    batch_upload_copies: Vec<BatchUploadCopy>,
+) {
+    /// Submit the accumulated instances as one draw call and count it.
+    fn submit_batch(
+        renderer: &mut Renderer,
+        stats: &mut UploadStats,
+        instances: &[CopyInstance],
+    ) {
+        renderer.draw_instanced_batch(
+            instances,
+            VertexArrayKind::Copy,
+            // We bind the staging texture manually because it isn't known
+            // to the texture resolver.
+            &BatchTextures::empty(),
+            &mut RendererStats::default(),
+        );
+
+        stats.num_draw_calls += 1;
+    }
+
+    let mut pending: Vec<CopyInstance> = Vec::new();
+    // Staging texture and destination texture that are currently bound.
+    let mut bound_src: Option<usize> = None;
+    let mut bound_dst: Option<CacheTextureId> = None;
+    let mut dst_texture_size = DeviceSize::new(0.0, 0.0);
+
+    for copy in batch_upload_copies {
+        let src_changed = bound_src.map_or(true, |index| index != copy.src_texture_index);
+        let dst_changed = bound_dst.map_or(true, |id| id != copy.dest_texture_id);
+
+        // A change of source or destination ends the batch built so far; it
+        // has to be drawn before any binding is replaced.
+        if !pending.is_empty() && (src_changed || dst_changed) {
+            submit_batch(renderer, stats, &pending);
+            pending.clear();
+        }
+
+        if dst_changed {
+            let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id].texture;
+            dst_texture_size = dest_texture.get_dimensions().to_f32();
+
+            let draw_target = DrawTarget::from_texture(dest_texture, false);
+            renderer.device.bind_draw_target(draw_target);
+
+            renderer.shaders
+                .borrow_mut()
+                .ps_copy
+                .bind(
+                    &mut renderer.device,
+                    &Transform3D::identity(),
+                    None,
+                    &mut renderer.renderer_errors,
+                    &mut renderer.profile,
+                );
+
+            bound_dst = Some(copy.dest_texture_id);
+        }
+
+        if src_changed {
+            let staging_texture = &batch_upload_textures[copy.src_texture_index];
+            renderer.device.bind_texture(
+                TextureSampler::Color0,
+                staging_texture,
+                Swizzle::default(),
+            );
+
+            bound_src = Some(copy.src_texture_index);
+        }
+
+        // Source and destination rectangles share the same size; only their
+        // origins differ.
+        let extent = copy.size.to_f32();
+        pending.push(CopyInstance {
+            src_rect: DeviceRect::from_origin_and_size(copy.src_offset.to_f32(), extent),
+            dst_rect: DeviceRect::from_origin_and_size(copy.dest_offset.to_f32(), extent),
+            dst_texture_size,
+        });
+    }
+
+    // Draw whatever is left over from the last iteration.
+    if !pending.is_empty() {
+        submit_batch(renderer, stats, &pending);
+    }
+}
+
+/// A very basic pool to avoid reallocating staging textures as well as staging
+/// CPU side buffers.
+pub struct UploadTexturePool {
+    /// The textures in the pool associated with a last used frame index.
+    ///
+    /// The outer array corresponds to each of the three supported texture formats
+    /// (RGBA8, BGRA8 and R8, in that order). Within a queue, textures are pushed
+    /// at the back when returned and taken from the front when reused, so the
+    /// front entry is always the least recently used one.
+    textures: [VecDeque<(Texture, u64)>; 3],
+    /// Frame at which to deallocate some textures if there are too many in the pool,
+    /// for each format.
+    delay_texture_deallocation: [u64; 3],
+    /// Frame counter, incremented by `begin_frame`.
+    current_frame: u64,
+
+    /// Temporary buffers that are used when using staging uploads + glTexImage2D.
+    ///
+    /// Temporary buffers aren't used asynchronously so they can be reused every frame.
+    /// To keep things simple we always allocate enough memory for formats with four bytes
+    /// per pixel (more than we need for alpha-only textures but it works just as well).
+    temporary_buffers: Vec<Vec<mem::MaybeUninit<u8>>>,
+    /// Smallest length of `temporary_buffers` observed since `begin_frame`, i.e. the
+    /// number of pooled buffers that were not touched during the frame.
+    min_temporary_buffers: usize,
+    /// Frame at which to deallocate some temporary buffers if too many of them
+    /// stay unused.
+    delay_buffer_deallocation: u64,
+}
+
+impl UploadTexturePool {
+    /// Create an empty pool, holding neither staging textures nor temporary buffers.
+    pub fn new() -> Self {
+        Self {
+            textures: Default::default(),
+            delay_texture_deallocation: [0, 0, 0],
+            current_frame: 0,
+            temporary_buffers: Vec::new(),
+            min_temporary_buffers: 0,
+            delay_buffer_deallocation: 0,
+        }
+    }
+
+    /// Map a texture format to its slot in the per-format arrays.
+    ///
+    /// Panics for any format other than RGBA8, BGRA8 and R8.
+    fn format_index(&self, format: ImageFormat) -> usize {
+        match format {
+            ImageFormat::RGBA8 => 0,
+            ImageFormat::BGRA8 => 1,
+            ImageFormat::R8 => 2,
+            _ => panic!("unexpected format"),
+        }
+    }
+
+    /// Advance the frame counter and start tracking how many temporary buffers
+    /// remain untouched during the new frame.
+    pub fn begin_frame(&mut self) {
+        self.min_temporary_buffers = self.temporary_buffers.len();
+        self.current_frame += 1;
+    }
+
+    /// Create or reuse a staging texture.
+    ///
+    /// See also return_texture.
+    pub fn get_texture(&mut self, device: &mut Device, format: ImageFormat) -> Texture {
+        let slot = self.format_index(format);
+        let now = self.current_frame;
+        let pool = &mut self.textures[slot];
+
+        // Returned textures are appended at the back, which makes the front
+        // entry the least recently used one: looking at it is enough. To avoid
+        // stalls a texture only counts as available once it has been sitting
+        // in the pool for more than 2 frames.
+        let oldest_is_available = pool
+            .front()
+            .map_or(false, |entry| now - entry.1 > 2);
+
+        if oldest_is_available {
+            let (texture, _last_used) = pool.pop_front().unwrap();
+            return texture;
+        }
+
+        // Nothing can be reused, so allocate a brand new texture.
+        device.create_texture(
+            ImageBufferKind::Texture2D,
+            format,
+            BATCH_UPLOAD_TEXTURE_SIZE.width,
+            BATCH_UPLOAD_TEXTURE_SIZE.height,
+            TextureFilter::Nearest,
+            // Currently we need render target support as we always use glBlitFramebuffer
+            // to copy the texture data. Instead, we should use glCopyImageSubData on some
+            // platforms, and avoid creating the FBOs in that case.
+            Some(RenderTargetInfo { has_depth: false }),
+        )
+    }
+
+    /// Hand the staging texture back to the pool after being done with uploads.
+    ///
+    /// The texture must have been obtained from this pool via get_texture.
+    pub fn return_texture(&mut self, texture: Texture) {
+        let slot = self.format_index(texture.get_format());
+        let entry = (texture, self.current_frame);
+        self.textures[slot].push_back(entry);
+    }
+
+    /// Create or reuse a temporary CPU buffer.
+    ///
+    /// These buffers are used in the batched upload path when PBOs are not supported.
+    /// Content is first written to the temporary buffer and uploaded via a single
+    /// glTexSubImage2D call.
+    pub fn get_temporary_buffer(&mut self) -> Vec<mem::MaybeUninit<u8>> {
+        let buffer = match self.temporary_buffers.pop() {
+            Some(recycled) => recycled,
+            None => vec![mem::MaybeUninit::new(0); BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4],
+        };
+
+        // Remember the lowest number of pooled buffers seen during this frame.
+        let remaining = self.temporary_buffers.len();
+        if remaining < self.min_temporary_buffers {
+            self.min_temporary_buffers = remaining;
+        }
+
+        buffer
+    }
+
+    /// Return memory that was obtained from this pool via get_temporary_buffer.
+    pub fn return_temporary_buffer(&mut self, buffer: Vec<mem::MaybeUninit<u8>>) {
+        assert_eq!(buffer.len(), BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4);
+        self.temporary_buffers.push(buffer);
+    }
+
+    /// Deallocate this pool's CPU and GPU memory.
+    pub fn delete_textures(&mut self, device: &mut Device) {
+        for pool in self.textures.iter_mut() {
+            // Empty the queue, deleting the most recently returned textures first.
+            for (texture, _last_used) in pool.drain(..).rev() {
+                device.delete_texture(texture);
+            }
+        }
+        self.temporary_buffers.clear();
+    }
+
+    /// Deallocate some textures if there are too many for a long time.
+    pub fn end_frame(&mut self, device: &mut Device) {
+        let now = self.current_frame;
+
+        let per_format = self.textures
+            .iter_mut()
+            .zip(self.delay_texture_deallocation.iter_mut());
+
+        for (pool, delay_until) in per_format {
+            // Count the staging textures that could be reused right away. If
+            // that number stays high for a large number of frames, the pool is
+            // trimmed back to 8-ish textures over multiple frames.
+            let reusable = pool
+                .iter()
+                .filter(|entry| now - entry.1 > 2)
+                .count();
+
+            if reusable < 8 {
+                // Don't deallocate textures for another 120 frames.
+                *delay_until = now + 120;
+            }
+
+            // Deallocate up to 4 staging textures every frame.
+            let evict_count = if now > *delay_until { reusable.min(4) } else { 0 };
+
+            for _ in 0..evict_count {
+                let (texture, _last_used) = pool.pop_front().unwrap();
+                device.delete_texture(texture);
+            }
+        }
+
+        // Temporary CPU buffers are handled along the same lines. Calls to get
+        // and return temporary buffers should have been balanced for this
+        // frame, but get_temporary_buffer allocates whenever the vec is empty.
+        // As buffers are carried over from frame to frame, the smallest length
+        // of temporary_buffers seen this frame tells how many buffers were not
+        // touched; some get deallocated if there are a lot of them.
+        let untouched = self.min_temporary_buffers;
+        if untouched < 8 {
+            self.delay_buffer_deallocation = now + 120;
+        }
+
+        let evict_count = if now > self.delay_buffer_deallocation {
+            untouched.min(4)
+        } else {
+            0
+        };
+
+        for _ in 0..evict_count {
+            // Unlike textures it doesn't matter whether we pop from the front or back
+            // of the vector.
+            self.temporary_buffers.pop();
+        }
+    }
+
+    /// Add the memory held by this pool (temporary buffers and staging
+    /// textures) to `report`.
+    pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
+        for buffer in self.temporary_buffers.iter() {
+            report.upload_staging_memory += unsafe { (size_op_funs.size_of_op)(buffer.as_ptr() as *const _) };
+        }
+
+        for (texture, _last_used) in self.textures.iter().flatten() {
+            report.upload_staging_textures += texture.size_in_bytes();
+        }
+    }
+}
+
+/// Counters accumulated while uploading texture cache updates, reported to the
+/// profiler once the uploads are done.
+///
+/// All durations are in nanoseconds.
+struct UploadStats {
+    /// Number of draw calls submitted to copy from staging textures to the
+    /// texture cache.
+    num_draw_calls: u32,
+    /// Time spent uploading to textures, including flushing the uploader.
+    upload_time: u64,
+    /// Time spent obtaining staging buffers (temporary CPU buffers or PBOs).
+    cpu_buffer_alloc_time: u64,
+    /// Time spent obtaining staging textures from the pool.
+    texture_alloc_time: u64,
+    /// Time spent copying pixels into staging buffers on the CPU.
+    cpu_copy_time: u64,
+    /// Time spent submitting the commands that copy from the staging textures
+    /// to the texture cache.
+    gpu_copy_commands_time: u64,
+    /// Total number of bytes uploaded.
+    bytes_uploaded: usize,
+    /// Number of items uploaded (reported in the profiler marker text).
+    items_uploaded: usize,
+}
+
+/// The memory that backs a batch upload buffer until it is uploaded to its
+/// staging texture.
+#[derive(Debug)]
+enum StagingBufferKind<'a> {
+    /// A buffer staged through the texture uploader, used when the device's
+    /// upload method is `UploadMethod::PixelBuffer`.
+    Pbo(UploadStagingBuffer<'a>),
+    /// A temporary CPU buffer taken from the `UploadTexturePool`, used when the
+    /// device's upload method is `UploadMethod::Immediate`. It is handed back
+    /// to the pool once its content has been uploaded.
+    CpuBuffer { bytes: Vec<mem::MaybeUninit<u8>> },
+    /// The pixels of the source image itself, uploaded without an intermediate
+    /// copy. `stride` is passed as is to the upload call.
+    Image { bytes: Arc<Vec<u8>>, stride: Option<i32> },
+}
+/// A staging buffer together with the staging texture its content will be
+/// uploaded to.
+#[derive(Debug)]
+struct BatchUploadBuffer<'a> {
+    /// The memory holding the pixels to upload.
+    staging_buffer: StagingBufferKind<'a>,
+    /// Index of the destination staging texture within batch_upload_textures.
+    texture_index: usize,
+    // A rectangle containing all items going into this staging texture, so
+    // that we can avoid uploading the entire area if we are using glTexSubImage2d.
+    upload_rect: DeviceIntRect,
+}
+
+// On some devices performing many small texture uploads is slow, so instead we batch
+// updates in to a small number of uploads to temporary textures, then copy from those
+// textures to the correct place in the texture cache.
+// Each BatchUploadCopy describes one such copy: a rectangle of a temporary (staging)
+// texture and where it has to end up in a texture cache texture.
+#[derive(Debug)]
+struct BatchUploadCopy {
+    // Index within batch_upload_textures
+    src_texture_index: usize,
+    // Origin of the rectangle to read from the staging texture.
+    src_offset: DeviceIntPoint,
+    // Texture cache texture receiving the copy.
+    dest_texture_id: CacheTextureId,
+    // Origin of the rectangle to write in the destination texture.
+    dest_offset: DeviceIntPoint,
+    // Size of the copied rectangle, identical in source and destination.
+    size: DeviceIntSize,
+}
diff --git a/gfx/wr/webrender/src/renderer/vertex.rs b/gfx/wr/webrender/src/renderer/vertex.rs
new file mode 100644
index 0000000000..ff555363d8
--- /dev/null
+++ b/gfx/wr/webrender/src/renderer/vertex.rs
@@ -0,0 +1,1154 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Rendering logic related to the vertex shaders and their states, including
+//!  - Vertex Array Objects
+//!  - vertex layout descriptors
+//!  - textures bound at vertex stage
+
+use std::{marker::PhantomData, mem, num::NonZeroUsize, ops};
+use api::units::*;
+use crate::{
+    device::{
+        Device, Texture, TextureFilter, TextureUploader, UploadPBOPool, VertexUsageHint, VAO,
+    },
+    frame_builder::Frame,
+    gpu_types::{PrimitiveHeaderI, PrimitiveHeaderF, TransformData},
+    internal_types::Swizzle,
+    render_task::RenderTaskData,
+};
+
+// NOTE(review): presumably the number of spare rows added when sizing vertex
+// data textures; this is not evident from the declaration alone, so confirm
+// at the use sites.
+pub const VERTEX_TEXTURE_EXTRA_ROWS: i32 = 10;
+
+/// Re-export of `webrender_build::MAX_VERTEX_TEXTURE_WIDTH` (going by its name,
+/// the maximum width of a vertex texture).
+pub const MAX_VERTEX_TEXTURE_WIDTH: usize = webrender_build::MAX_VERTEX_TEXTURE_WIDTH;
+
+pub mod desc {
+    use crate::device::{VertexAttribute, VertexAttributeKind, VertexDescriptor};
+
+    pub const PRIM_INSTANCES: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[VertexAttribute {
+            name: "aData",
+            count: 4,
+            kind: VertexAttributeKind::I32,
+        }],
+    };
+
+    pub const BLUR: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aBlurRenderTaskAddress",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aBlurSourceTaskAddress",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aBlurDirection",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+        ],
+    };
+
+    pub const LINE: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aTaskRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aLocalSize",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aWavyLineThickness",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aStyle",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aAxisSelect",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    pub const FAST_LINEAR_GRADIENT: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aTaskRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aColor0",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aColor1",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aAxisSelect",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    pub const LINEAR_GRADIENT: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aTaskRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aStartPoint",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aEndPoint",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aScale",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aExtendMode",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aGradientStopsAddress",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+        ],
+    };
+
+    pub const RADIAL_GRADIENT: VertexDescriptor = VertexDescriptor {
+        vertex_attributes: &[VertexAttribute {
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[
+            VertexAttribute {
+                name: "aTaskRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aCenter",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aScale",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aStartRadius",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aEndRadius",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aXYRatio",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aExtendMode",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aGradientStopsAddress",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+        ],
+    };
+
+    pub const CONIC_GRADIENT: VertexDescriptor = VertexDescriptor { // Attribute layout handed to `create_vao_with_new_instances` for `conic_gradient_vao`.
+        vertex_attributes: &[VertexAttribute { // Single per-vertex attribute: two `U8Norm` components.
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[ // Eight per-instance attributes, listed in declaration order.
+            VertexAttribute {
+                name: "aTaskRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aCenter",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aScale",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aStartOffset",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aEndOffset",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aAngle",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aExtendMode",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aGradientStopsAddress",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+        ],
+    };
+
+    pub const BORDER: VertexDescriptor = VertexDescriptor { // Attribute layout handed to `create_vao_with_new_instances` for `border_vao`.
+        vertex_attributes: &[VertexAttribute { // Single per-vertex attribute: two `U8Norm` components.
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[ // Nine per-instance attributes; only `aFlags` is an integer, the rest are F32.
+            VertexAttribute {
+                name: "aTaskOrigin",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aColor0",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aColor1",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aFlags",
+                count: 1,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aWidths",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aRadii",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipParams1",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipParams2",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    pub const SCALE: VertexDescriptor = VertexDescriptor { // Attribute layout handed to `create_vao_with_new_instances` for `scale_vao`.
+        vertex_attributes: &[VertexAttribute { // Single per-vertex attribute: two `U8Norm` components.
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[ // Two per-instance rects, four F32 components each.
+            VertexAttribute {
+                name: "aScaleTargetRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aScaleSourceRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    pub const CLIP_RECT: VertexDescriptor = VertexDescriptor { // Attribute layout handed to `create_vao_with_new_instances` for `clip_rect_vao`.
+        vertex_attributes: &[VertexAttribute { // Single per-vertex attribute: two `U8Norm` components.
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[ // Four attributes shared by the CLIP_* descriptors, then eleven specific to this one.
+            // common clip attributes
+            VertexAttribute {
+                name: "aClipDeviceArea",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipOrigins",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aDevicePixelScale",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aTransformIds",
+                count: 2,
+                kind: VertexAttributeKind::I32,
+            },
+            // specific clip attributes
+            VertexAttribute {
+                name: "aClipLocalPos",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipLocalRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipMode",
+                count: 1,
+                kind: VertexAttributeKind::F32, // NOTE(review): F32 here, while CLIP_BOX_SHADOW declares `aClipMode` as I32 - confirm against the shader.
+            },
+            VertexAttribute { // From here on: one rect + one radii attribute per corner (TL, TR, BL, BR).
+                name: "aClipRect_TL",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipRadii_TL",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipRect_TR",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipRadii_TR",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipRect_BL",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipRadii_BL",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipRect_BR",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipRadii_BR",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    pub const CLIP_BOX_SHADOW: VertexDescriptor = VertexDescriptor { // Attribute layout handed to `create_vao_with_new_instances` for `clip_box_shadow_vao`.
+        vertex_attributes: &[VertexAttribute { // Single per-vertex attribute: two `U8Norm` components.
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[ // Four attributes shared by the CLIP_* descriptors, then five specific to this one.
+            // common clip attributes
+            VertexAttribute {
+                name: "aClipDeviceArea",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipOrigins",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aDevicePixelScale",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aTransformIds",
+                count: 2,
+                kind: VertexAttributeKind::I32,
+            },
+            // specific clip attributes
+            VertexAttribute {
+                name: "aClipDataResourceAddress",
+                count: 2,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aClipSrcRectSize",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipMode",
+                count: 1,
+                kind: VertexAttributeKind::I32, // NOTE(review): I32 here, while CLIP_RECT declares `aClipMode` as F32 - confirm against the shader.
+            },
+            VertexAttribute {
+                name: "aStretchMode",
+                count: 2,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aClipDestRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    pub const CLIP_IMAGE: VertexDescriptor = VertexDescriptor { // Attribute layout handed to `create_vao_with_new_instances` for `clip_image_vao`.
+        vertex_attributes: &[VertexAttribute { // Single per-vertex attribute: two `U8Norm` components.
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[ // Four attributes shared by the CLIP_* descriptors, then three specific to this one.
+            // common clip attributes
+            VertexAttribute {
+                name: "aClipDeviceArea",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipOrigins",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aDevicePixelScale",
+                count: 1,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aTransformIds",
+                count: 2,
+                kind: VertexAttributeKind::I32,
+            },
+            // specific clip attributes
+            VertexAttribute {
+                name: "aClipTileRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aClipDataResourceAddress",
+                count: 2,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aClipLocalRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    pub const GPU_CACHE_UPDATE: VertexDescriptor = VertexDescriptor { // Per-vertex-only layout: a position and a four-component value, no instance data.
+        vertex_attributes: &[
+            VertexAttribute {
+                name: "aPosition",
+                count: 2,
+                kind: VertexAttributeKind::U16Norm, // Unlike the other descriptors here, which use `U8Norm` for `aPosition`.
+            },
+            VertexAttribute {
+                name: "aValue",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+        instance_attributes: &[], // Intentionally empty: nothing is supplied per instance.
+    };
+
+    pub const RESOLVE: VertexDescriptor = VertexDescriptor { // Attribute layout handed to `create_vao_with_new_instances` for `resolve_vao`.
+        vertex_attributes: &[VertexAttribute { // Single per-vertex attribute: two `U8Norm` components.
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[VertexAttribute { // Single per-instance attribute: one four-component F32 rect.
+            name: "aRect",
+            count: 4,
+            kind: VertexAttributeKind::F32,
+        }],
+    };
+
+    pub const SVG_FILTER: VertexDescriptor = VertexDescriptor { // Attribute layout handed to `create_vao_with_new_instances` for `svg_filter_vao`.
+        vertex_attributes: &[VertexAttribute { // Single per-vertex attribute: two `U8Norm` components.
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[ // Seven per-instance attributes, all declared as `U16`.
+            VertexAttribute {
+                name: "aFilterRenderTaskAddress",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aFilterInput1TaskAddress",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aFilterInput2TaskAddress",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aFilterKind",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aFilterInputCount",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aFilterGenericInt",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aFilterExtraDataAddress",
+                count: 2, // The only two-component attribute in this descriptor's instance data.
+                kind: VertexAttributeKind::U16,
+            },
+        ],
+    };
+
+    pub const MASK: VertexDescriptor = VertexDescriptor { // Attribute layout handed to `create_vao_with_new_instances` for `mask_vao`.
+        vertex_attributes: &[VertexAttribute { // Single per-vertex attribute: two `U8Norm` components.
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[ // Two per-instance attributes, four I32 components each.
+            VertexAttribute {
+                name: "aData",
+                count: 4,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aClipData",
+                count: 4,
+                kind: VertexAttributeKind::I32,
+            },
+        ],
+    };
+
+    pub const VECTOR_STENCIL: VertexDescriptor = VertexDescriptor { // Attribute layout for vector stencil instances; `RendererVAOs::new` builds no VAO from it.
+        vertex_attributes: &[VertexAttribute { // Single per-vertex attribute: two `U8Norm` components.
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[ // Three positions and three normals (2 x F32 each), then two U16 scalars.
+            VertexAttribute {
+                name: "aFromPosition",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aCtrlPosition",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aToPosition",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aFromNormal",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aCtrlNormal",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aToNormal",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aPathID",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aPad", // Presumably padding to keep the instance size aligned - confirm against the instance struct.
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+        ],
+    };
+
+    pub const VECTOR_COVER: VertexDescriptor = VertexDescriptor { // Attribute layout for vector cover instances; `RendererVAOs::new` builds no VAO from it.
+        vertex_attributes: &[VertexAttribute { // Single per-vertex attribute: two `U8Norm` components.
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[ // Four per-instance attributes, all integer-typed.
+            VertexAttribute {
+                name: "aTargetRect",
+                count: 4,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aStencilOrigin",
+                count: 2,
+                kind: VertexAttributeKind::I32,
+            },
+            VertexAttribute {
+                name: "aSubpixel",
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+            VertexAttribute {
+                name: "aPad", // Presumably padding to keep the instance size aligned - confirm against the instance struct.
+                count: 1,
+                kind: VertexAttributeKind::U16,
+            },
+        ],
+    };
+
+    pub const COMPOSITE: VertexDescriptor = VertexDescriptor { // Attribute layout handed to `create_vao_with_new_instances` for `composite_vao`.
+        vertex_attributes: &[VertexAttribute { // Single per-vertex attribute: two `U8Norm` components.
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[ // Eight per-instance attributes, each four F32 components.
+            VertexAttribute {
+                name: "aLocalRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aDeviceClipRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aColor",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aParams",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute { // Three UV rects follow: aUvRect0, aUvRect1, aUvRect2.
+                name: "aUvRect0",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aUvRect1",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aUvRect2",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aTransform",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    pub const CLEAR: VertexDescriptor = VertexDescriptor { // Attribute layout handed to `create_vao_with_new_instances` for `clear_vao`.
+        vertex_attributes: &[VertexAttribute { // Single per-vertex attribute: two `U8Norm` components.
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[ // Two per-instance attributes, four F32 components each.
+            VertexAttribute {
+                name: "aRect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "aColor",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+
+    pub const COPY: VertexDescriptor = VertexDescriptor { // Attribute layout handed to `create_vao_with_new_instances` for `copy_vao`.
+        vertex_attributes: &[VertexAttribute { // Single per-vertex attribute: two `U8Norm` components.
+            name: "aPosition",
+            count: 2,
+            kind: VertexAttributeKind::U8Norm,
+        }],
+        instance_attributes: &[ // NOTE(review): snake_case names, unlike the camelCase used by every other descriptor - presumably they mirror the shader's attribute names; confirm.
+            VertexAttribute {
+                name: "a_src_rect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "a_dst_rect",
+                count: 4,
+                kind: VertexAttributeKind::F32,
+            },
+            VertexAttribute {
+                name: "a_dst_texture_size",
+                count: 2,
+                kind: VertexAttributeKind::F32,
+            },
+        ],
+    };
+}
+
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum VertexArrayKind { // Key used to index `RendererVAOs` and obtain the matching VAO.
+    Primitive, // -> prim_vao
+    Blur, // -> blur_vao
+    ClipImage, // -> clip_image_vao
+    ClipRect, // -> clip_rect_vao
+    ClipBoxShadow, // -> clip_box_shadow_vao
+    VectorStencil, // No VAO in `RendererVAOs`; indexing with it hits `unreachable!()`.
+    VectorCover, // No VAO in `RendererVAOs`; indexing with it hits `unreachable!()`.
+    Border, // -> border_vao
+    Scale, // -> scale_vao
+    LineDecoration, // -> line_vao
+    FastLinearGradient, // -> fast_linear_gradient_vao
+    LinearGradient, // -> linear_gradient_vao
+    RadialGradient, // -> radial_gradient_vao
+    ConicGradient, // -> conic_gradient_vao
+    Resolve, // -> resolve_vao
+    SvgFilter, // -> svg_filter_vao
+    Composite, // -> composite_vao
+    Clear, // -> clear_vao
+    Copy, // -> copy_vao
+    Mask, // -> mask_vao
+}
+
+pub struct VertexDataTexture<T> { // Wrapper around a GPU texture that `update` fills from a `Vec<T>`.
+    texture: Option<Texture>, // `None` until `update` allocates it; taken and deleted in `deinit`.
+    format: api::ImageFormat, // Format passed to `create_texture` whenever the texture is (re)allocated.
+    _marker: PhantomData<T>, // Records the element type without storing a `T`.
+}
+
+impl<T> VertexDataTexture<T> {
+    pub fn new(format: api::ImageFormat) -> Self { // Creates the wrapper without allocating a GPU texture; `update` does that.
+        Self {
+            texture: None,
+            format,
+            _marker: PhantomData,
+        }
+    }
+
+    /// Returns a borrow of the GPU texture. Panics if it hasn't been initialized.
+    pub fn texture(&self) -> &Texture {
+        self.texture.as_ref().unwrap() // Panics while `texture` is still `None`, i.e. before the first `update`.
+    }
+
+    /// Returns an estimate of the GPU memory consumed by this VertexDataTexture.
+    pub fn size_in_bytes(&self) -> usize {
+        self.texture.as_ref().map_or(0, |t| t.size_in_bytes()) // 0 when no texture has been allocated yet.
+    }
+
+    pub fn update<'a>( // Uploads `data` through `texture_uploader`, (re)allocating the texture when the row count requires it.
+        &'a mut self,
+        device: &mut Device,
+        texture_uploader: &mut TextureUploader<'a>,
+        data: &mut Vec<T>, // Mutable only so extra capacity can be reserved; no elements are pushed.
+    ) {
+        debug_assert!(mem::size_of::<T>() % 16 == 0); // `T` must occupy a whole number of 16-byte texels.
+        let texels_per_item = mem::size_of::<T>() / 16;
+        let items_per_row = MAX_VERTEX_TEXTURE_WIDTH / texels_per_item;
+        debug_assert_ne!(items_per_row, 0);
+
+        // Ensure we always end up with a texture when leaving this method.
+        let mut len = data.len();
+        if len == 0 {
+            if self.texture.is_some() {
+                return; // Nothing to upload and a texture already exists.
+            }
+            data.reserve(items_per_row);
+            len = items_per_row; // Treat the empty input as one full row so a texture still gets created.
+        } else {
+            // Extend the data array to have enough capacity to upload at least
+            // a multiple of the row size.  This ensures memory safety when the
+            // array is passed to OpenGL to upload to the GPU.
+            let extra = len % items_per_row;
+            if extra != 0 {
+                let padding = items_per_row - extra;
+                data.reserve(padding); // Grows capacity only; `data.len()` is unchanged.
+                len += padding;
+            }
+        }
+
+        let needed_height = (len / items_per_row) as i32; // `len` is a whole number of rows at this point.
+        let existing_height = self
+            .texture
+            .as_ref()
+            .map_or(0, |t| t.get_dimensions().height);
+
+        // Create a new texture if needed.
+        //
+        // These textures are generally very small, which is why we don't bother
+        // with incremental updates and just re-upload every frame. For most pages
+        // they're one row each, and on stress tests like css-francine they end up
+        // in the 6-14 range. So we size the texture tightly to what we need (usually
+        // 1), and shrink it if the waste would be more than `VERTEX_TEXTURE_EXTRA_ROWS`
+        // rows. This helps with memory overhead, especially because there are several
+        // instances of these textures per Renderer.
+        if needed_height > existing_height
+            || needed_height + VERTEX_TEXTURE_EXTRA_ROWS < existing_height
+        {
+            // Drop the existing texture, if any.
+            if let Some(t) = self.texture.take() {
+                device.delete_texture(t);
+            }
+
+            let texture = device.create_texture(
+                api::ImageBufferKind::Texture2D,
+                self.format,
+                MAX_VERTEX_TEXTURE_WIDTH as i32,
+                // Ensure height is at least two to work around
+                // https://bugs.chromium.org/p/angleproject/issues/detail?id=3039
+                needed_height.max(2),
+                TextureFilter::Nearest,
+                None,
+            );
+            self.texture = Some(texture);
+        }
+
+        // Note: the actual width can be larger than the logical one, with a few texels
+        // of each row unused at the tail. This is needed because there is still hardware
+        // (like Intel iGPUs) that prefers power-of-two sizes of textures ([1]).
+        //
+        // [1] https://software.intel.com/en-us/articles/opengl-performance-tips-power-of-two-textures-have-better-performance
+        let logical_width = if needed_height == 1 {
+            data.len() * texels_per_item // Single row: only the texels backed by real items (uses the unpadded length).
+        } else {
+            MAX_VERTEX_TEXTURE_WIDTH - (MAX_VERTEX_TEXTURE_WIDTH % texels_per_item) // Full rows: widest span holding whole items.
+        };
+
+        let rect = DeviceIntRect::from_size(
+            DeviceIntSize::new(logical_width as i32, needed_height),
+        );
+
+        debug_assert!(len <= data.capacity(), "CPU copy will read out of bounds");
+        texture_uploader.upload(
+            device,
+            self.texture(),
+            rect,
+            None,
+            None,
+            data.as_ptr(),
+            len, // Padded item count; may exceed `data.len()` but not `data.capacity()` (asserted above).
+        );
+    }
+
+    pub fn deinit(mut self, device: &mut Device) { // Consumes the wrapper and deletes the GPU texture, if one was allocated.
+        if let Some(t) = self.texture.take() {
+            device.delete_texture(t);
+        }
+    }
+}
+
+pub struct VertexDataTextures { // The four vertex-data textures that `update` uploads from a `Frame` and then binds.
+    prim_header_f_texture: VertexDataTexture<PrimitiveHeaderF>, // RGBAF32; bound to `TextureSampler::PrimitiveHeadersF`.
+    prim_header_i_texture: VertexDataTexture<PrimitiveHeaderI>, // RGBAI32; bound to `TextureSampler::PrimitiveHeadersI`.
+    transforms_texture: VertexDataTexture<TransformData>, // RGBAF32; bound to `TextureSampler::TransformPalette`.
+    render_task_texture: VertexDataTexture<RenderTaskData>, // RGBAF32; bound to `TextureSampler::RenderTasks`.
+}
+
+impl VertexDataTextures {
+    /// Builds the four texture wrappers with their image formats.
+    ///
+    /// No GPU texture is allocated here; that happens in `update`.
+    pub fn new() -> Self {
+        let float_format = api::ImageFormat::RGBAF32;
+        let int_format = api::ImageFormat::RGBAI32;
+
+        Self {
+            prim_header_f_texture: VertexDataTexture::new(float_format),
+            prim_header_i_texture: VertexDataTexture::new(int_format),
+            transforms_texture: VertexDataTexture::new(float_format),
+            render_task_texture: VertexDataTexture::new(float_format),
+        }
+    }
+
+    /// Uploads the frame's primitive headers, transform palette and render
+    /// task data, then binds each texture to its sampler.
+    pub fn update(&mut self, device: &mut Device, pbo_pool: &mut UploadPBOPool, frame: &mut Frame) {
+        let mut uploader = device.upload_texture(pbo_pool);
+
+        self.prim_header_f_texture
+            .update(device, &mut uploader, &mut frame.prim_headers.headers_float);
+        self.prim_header_i_texture
+            .update(device, &mut uploader, &mut frame.prim_headers.headers_int);
+        self.transforms_texture
+            .update(device, &mut uploader, &mut frame.transform_palette);
+        self.render_task_texture
+            .update(device, &mut uploader, &mut frame.render_tasks.task_data);
+
+        // The uploader must be flushed (and thereby released) before the
+        // textures can be borrowed again for binding.
+        uploader.flush(device);
+
+        let prim_headers_f = self.prim_header_f_texture.texture();
+        device.bind_texture(
+            super::TextureSampler::PrimitiveHeadersF,
+            prim_headers_f,
+            Swizzle::default(),
+        );
+
+        let prim_headers_i = self.prim_header_i_texture.texture();
+        device.bind_texture(
+            super::TextureSampler::PrimitiveHeadersI,
+            prim_headers_i,
+            Swizzle::default(),
+        );
+
+        let transforms = self.transforms_texture.texture();
+        device.bind_texture(
+            super::TextureSampler::TransformPalette,
+            transforms,
+            Swizzle::default(),
+        );
+
+        let render_tasks = self.render_task_texture.texture();
+        device.bind_texture(
+            super::TextureSampler::RenderTasks,
+            render_tasks,
+            Swizzle::default(),
+        );
+    }
+
+    /// Sums the estimated GPU memory of the four textures.
+    pub fn size_in_bytes(&self) -> usize {
+        let sizes = [
+            self.prim_header_f_texture.size_in_bytes(),
+            self.prim_header_i_texture.size_in_bytes(),
+            self.transforms_texture.size_in_bytes(),
+            self.render_task_texture.size_in_bytes(),
+        ];
+        sizes.iter().sum()
+    }
+
+    /// Consumes the set and deletes every allocated GPU texture.
+    pub fn deinit(self, device: &mut Device) {
+        let Self {
+            prim_header_f_texture,
+            prim_header_i_texture,
+            transforms_texture,
+            render_task_texture,
+        } = self;
+
+        transforms_texture.deinit(device);
+        prim_header_f_texture.deinit(device);
+        prim_header_i_texture.deinit(device);
+        render_task_texture.deinit(device);
+    }
+}
+
+pub struct RendererVAOs { // Every VAO created by `RendererVAOs::new`; look one up by indexing with a `VertexArrayKind`.
+    prim_vao: VAO, // Created first from `desc::PRIM_INSTANCES`; the other VAOs are created from it.
+    blur_vao: VAO, // desc::BLUR
+    clip_rect_vao: VAO, // desc::CLIP_RECT
+    clip_box_shadow_vao: VAO, // desc::CLIP_BOX_SHADOW
+    clip_image_vao: VAO, // desc::CLIP_IMAGE
+    border_vao: VAO, // desc::BORDER
+    line_vao: VAO, // desc::LINE
+    scale_vao: VAO, // desc::SCALE
+    fast_linear_gradient_vao: VAO, // desc::FAST_LINEAR_GRADIENT
+    linear_gradient_vao: VAO, // desc::LINEAR_GRADIENT
+    radial_gradient_vao: VAO, // desc::RADIAL_GRADIENT
+    conic_gradient_vao: VAO, // desc::CONIC_GRADIENT
+    resolve_vao: VAO, // desc::RESOLVE
+    svg_filter_vao: VAO, // desc::SVG_FILTER
+    composite_vao: VAO, // desc::COMPOSITE
+    clear_vao: VAO, // desc::CLEAR
+    copy_vao: VAO, // desc::COPY
+    mask_vao: VAO, // desc::MASK
+}
+
+impl RendererVAOs {
+    /// Creates every VAO used by the renderer.
+    ///
+    /// `prim_vao` is created first from `desc::PRIM_INSTANCES` and filled with
+    /// the quad index and vertex data. Each remaining VAO is then created from
+    /// it through `create_vao_with_new_instances` with its own descriptor.
+    ///
+    /// With `indexed_quads == Some(count)` the instance divisor is 0 and the
+    /// quad indices/vertices are replicated `count` times. With `None` a
+    /// single quad is uploaded and the instance divisor is 1.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `indexed_quads` asks for more quads than 16-bit indices can
+    /// address (four vertices per quad, so at most 16384 quads).
+    pub fn new(device: &mut Device, indexed_quads: Option<NonZeroUsize>) -> Self {
+        const QUAD_INDICES: [u16; 6] = [0, 1, 2, 2, 1, 3];
+        const QUAD_VERTICES: [[u8; 2]; 4] = [[0, 0], [0xFF, 0], [0, 0xFF], [0xFF, 0xFF]];
+        // Largest quad count whose highest index (`count * 4 - 1`) still fits
+        // in a `u16`.
+        const MAX_INDEXED_QUADS: usize = (u16::MAX as usize + 1) / QUAD_VERTICES.len();
+
+        let instance_divisor = if indexed_quads.is_some() { 0 } else { 1 };
+        let prim_vao = device.create_vao(&desc::PRIM_INSTANCES, instance_divisor);
+
+        device.bind_vao(&prim_vao);
+        match indexed_quads {
+            Some(count) => {
+                // Indices are computed as `instance * 4 + index` in `u16`.
+                // Checking only `count < u16::MAX` would let that arithmetic
+                // overflow (panic in debug builds, wrapped and therefore
+                // wrong indices in release builds) for counts above 16384.
+                assert!(
+                    count.get() <= MAX_INDEXED_QUADS,
+                    "indexed quad count {} exceeds the {} quads addressable with u16 indices",
+                    count.get(),
+                    MAX_INDEXED_QUADS,
+                );
+                let quad_indices = (0 .. count.get() as u16)
+                    .flat_map(|instance| QUAD_INDICES.iter().map(move |&index| instance * 4 + index))
+                    .collect::<Vec<_>>();
+                device.update_vao_indices(&prim_vao, &quad_indices, VertexUsageHint::Static);
+                let quad_vertices = (0 .. count.get() as u16)
+                    .flat_map(|_| QUAD_VERTICES.iter().cloned())
+                    .collect::<Vec<_>>();
+                device.update_vao_main_vertices(&prim_vao, &quad_vertices, VertexUsageHint::Static);
+            }
+            None => {
+                device.update_vao_indices(&prim_vao, &QUAD_INDICES, VertexUsageHint::Static);
+                device.update_vao_main_vertices(&prim_vao, &QUAD_VERTICES, VertexUsageHint::Static);
+            }
+        }
+
+        // `prim_vao` is listed last because every other field borrows it
+        // during construction.
+        RendererVAOs {
+            blur_vao: device.create_vao_with_new_instances(&desc::BLUR, &prim_vao),
+            clip_rect_vao: device.create_vao_with_new_instances(&desc::CLIP_RECT, &prim_vao),
+            clip_box_shadow_vao: device
+                .create_vao_with_new_instances(&desc::CLIP_BOX_SHADOW, &prim_vao),
+            clip_image_vao: device.create_vao_with_new_instances(&desc::CLIP_IMAGE, &prim_vao),
+            border_vao: device.create_vao_with_new_instances(&desc::BORDER, &prim_vao),
+            scale_vao: device.create_vao_with_new_instances(&desc::SCALE, &prim_vao),
+            line_vao: device.create_vao_with_new_instances(&desc::LINE, &prim_vao),
+            fast_linear_gradient_vao: device.create_vao_with_new_instances(&desc::FAST_LINEAR_GRADIENT, &prim_vao),
+            linear_gradient_vao: device.create_vao_with_new_instances(&desc::LINEAR_GRADIENT, &prim_vao),
+            radial_gradient_vao: device.create_vao_with_new_instances(&desc::RADIAL_GRADIENT, &prim_vao),
+            conic_gradient_vao: device.create_vao_with_new_instances(&desc::CONIC_GRADIENT, &prim_vao),
+            resolve_vao: device.create_vao_with_new_instances(&desc::RESOLVE, &prim_vao),
+            svg_filter_vao: device.create_vao_with_new_instances(&desc::SVG_FILTER, &prim_vao),
+            composite_vao: device.create_vao_with_new_instances(&desc::COMPOSITE, &prim_vao),
+            clear_vao: device.create_vao_with_new_instances(&desc::CLEAR, &prim_vao),
+            copy_vao: device.create_vao_with_new_instances(&desc::COPY, &prim_vao),
+            mask_vao: device.create_vao_with_new_instances(&desc::MASK, &prim_vao),
+            prim_vao,
+        }
+    }
+
+    /// Consumes the set and deletes every VAO it owns.
+    pub fn deinit(self, device: &mut Device) {
+        device.delete_vao(self.prim_vao);
+        device.delete_vao(self.resolve_vao);
+        device.delete_vao(self.clip_rect_vao);
+        device.delete_vao(self.clip_box_shadow_vao);
+        device.delete_vao(self.clip_image_vao);
+        device.delete_vao(self.fast_linear_gradient_vao);
+        device.delete_vao(self.linear_gradient_vao);
+        device.delete_vao(self.radial_gradient_vao);
+        device.delete_vao(self.conic_gradient_vao);
+        device.delete_vao(self.blur_vao);
+        device.delete_vao(self.line_vao);
+        device.delete_vao(self.border_vao);
+        device.delete_vao(self.scale_vao);
+        device.delete_vao(self.svg_filter_vao);
+        device.delete_vao(self.composite_vao);
+        device.delete_vao(self.clear_vao);
+        device.delete_vao(self.copy_vao);
+        device.delete_vao(self.mask_vao);
+    }
+}
+
+impl ops::Index<VertexArrayKind> for RendererVAOs {
+    type Output = VAO;
+
+    /// Returns the VAO stored for `kind`.
+    ///
+    /// # Panics
+    ///
+    /// Hits `unreachable!()` for `VectorStencil` and `VectorCover`, which
+    /// have no VAO in `RendererVAOs`.
+    fn index(&self, kind: VertexArrayKind) -> &Self::Output {
+        use self::VertexArrayKind as Kind;
+
+        // Arms follow the declaration order of `VertexArrayKind`.
+        match kind {
+            Kind::Primitive => &self.prim_vao,
+            Kind::Blur => &self.blur_vao,
+            Kind::ClipImage => &self.clip_image_vao,
+            Kind::ClipRect => &self.clip_rect_vao,
+            Kind::ClipBoxShadow => &self.clip_box_shadow_vao,
+            Kind::VectorStencil | Kind::VectorCover => unreachable!(),
+            Kind::Border => &self.border_vao,
+            Kind::Scale => &self.scale_vao,
+            Kind::LineDecoration => &self.line_vao,
+            Kind::FastLinearGradient => &self.fast_linear_gradient_vao,
+            Kind::LinearGradient => &self.linear_gradient_vao,
+            Kind::RadialGradient => &self.radial_gradient_vao,
+            Kind::ConicGradient => &self.conic_gradient_vao,
+            Kind::Resolve => &self.resolve_vao,
+            Kind::SvgFilter => &self.svg_filter_vao,
+            Kind::Composite => &self.composite_vao,
+            Kind::Clear => &self.clear_vao,
+            Kind::Copy => &self.copy_vao,
+            Kind::Mask => &self.mask_vao,
+        }
+    }
+}